]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
8ad349bb | 2 | * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved. |
1c79356b | 3 | * |
8ad349bb | 4 | * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ |
1c79356b | 5 | * |
8ad349bb A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the | |
10 | * License may not be used to create, or enable the creation or | |
11 | * redistribution of, unlawful or unlicensed copies of an Apple operating | |
12 | * system, or to circumvent, violate, or enable the circumvention or | |
13 | * violation of, any terms of an Apple operating system software license | |
14 | * agreement. | |
15 | * | |
16 | * Please obtain a copy of the License at | |
17 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
18 | * file. | |
19 | * | |
20 | * The Original Code and all software distributed under the License are | |
21 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
22 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
23 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
24 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
25 | * Please see the License for the specific language governing rights and | |
26 | * limitations under the License. | |
27 | * | |
28 | * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ | |
1c79356b A |
29 | */ |
30 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
31 | /* | |
32 | * Copyright (c) 1989, 1993, 1995 | |
33 | * The Regents of the University of California. All rights reserved. | |
34 | * | |
35 | * Redistribution and use in source and binary forms, with or without | |
36 | * modification, are permitted provided that the following conditions | |
37 | * are met: | |
38 | * 1. Redistributions of source code must retain the above copyright | |
39 | * notice, this list of conditions and the following disclaimer. | |
40 | * 2. Redistributions in binary form must reproduce the above copyright | |
41 | * notice, this list of conditions and the following disclaimer in the | |
42 | * documentation and/or other materials provided with the distribution. | |
43 | * 3. All advertising materials mentioning features or use of this software | |
44 | * must display the following acknowledgement: | |
45 | * This product includes software developed by the University of | |
46 | * California, Berkeley and its contributors. | |
47 | * 4. Neither the name of the University nor the names of its contributors | |
48 | * may be used to endorse or promote products derived from this software | |
49 | * without specific prior written permission. | |
50 | * | |
51 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
52 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
53 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
54 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
55 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
56 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
57 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
58 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
59 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
60 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
61 | * SUCH DAMAGE. | |
62 | * | |
63 | * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 | |
64 | */ | |
65 | ||
66 | #include <sys/param.h> | |
91447636 A |
67 | #include <sys/proc_internal.h> |
68 | #include <sys/kauth.h> | |
1c79356b A |
69 | #include <sys/systm.h> |
70 | #include <sys/kernel.h> | |
71 | #include <sys/conf.h> | |
91447636 A |
72 | #include <sys/buf_internal.h> |
73 | #include <sys/mount_internal.h> | |
1c79356b | 74 | #include <sys/namei.h> |
91447636 | 75 | #include <sys/vnode_internal.h> |
1c79356b A |
76 | #include <sys/stat.h> |
77 | #include <sys/errno.h> | |
78 | #include <sys/ioctl.h> | |
79 | #include <sys/file.h> | |
91447636 | 80 | #include <sys/user.h> |
1c79356b | 81 | #include <sys/malloc.h> |
55e303ae | 82 | #include <sys/disk.h> |
91447636 | 83 | #include <sys/uio_internal.h> |
1c79356b A |
84 | #include <miscfs/specfs/specdev.h> |
85 | #include <vfs/vfs_support.h> | |
86 | ||
9bccf70c | 87 | #include <sys/kdebug.h> |
1c79356b A |
88 | |
89 | struct vnode *speclisth[SPECHSZ]; | |
90 | ||
91 | /* symbolic sleep message strings for devices */ | |
92 | char devopn[] = "devopn"; | |
93 | char devio[] = "devio"; | |
94 | char devwait[] = "devwait"; | |
95 | char devin[] = "devin"; | |
96 | char devout[] = "devout"; | |
97 | char devioc[] = "devioc"; | |
98 | char devcls[] = "devcls"; | |
99 | ||
100 | #define VOPFUNC int (*)(void *) | |
101 | ||
102 | int (**spec_vnodeop_p)(void *); | |
103 | struct vnodeopv_entry_desc spec_vnodeop_entries[] = { | |
91447636 A |
104 | { &vnop_default_desc, (VOPFUNC)vn_default_error }, |
105 | { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ | |
106 | { &vnop_create_desc, (VOPFUNC)err_create }, /* create */ | |
107 | { &vnop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ | |
108 | { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ | |
109 | { &vnop_close_desc, (VOPFUNC)spec_close }, /* close */ | |
110 | { &vnop_access_desc, (VOPFUNC)spec_access }, /* access */ | |
111 | { &vnop_getattr_desc, (VOPFUNC)spec_getattr }, /* getattr */ | |
112 | { &vnop_setattr_desc, (VOPFUNC)spec_setattr }, /* setattr */ | |
113 | { &vnop_read_desc, (VOPFUNC)spec_read }, /* read */ | |
114 | { &vnop_write_desc, (VOPFUNC)spec_write }, /* write */ | |
115 | { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ | |
116 | { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ | |
117 | { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ | |
118 | { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ | |
119 | { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ | |
120 | { &vnop_remove_desc, (VOPFUNC)err_remove }, /* remove */ | |
121 | { &vnop_link_desc, (VOPFUNC)err_link }, /* link */ | |
122 | { &vnop_rename_desc, (VOPFUNC)err_rename }, /* rename */ | |
123 | { &vnop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ | |
124 | { &vnop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ | |
125 | { &vnop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ | |
126 | { &vnop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ | |
127 | { &vnop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ | |
128 | { &vnop_inactive_desc, (VOPFUNC)nop_inactive }, /* inactive */ | |
129 | { &vnop_reclaim_desc, (VOPFUNC)nop_reclaim }, /* reclaim */ | |
130 | { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ | |
131 | { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ | |
132 | { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ | |
133 | { &vnop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ | |
8ad349bb | 134 | { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ |
91447636 A |
135 | { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ |
136 | { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ | |
137 | { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ | |
138 | { &vnop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ | |
139 | { &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ | |
140 | { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ | |
1c79356b A |
141 | { (struct vnodeop_desc*)NULL, (int(*)())NULL } |
142 | }; | |
143 | struct vnodeopv_desc spec_vnodeop_opv_desc = | |
144 | { &spec_vnodeop_p, spec_vnodeop_entries }; | |
145 | ||
91447636 A |
146 | |
147 | static void set_blocksize(vnode_t, dev_t); | |
148 | ||
149 | ||
1c79356b A |
150 | /* |
151 | * Trivial lookup routine that always fails. | |
152 | */ | |
153 | int | |
154 | spec_lookup(ap) | |
91447636 | 155 | struct vnop_lookup_args /* { |
1c79356b A |
156 | struct vnode *a_dvp; |
157 | struct vnode **a_vpp; | |
158 | struct componentname *a_cnp; | |
91447636 | 159 | vfs_context_t a_context; |
1c79356b A |
160 | } */ *ap; |
161 | { | |
162 | ||
163 | *ap->a_vpp = NULL; | |
164 | return (ENOTDIR); | |
165 | } | |
166 | ||
91447636 | 167 | static void |
1c79356b A |
168 | set_blocksize(struct vnode *vp, dev_t dev) |
169 | { | |
91447636 | 170 | int (*size)(dev_t); |
1c79356b A |
171 | int rsize; |
172 | ||
173 | if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) { | |
174 | rsize = (*size)(dev); | |
175 | if (rsize <= 0) /* did size fail? */ | |
176 | vp->v_specsize = DEV_BSIZE; | |
177 | else | |
178 | vp->v_specsize = rsize; | |
179 | } | |
180 | else | |
181 | vp->v_specsize = DEV_BSIZE; | |
182 | } | |
183 | ||
184 | void | |
185 | set_fsblocksize(struct vnode *vp) | |
186 | { | |
187 | ||
188 | if (vp->v_type == VBLK) { | |
189 | dev_t dev = (dev_t)vp->v_rdev; | |
190 | int maj = major(dev); | |
191 | ||
91447636 | 192 | if ((u_int)maj >= (u_int)nblkdev) |
1c79356b A |
193 | return; |
194 | ||
91447636 | 195 | vnode_lock(vp); |
1c79356b | 196 | set_blocksize(vp, dev); |
91447636 | 197 | vnode_unlock(vp); |
1c79356b A |
198 | } |
199 | ||
200 | } | |
201 | ||
202 | ||
203 | /* | |
204 | * Open a special file. | |
205 | */ | |
91447636 | 206 | int |
1c79356b | 207 | spec_open(ap) |
91447636 | 208 | struct vnop_open_args /* { |
1c79356b A |
209 | struct vnode *a_vp; |
210 | int a_mode; | |
91447636 | 211 | vfs_context_t a_context; |
1c79356b A |
212 | } */ *ap; |
213 | { | |
91447636 A |
214 | struct proc *p = vfs_context_proc(ap->a_context); |
215 | kauth_cred_t cred = vfs_context_ucred(ap->a_context); | |
216 | struct vnode *vp = ap->a_vp; | |
1c79356b A |
217 | dev_t bdev, dev = (dev_t)vp->v_rdev; |
218 | int maj = major(dev); | |
219 | int error; | |
220 | ||
221 | /* | |
222 | * Don't allow open if fs is mounted -nodev. | |
223 | */ | |
224 | if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) | |
225 | return (ENXIO); | |
226 | ||
227 | switch (vp->v_type) { | |
228 | ||
229 | case VCHR: | |
91447636 | 230 | if ((u_int)maj >= (u_int)nchrdev) |
1c79356b | 231 | return (ENXIO); |
91447636 | 232 | if (cred != FSCRED && (ap->a_mode & FWRITE)) { |
1c79356b A |
233 | /* |
234 | * When running in very secure mode, do not allow | |
235 | * opens for writing of any disk character devices. | |
236 | */ | |
237 | if (securelevel >= 2 && isdisk(dev, VCHR)) | |
238 | return (EPERM); | |
239 | /* | |
240 | * When running in secure mode, do not allow opens | |
241 | * for writing of /dev/mem, /dev/kmem, or character | |
242 | * devices whose corresponding block devices are | |
243 | * currently mounted. | |
244 | */ | |
245 | if (securelevel >= 1) { | |
91447636 | 246 | if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error)) |
1c79356b A |
247 | return (error); |
248 | if (iskmemdev(dev)) | |
249 | return (EPERM); | |
250 | } | |
251 | } | |
91447636 A |
252 | if (cdevsw[maj].d_type == D_TTY) { |
253 | vnode_lock(vp); | |
1c79356b | 254 | vp->v_flag |= VISTTY; |
91447636 A |
255 | vnode_unlock(vp); |
256 | } | |
1c79356b | 257 | error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); |
1c79356b A |
258 | return (error); |
259 | ||
260 | case VBLK: | |
91447636 | 261 | if ((u_int)maj >= (u_int)nblkdev) |
1c79356b A |
262 | return (ENXIO); |
263 | /* | |
264 | * When running in very secure mode, do not allow | |
265 | * opens for writing of any disk block devices. | |
266 | */ | |
91447636 | 267 | if (securelevel >= 2 && cred != FSCRED && |
1c79356b A |
268 | (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) |
269 | return (EPERM); | |
270 | /* | |
271 | * Do not allow opens of block devices that are | |
272 | * currently mounted. | |
273 | */ | |
91447636 | 274 | if ( (error = vfs_mountedon(vp)) ) |
1c79356b A |
275 | return (error); |
276 | error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p); | |
277 | if (!error) { | |
55e303ae A |
278 | u_int64_t blkcnt; |
279 | u_int32_t blksize; | |
91447636 A |
280 | int setsize = 0; |
281 | u_int32_t size512 = 512; | |
282 | ||
283 | ||
284 | if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) { | |
285 | /* Switch to 512 byte sectors (temporarily) */ | |
55e303ae | 286 | |
91447636 A |
287 | if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) { |
288 | /* Get the number of 512 byte physical blocks. */ | |
289 | if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) { | |
290 | setsize = 1; | |
291 | } | |
292 | } | |
293 | /* If it doesn't set back, we can't recover */ | |
294 | if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context)) | |
295 | error = ENXIO; | |
296 | } | |
297 | ||
298 | ||
299 | vnode_lock(vp); | |
1c79356b | 300 | set_blocksize(vp, dev); |
55e303ae A |
301 | |
302 | /* | |
303 | * Cache the size in bytes of the block device for later | |
304 | * use by spec_write(). | |
305 | */ | |
91447636 | 306 | if (setsize) |
55e303ae | 307 | vp->v_specdevsize = blkcnt * (u_int64_t)size512; |
91447636 A |
308 | else |
309 | vp->v_specdevsize = (u_int64_t)0; /* Default: Can't get */ | |
310 | ||
311 | vnode_unlock(vp); | |
312 | ||
1c79356b A |
313 | } |
314 | return(error); | |
91447636 A |
315 | default: |
316 | panic("spec_open type"); | |
1c79356b A |
317 | } |
318 | return (0); | |
319 | } | |
320 | ||
321 | /* | |
322 | * Vnode op for read | |
323 | */ | |
91447636 | 324 | int |
1c79356b | 325 | spec_read(ap) |
91447636 | 326 | struct vnop_read_args /* { |
1c79356b A |
327 | struct vnode *a_vp; |
328 | struct uio *a_uio; | |
329 | int a_ioflag; | |
91447636 | 330 | vfs_context_t a_context; |
1c79356b A |
331 | } */ *ap; |
332 | { | |
333 | register struct vnode *vp = ap->a_vp; | |
334 | register struct uio *uio = ap->a_uio; | |
1c79356b | 335 | struct buf *bp; |
91447636 | 336 | daddr64_t bn, nextbn; |
1c79356b A |
337 | long bsize, bscale; |
338 | int devBlockSize=0; | |
91447636 | 339 | int n, on; |
1c79356b A |
340 | int error = 0; |
341 | dev_t dev; | |
342 | ||
343 | #if DIAGNOSTIC | |
344 | if (uio->uio_rw != UIO_READ) | |
345 | panic("spec_read mode"); | |
91447636 | 346 | if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) |
1c79356b A |
347 | panic("spec_read proc"); |
348 | #endif | |
91447636 | 349 | if (uio_resid(uio) == 0) |
1c79356b A |
350 | return (0); |
351 | ||
352 | switch (vp->v_type) { | |
353 | ||
354 | case VCHR: | |
1c79356b A |
355 | error = (*cdevsw[major(vp->v_rdev)].d_read) |
356 | (vp->v_rdev, uio, ap->a_ioflag); | |
1c79356b A |
357 | return (error); |
358 | ||
359 | case VBLK: | |
360 | if (uio->uio_offset < 0) | |
361 | return (EINVAL); | |
362 | ||
363 | dev = vp->v_rdev; | |
364 | ||
365 | devBlockSize = vp->v_specsize; | |
366 | ||
367 | if (devBlockSize > PAGE_SIZE) | |
368 | return (EINVAL); | |
369 | ||
370 | bscale = PAGE_SIZE / devBlockSize; | |
371 | bsize = bscale * devBlockSize; | |
372 | ||
373 | do { | |
374 | on = uio->uio_offset % bsize; | |
375 | ||
91447636 | 376 | bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1)); |
1c79356b | 377 | |
91447636 | 378 | if (vp->v_speclastr + bscale == bn) { |
1c79356b | 379 | nextbn = bn + bscale; |
91447636 | 380 | error = buf_breadn(vp, bn, (int)bsize, &nextbn, |
1c79356b A |
381 | (int *)&bsize, 1, NOCRED, &bp); |
382 | } else | |
91447636 A |
383 | error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp); |
384 | ||
385 | vnode_lock(vp); | |
386 | vp->v_speclastr = bn; | |
387 | vnode_unlock(vp); | |
1c79356b | 388 | |
91447636 | 389 | n = bsize - buf_resid(bp); |
1c79356b A |
390 | if ((on > n) || error) { |
391 | if (!error) | |
392 | error = EINVAL; | |
91447636 | 393 | buf_brelse(bp); |
1c79356b A |
394 | return (error); |
395 | } | |
91447636 A |
396 | // LP64todo - fix this! |
397 | n = min((unsigned)(n - on), uio_resid(uio)); | |
1c79356b | 398 | |
91447636 | 399 | error = uiomove((char *)buf_dataptr(bp) + on, n, uio); |
1c79356b | 400 | if (n + on == bsize) |
91447636 A |
401 | buf_markaged(bp); |
402 | buf_brelse(bp); | |
403 | } while (error == 0 && uio_resid(uio) > 0 && n != 0); | |
1c79356b A |
404 | return (error); |
405 | ||
406 | default: | |
407 | panic("spec_read type"); | |
408 | } | |
409 | /* NOTREACHED */ | |
91447636 A |
410 | |
411 | return (0); | |
1c79356b A |
412 | } |
413 | ||
414 | /* | |
415 | * Vnode op for write | |
416 | */ | |
91447636 | 417 | int |
1c79356b | 418 | spec_write(ap) |
91447636 | 419 | struct vnop_write_args /* { |
1c79356b A |
420 | struct vnode *a_vp; |
421 | struct uio *a_uio; | |
422 | int a_ioflag; | |
91447636 | 423 | vfs_context_t a_context; |
1c79356b A |
424 | } */ *ap; |
425 | { | |
426 | register struct vnode *vp = ap->a_vp; | |
427 | register struct uio *uio = ap->a_uio; | |
1c79356b | 428 | struct buf *bp; |
91447636 | 429 | daddr64_t bn; |
1c79356b A |
430 | int bsize, blkmask, bscale; |
431 | register int io_sync; | |
432 | register int io_size; | |
433 | int devBlockSize=0; | |
434 | register int n, on; | |
435 | int error = 0; | |
436 | dev_t dev; | |
437 | ||
438 | #if DIAGNOSTIC | |
439 | if (uio->uio_rw != UIO_WRITE) | |
440 | panic("spec_write mode"); | |
91447636 | 441 | if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) |
1c79356b A |
442 | panic("spec_write proc"); |
443 | #endif | |
444 | ||
445 | switch (vp->v_type) { | |
446 | ||
447 | case VCHR: | |
1c79356b A |
448 | error = (*cdevsw[major(vp->v_rdev)].d_write) |
449 | (vp->v_rdev, uio, ap->a_ioflag); | |
1c79356b A |
450 | return (error); |
451 | ||
452 | case VBLK: | |
91447636 | 453 | if (uio_resid(uio) == 0) |
1c79356b A |
454 | return (0); |
455 | if (uio->uio_offset < 0) | |
456 | return (EINVAL); | |
457 | ||
458 | io_sync = (ap->a_ioflag & IO_SYNC); | |
91447636 A |
459 | // LP64todo - fix this! |
460 | io_size = uio_resid(uio); | |
1c79356b A |
461 | |
462 | dev = (vp->v_rdev); | |
463 | ||
464 | devBlockSize = vp->v_specsize; | |
465 | if (devBlockSize > PAGE_SIZE) | |
466 | return(EINVAL); | |
467 | ||
468 | bscale = PAGE_SIZE / devBlockSize; | |
469 | blkmask = bscale - 1; | |
470 | bsize = bscale * devBlockSize; | |
471 | ||
472 | ||
473 | do { | |
91447636 | 474 | bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask); |
1c79356b A |
475 | on = uio->uio_offset % bsize; |
476 | ||
91447636 A |
477 | // LP64todo - fix this! |
478 | n = min((unsigned)(bsize - on), uio_resid(uio)); | |
1c79356b | 479 | |
55e303ae | 480 | /* |
91447636 | 481 | * Use buf_getblk() as an optimization IFF: |
55e303ae A |
482 | * |
483 | * 1) We are reading exactly a block on a block | |
484 | * aligned boundary | |
485 | * 2) We know the size of the device from spec_open | |
486 | * 3) The read doesn't span the end of the device | |
487 | * | |
91447636 | 488 | * Otherwise, we fall back on buf_bread(). |
55e303ae A |
489 | */ |
490 | if (n == bsize && | |
491 | vp->v_specdevsize != (u_int64_t)0 && | |
492 | (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) { | |
493 | /* reduce the size of the read to what is there */ | |
494 | n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize; | |
495 | } | |
496 | ||
1c79356b | 497 | if (n == bsize) |
91447636 | 498 | bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE); |
1c79356b | 499 | else |
91447636 | 500 | error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp); |
1c79356b | 501 | |
55e303ae | 502 | /* Translate downstream error for upstream, if needed */ |
91447636 A |
503 | if (!error) |
504 | error = (int)buf_error(bp); | |
1c79356b | 505 | if (error) { |
91447636 | 506 | buf_brelse(bp); |
1c79356b A |
507 | return (error); |
508 | } | |
91447636 | 509 | n = min(n, bsize - buf_resid(bp)); |
1c79356b | 510 | |
91447636 A |
511 | error = uiomove((char *)buf_dataptr(bp) + on, n, uio); |
512 | if (error) { | |
513 | buf_brelse(bp); | |
514 | return (error); | |
515 | } | |
516 | buf_markaged(bp); | |
1c79356b A |
517 | |
518 | if (io_sync) | |
91447636 | 519 | error = buf_bwrite(bp); |
1c79356b A |
520 | else { |
521 | if ((n + on) == bsize) | |
91447636 | 522 | error = buf_bawrite(bp); |
1c79356b | 523 | else |
91447636 | 524 | error = buf_bdwrite(bp); |
1c79356b | 525 | } |
91447636 | 526 | } while (error == 0 && uio_resid(uio) > 0 && n != 0); |
1c79356b A |
527 | return (error); |
528 | ||
529 | default: | |
530 | panic("spec_write type"); | |
531 | } | |
532 | /* NOTREACHED */ | |
91447636 A |
533 | |
534 | return (0); | |
1c79356b A |
535 | } |
536 | ||
537 | /* | |
538 | * Device ioctl operation. | |
539 | */ | |
91447636 | 540 | int |
1c79356b | 541 | spec_ioctl(ap) |
91447636 | 542 | struct vnop_ioctl_args /* { |
1c79356b A |
543 | struct vnode *a_vp; |
544 | int a_command; | |
545 | caddr_t a_data; | |
546 | int a_fflag; | |
91447636 | 547 | vfs_context_t a_context; |
1c79356b A |
548 | } */ *ap; |
549 | { | |
91447636 | 550 | proc_t p = vfs_context_proc(ap->a_context); |
1c79356b A |
551 | dev_t dev = ap->a_vp->v_rdev; |
552 | ||
553 | switch (ap->a_vp->v_type) { | |
554 | ||
555 | case VCHR: | |
556 | return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, | |
91447636 | 557 | ap->a_fflag, p)); |
1c79356b A |
558 | |
559 | case VBLK: | |
91447636 | 560 | if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) { |
1c79356b A |
561 | if (bdevsw[major(dev)].d_type == D_TAPE) |
562 | return (0); | |
563 | else | |
564 | return (1); | |
91447636 | 565 | } |
1c79356b | 566 | return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, |
91447636 | 567 | ap->a_fflag, p)); |
1c79356b A |
568 | |
569 | default: | |
570 | panic("spec_ioctl"); | |
571 | /* NOTREACHED */ | |
572 | } | |
91447636 | 573 | return (0); |
1c79356b A |
574 | } |
575 | ||
91447636 | 576 | int |
1c79356b | 577 | spec_select(ap) |
91447636 | 578 | struct vnop_select_args /* { |
1c79356b A |
579 | struct vnode *a_vp; |
580 | int a_which; | |
581 | int a_fflags; | |
0b4e3aa0 | 582 | void * a_wql; |
91447636 | 583 | vfs_context_t a_context; |
1c79356b A |
584 | } */ *ap; |
585 | { | |
91447636 | 586 | proc_t p = vfs_context_proc(ap->a_context); |
1c79356b A |
587 | register dev_t dev; |
588 | ||
589 | switch (ap->a_vp->v_type) { | |
590 | ||
591 | default: | |
592 | return (1); /* XXX */ | |
593 | ||
594 | case VCHR: | |
595 | dev = ap->a_vp->v_rdev; | |
91447636 | 596 | return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p); |
1c79356b A |
597 | } |
598 | } | |
91447636 | 599 | |
1c79356b A |
600 | /* |
601 | * Synch buffers associated with a block device | |
602 | */ | |
1c79356b | 603 | int |
91447636 | 604 | spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context) |
1c79356b | 605 | { |
1c79356b A |
606 | if (vp->v_type == VCHR) |
607 | return (0); | |
608 | /* | |
609 | * Flush all dirty buffers associated with a block device. | |
610 | */ | |
91447636 A |
611 | buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync"); |
612 | ||
1c79356b A |
613 | return (0); |
614 | } | |
615 | ||
91447636 A |
616 | int |
617 | spec_fsync(ap) | |
618 | struct vnop_fsync_args /* { | |
619 | struct vnode *a_vp; | |
620 | int a_waitfor; | |
621 | vfs_context_t a_context; | |
622 | } */ *ap; | |
623 | { | |
624 | return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context); | |
625 | } | |
626 | ||
1c79356b A |
627 | /* |
628 | * Just call the device strategy routine | |
629 | */ | |
91447636 A |
630 | extern int hard_throttle_on_root; |
631 | ||
632 | ||
633 | #define LOWPRI_DELAY_MSECS 200 | |
634 | #define LOWPRI_WINDOW_MSECS 200 | |
635 | ||
636 | int lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS; | |
637 | int lowpri_IO_delay_msecs = LOWPRI_DELAY_MSECS; | |
638 | ||
639 | struct timeval last_normal_IO_timestamp; | |
640 | struct timeval last_lowpri_IO_timestamp; | |
641 | struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 }; | |
642 | ||
643 | int | |
1c79356b | 644 | spec_strategy(ap) |
91447636 | 645 | struct vnop_strategy_args /* { |
1c79356b A |
646 | struct buf *a_bp; |
647 | } */ *ap; | |
648 | { | |
91447636 A |
649 | buf_t bp; |
650 | int bflags; | |
651 | dev_t bdev; | |
652 | proc_t p; | |
653 | struct timeval elapsed; | |
9bccf70c A |
654 | |
655 | bp = ap->a_bp; | |
91447636 A |
656 | bdev = buf_device(bp); |
657 | bflags = buf_flags(bp); | |
9bccf70c A |
658 | |
659 | if (kdebug_enable) { | |
91447636 | 660 | int code = 0; |
9bccf70c | 661 | |
91447636 A |
662 | if (bflags & B_READ) |
663 | code |= DKIO_READ; | |
664 | if (bflags & B_ASYNC) | |
665 | code |= DKIO_ASYNC; | |
9bccf70c | 666 | |
91447636 A |
667 | if (bflags & B_META) |
668 | code |= DKIO_META; | |
669 | else if (bflags & B_PAGEIO) | |
670 | code |= DKIO_PAGING; | |
9bccf70c | 671 | |
91447636 A |
672 | KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, |
673 | (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); | |
9bccf70c | 674 | } |
91447636 A |
675 | if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && |
676 | (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) | |
677 | hard_throttle_on_root = 1; | |
678 | ||
679 | if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) { | |
680 | p = current_proc(); | |
681 | ||
682 | if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) { | |
683 | if (!(p->p_lflag & P_LBACKGROUND_IO)) | |
684 | microuptime(&last_normal_IO_timestamp); | |
685 | } else { | |
686 | microuptime(&last_lowpri_IO_timestamp); | |
687 | ||
688 | elapsed = last_lowpri_IO_timestamp; | |
689 | timevalsub(&elapsed, &last_normal_IO_timestamp); | |
690 | ||
691 | lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000; | |
692 | lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000; | |
693 | ||
694 | if (timevalcmp(&elapsed, &lowpri_IO_window, <)) { | |
695 | struct uthread *ut; | |
696 | ||
697 | /* | |
698 | * I'd really like to do the IOSleep here, but | |
699 | * we may be holding all kinds of filesystem related locks | |
700 | * and the pages for this I/O marked 'busy'... | |
701 | * we don't want to cause a normal task to block on | |
702 | * one of these locks while we're throttling a task marked | |
703 | * for low priority I/O... we'll mark the uthread and | |
704 | * do the delay just before we return from the system | |
705 | * call that triggered this I/O or from vnode_pagein | |
706 | */ | |
707 | ut = get_bsdthread_info(current_thread()); | |
708 | ut->uu_lowpri_delay = lowpri_IO_delay_msecs; | |
709 | } | |
ccc36f2f A |
710 | } |
711 | } | |
91447636 | 712 | (*bdevsw[major(bdev)].d_strategy)(bp); |
ccc36f2f | 713 | |
91447636 | 714 | return (0); |
ccc36f2f A |
715 | } |
716 | ||
1c79356b A |
717 | |
718 | /* | |
719 | * This is a noop, simply returning what one has been given. | |
720 | */ | |
91447636 A |
721 | int |
722 | spec_blockmap(__unused struct vnop_blockmap_args *ap) | |
1c79356b | 723 | { |
91447636 | 724 | return (ENOTSUP); |
1c79356b A |
725 | } |
726 | ||
727 | ||
728 | /* | |
729 | * Device close routine | |
730 | */ | |
91447636 | 731 | int |
1c79356b | 732 | spec_close(ap) |
91447636 | 733 | struct vnop_close_args /* { |
1c79356b A |
734 | struct vnode *a_vp; |
735 | int a_fflag; | |
91447636 | 736 | vfs_context_t a_context; |
1c79356b A |
737 | } */ *ap; |
738 | { | |
739 | register struct vnode *vp = ap->a_vp; | |
740 | dev_t dev = vp->v_rdev; | |
91447636 | 741 | int (*devclose)(dev_t, int, int, struct proc *); |
1c79356b | 742 | int mode, error; |
91447636 | 743 | struct proc *p = vfs_context_proc(ap->a_context); |
1c79356b A |
744 | |
745 | switch (vp->v_type) { | |
746 | ||
747 | case VCHR: | |
748 | /* | |
749 | * Hack: a tty device that is a controlling terminal | |
750 | * has a reference from the session structure. | |
751 | * We cannot easily tell that a character device is | |
752 | * a controlling terminal, unless it is the closing | |
753 | * process' controlling terminal. In that case, | |
754 | * if the reference count is 2 (this last descriptor | |
755 | * plus the session), release the reference from the session. | |
756 | */ | |
91447636 A |
757 | if (vcount(vp) == 2 && p && |
758 | vp == p->p_session->s_ttyvp) { | |
759 | p->p_session->s_ttyvp = NULL; | |
760 | vnode_rele(vp); | |
1c79356b A |
761 | } |
762 | /* | |
1c79356b A |
763 | * close on last reference. |
764 | */ | |
91447636 | 765 | if (vcount(vp) > 1) |
1c79356b A |
766 | return (0); |
767 | devclose = cdevsw[major(dev)].d_close; | |
768 | mode = S_IFCHR; | |
769 | break; | |
770 | ||
771 | case VBLK: | |
0b4e3aa0 | 772 | #ifdef DEVFS_IMPLEMENTS_LOCKING |
1c79356b A |
773 | /* |
774 | * On last close of a block device (that isn't mounted) | |
775 | * we must invalidate any in core blocks, so that | |
776 | * we can, for instance, change floppy disks. | |
777 | */ | |
91447636 A |
778 | if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) |
779 | return (error); | |
780 | ||
781 | error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); | |
0b4e3aa0 | 782 | if (error) |
1c79356b A |
783 | return (error); |
784 | /* | |
91447636 | 785 | * Since every use (buffer, vnode, swap, blockmap) |
1c79356b A |
786 | * holds a reference to the vnode, and because we mark |
787 | * any other vnodes that alias this device, when the | |
788 | * sum of the reference counts on all the aliased | |
789 | * vnodes descends to one, we are on last close. | |
790 | */ | |
91447636 | 791 | if (vcount(vp) > 1) |
1c79356b | 792 | return (0); |
0b4e3aa0 A |
793 | #else /* DEVFS_IMPLEMENTS_LOCKING */ |
794 | /* | |
91447636 | 795 | * Since every use (buffer, vnode, swap, blockmap) |
0b4e3aa0 A |
796 | * holds a reference to the vnode, and because we mark |
797 | * any other vnodes that alias this device, when the | |
798 | * sum of the reference counts on all the aliased | |
799 | * vnodes descends to one, we are on last close. | |
800 | */ | |
91447636 | 801 | if (vcount(vp) > 1) |
0b4e3aa0 A |
802 | return (0); |
803 | ||
804 | /* | |
805 | * On last close of a block device (that isn't mounted) | |
806 | * we must invalidate any in core blocks, so that | |
807 | * we can, for instance, change floppy disks. | |
808 | */ | |
91447636 A |
809 | if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) |
810 | return (error); | |
811 | ||
812 | error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); | |
0b4e3aa0 A |
813 | if (error) |
814 | return (error); | |
815 | #endif /* DEVFS_IMPLEMENTS_LOCKING */ | |
1c79356b A |
816 | devclose = bdevsw[major(dev)].d_close; |
817 | mode = S_IFBLK; | |
818 | break; | |
819 | ||
820 | default: | |
821 | panic("spec_close: not special"); | |
822 | } | |
823 | ||
91447636 | 824 | return ((*devclose)(dev, ap->a_fflag, mode, p)); |
1c79356b A |
825 | } |
826 | ||
827 | /* | |
828 | * Return POSIX pathconf information applicable to special devices. | |
829 | */ | |
91447636 | 830 | int |
1c79356b | 831 | spec_pathconf(ap) |
91447636 | 832 | struct vnop_pathconf_args /* { |
1c79356b A |
833 | struct vnode *a_vp; |
834 | int a_name; | |
835 | int *a_retval; | |
91447636 | 836 | vfs_context_t a_context; |
1c79356b A |
837 | } */ *ap; |
838 | { | |
839 | ||
840 | switch (ap->a_name) { | |
841 | case _PC_LINK_MAX: | |
842 | *ap->a_retval = LINK_MAX; | |
843 | return (0); | |
844 | case _PC_MAX_CANON: | |
845 | *ap->a_retval = MAX_CANON; | |
846 | return (0); | |
847 | case _PC_MAX_INPUT: | |
848 | *ap->a_retval = MAX_INPUT; | |
849 | return (0); | |
850 | case _PC_PIPE_BUF: | |
851 | *ap->a_retval = PIPE_BUF; | |
852 | return (0); | |
853 | case _PC_CHOWN_RESTRICTED: | |
854 | *ap->a_retval = 1; | |
855 | return (0); | |
856 | case _PC_VDISABLE: | |
857 | *ap->a_retval = _POSIX_VDISABLE; | |
858 | return (0); | |
859 | default: | |
860 | return (EINVAL); | |
861 | } | |
862 | /* NOTREACHED */ | |
863 | } | |
864 | ||
8ad349bb A |
865 | int |
866 | spec_devblocksize(ap) | |
867 | struct vnop_devblocksize_args /* { | |
868 | struct vnode *a_vp; | |
869 | int *a_retval; | |
870 | } */ *ap; | |
871 | { | |
872 | *ap->a_retval = (ap->a_vp->v_specsize); | |
873 | return (0); | |
874 | } | |
875 | ||
1c79356b A |
876 | /* |
877 | * Special device failed operation | |
878 | */ | |
91447636 A |
879 | int |
880 | spec_ebadf(__unused void *dummy) | |
1c79356b A |
881 | { |
882 | ||
883 | return (EBADF); | |
884 | } | |
885 | ||
/*
 * Special device bad operation: reaching this routine is treated as a
 * fatal error and panics.
 */
int
spec_badop()
{
	panic("spec_badop called");
	/* NOTREACHED */
}
896 | ||
897 | /* Blktooff derives file offset from logical block number */ | |
898 | int | |
899 | spec_blktooff(ap) | |
91447636 | 900 | struct vnop_blktooff_args /* { |
1c79356b | 901 | struct vnode *a_vp; |
91447636 | 902 | daddr64_t a_lblkno; |
1c79356b A |
903 | off_t *a_offset; |
904 | } */ *ap; | |
905 | { | |
906 | register struct vnode *vp = ap->a_vp; | |
907 | ||
908 | switch (vp->v_type) { | |
909 | case VCHR: | |
910 | *ap->a_offset = (off_t)-1; /* failure */ | |
91447636 | 911 | return (ENOTSUP); |
1c79356b A |
912 | |
913 | case VBLK: | |
914 | printf("spec_blktooff: not implemented for VBLK\n"); | |
915 | *ap->a_offset = (off_t)-1; /* failure */ | |
91447636 | 916 | return (ENOTSUP); |
1c79356b A |
917 | |
918 | default: | |
919 | panic("spec_blktooff type"); | |
920 | } | |
921 | /* NOTREACHED */ | |
91447636 A |
922 | |
923 | return (0); | |
1c79356b A |
924 | } |
925 | ||
926 | /* Offtoblk derives logical block number from file offset */ | |
927 | int | |
928 | spec_offtoblk(ap) | |
91447636 | 929 | struct vnop_offtoblk_args /* { |
1c79356b A |
930 | struct vnode *a_vp; |
931 | off_t a_offset; | |
91447636 | 932 | daddr64_t *a_lblkno; |
1c79356b A |
933 | } */ *ap; |
934 | { | |
935 | register struct vnode *vp = ap->a_vp; | |
936 | ||
937 | switch (vp->v_type) { | |
938 | case VCHR: | |
91447636 A |
939 | *ap->a_lblkno = (daddr64_t)-1; /* failure */ |
940 | return (ENOTSUP); | |
1c79356b A |
941 | |
942 | case VBLK: | |
943 | printf("spec_offtoblk: not implemented for VBLK\n"); | |
91447636 A |
944 | *ap->a_lblkno = (daddr64_t)-1; /* failure */ |
945 | return (ENOTSUP); | |
1c79356b A |
946 | |
947 | default: | |
948 | panic("spec_offtoblk type"); | |
949 | } | |
950 | /* NOTREACHED */ | |
91447636 A |
951 | |
952 | return (0); | |
1c79356b | 953 | } |