]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/vn.c
c51fe4ee7b47c48511a8826171021c62611af851
[apple/xnu.git] / bsd / dev / vn / vn.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * Copyright (c) 1988 University of Utah.
26 * Copyright (c) 1990, 1993
27 * The Regents of the University of California. All rights reserved.
28 *
29 * This code is derived from software contributed to Berkeley by
30 * the Systems Programming Group of the University of Utah Computer
31 * Science Department.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * from: Utah Hdr: vn.c 1.13 94/04/02
62 *
63 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
64 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
65 */
66
67 /*
68 * Vnode disk driver.
69 *
70 * Block/character interface to a vnode. Allows one to treat a file
71 * as a disk (e.g. build a filesystem in it, mount it, etc.).
72 *
73 * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode
74 * instead of a simple VOP_RDWR. We do this to avoid distorting the
75 * local buffer cache.
76 *
77 * NOTE 2: There is a security issue involved with this driver.
78 * Once mounted all access to the contents of the "mapped" file via
79 * the special file is controlled by the permissions on the special
80 * file, the protection of the mapped file is ignored (effectively,
81 * by using root credentials in all transactions).
82 *
83 * NOTE 3: Doesn't interact with leases, should it?
84 */
85
86 #include "vndevice.h"
87
88 #if NVNDEVICE > 0
89
90 #include <sys/param.h>
91 #include <sys/systm.h>
92 #include <sys/kernel.h>
93 #include <sys/mount.h>
94 #include <sys/namei.h>
95 #include <sys/proc.h>
96 #include <sys/kauth.h>
97 #include <sys/buf.h>
98 #include <sys/malloc.h>
99 #include <sys/vnode_internal.h>
100 #include <sys/fcntl.h>
101 #include <sys/conf.h>
102 #include <sys/disk.h>
103 #include <sys/stat.h>
104 #include <sys/conf.h>
105 #include <sys/uio_internal.h>
106
107 #include <sys/vnioctl.h>
108
109 #include <sys/vm.h>
110
111 #include <vm/vm_pager.h>
112 #include <mach/memory_object_types.h>
113
114 #include <miscfs/devfs/devfs.h>
115
116
117 #include "shadow.h"
118
119 static ioctl_fcn_t vnioctl_chr;
120 static ioctl_fcn_t vnioctl_blk;
121 static open_close_fcn_t vnopen;
122 static open_close_fcn_t vnclose;
123 static psize_fcn_t vnsize;
124 static strategy_fcn_t vnstrategy;
125 static read_write_fcn_t vnread;
126 static read_write_fcn_t vnwrite;
127
128 static int vndevice_bdev_major;
129 static int vndevice_cdev_major;
130
131 /*
132 * cdevsw
133 * D_DISK we want to look like a disk
134 * D_CANFREE We support B_FREEBUF
135 */
136
137 static struct bdevsw vn_bdevsw = {
138 /* open */ vnopen,
139 /* close */ vnclose,
140 /* strategy */ vnstrategy,
141 /* ioctl */ vnioctl_blk,
142 /* dump */ eno_dump,
143 /* psize */ vnsize,
144 /* flags */ D_DISK,
145 };
146
147 static struct cdevsw vn_cdevsw = {
148 /* open */ vnopen,
149 /* close */ vnclose,
150 /* read */ vnread,
151 /* write */ vnwrite,
152 /* ioctl */ vnioctl_chr,
153 /* stop */ eno_stop,
154 /* reset */ eno_reset,
155 /* ttys */ 0,
156 /* select */ eno_select,
157 /* mmap */ eno_mmap,
158 /* strategy */ eno_strat,
159 /* getc */ eno_getc,
160 /* putc */ eno_putc,
161 /* flags */ D_DISK,
162 };
163
/*
 * Per-unit soft state for the vn (vnode disk) driver.
 * One entry per minor device, statically allocated in vn_table.
 */
struct vn_softc {
	u_int64_t	sc_fsize;	/* file size in bytes */
	u_int64_t	sc_size;	/* size of vn, sc_secsize scale */
	int		sc_flags;	/* flags */
	u_long		sc_secsize;	/* sector size */
	struct vnode	*sc_vp;		/* vnode if not NULL */
	uint32_t	sc_vid;		/* vnode id, checked via vnode_getwithvid() to detect recycling */
	int		sc_open_flags;	/* FREAD/FWRITE flags the backing file was opened with */
	struct vnode	*sc_shadow_vp;	/* shadow vnode if not NULL */
	uint32_t	sc_shadow_vid;	/* vnode id for the shadow vnode */
	shadow_map_t	*sc_shadow_map;	/* shadow map if not NULL */
	kauth_cred_t	sc_cred;	/* credentials */
	u_int32_t	sc_options;	/* options */
	void		*sc_bdev;	/* devfs node for the block device (devfs_make_node) */
	void		*sc_cdev;	/* devfs node for the character device */
} vn_table[NVNDEVICE];
180
181 #define ROOT_IMAGE_UNIT 0
182
183 /* sc_flags */
184 #define VNF_INITED 0x01
185 #define VNF_READONLY 0x02
186
187 static u_int32_t vn_options;
188
189 #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
190 #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
191
192 static int setcred(struct vnode * vp, struct proc * p,
193 kauth_cred_t cred);
194 static void vnclear (struct vn_softc *vn, struct proc * p);
195 static void vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to);
196 void vndevice_init(void);
197 int vndevice_root_image(char * path, char devname[], dev_t * dev_p);
198
199 static int
200 vniocattach_file(struct vn_softc *vn,
201 struct user_vn_ioctl *vniop,
202 dev_t dev,
203 int in_kernel,
204 struct proc *p);
205 static int
206 vniocattach_shadow(struct vn_softc * vn,
207 struct user_vn_ioctl *vniop,
208 dev_t dev,
209 int in_kernel,
210 struct proc *p);
211 static __inline__ int
212 vnunit(dev_t dev)
213 {
214 return (minor(dev));
215 }
216
217 static int
218 vnclose(__unused dev_t dev, __unused int flags,
219 __unused int devtype, __unused struct proc *p)
220 {
221 return (0);
222 }
223
224 static int
225 vnopen(dev_t dev, int flags, __unused int devtype, __unused struct proc *p)
226 {
227 struct vn_softc *vn;
228 int unit;
229
230 unit = vnunit(dev);
231 if (vnunit(dev) >= NVNDEVICE) {
232 return (ENXIO);
233 }
234 vn = vn_table + unit;
235 if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
236 return (EACCES);
237
238 return(0);
239 }
240
241 static int
242 file_io(struct vnode * vp, struct vfs_context * context_p,
243 enum uio_rw op, char * base, off_t offset, user_ssize_t count,
244 user_ssize_t * resid)
245 {
246 uio_t auio;
247 int error;
248 char uio_buf[UIO_SIZEOF(1)];
249
250 auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op,
251 &uio_buf[0], sizeof(uio_buf));
252 uio_addiov(auio, CAST_USER_ADDR_T(base), count);
253 if (op == UIO_READ)
254 error = VNOP_READ(vp, auio, IO_SYNC, context_p);
255 else
256 error = VNOP_WRITE(vp, auio, IO_SYNC, context_p);
257
258 if (resid != NULL) {
259 *resid = uio_resid(auio);
260 }
261 return (error);
262 }
263
static __inline__ off_t
block_round(off_t o, int blocksize)
{
	/* number of blocksize-sized blocks needed to hold o bytes (ceiling) */
	off_t full_blocks = (o + blocksize - 1) / blocksize;

	return full_blocks;
}
269
static __inline__ off_t
block_truncate(off_t o, int blocksize)
{
	/* index of the block containing byte offset o (floor division) */
	off_t block_index = o / blocksize;

	return block_index;
}
275
static __inline__ int
block_remainder(off_t o, int blocksize)
{
	/* byte offset of o within its containing block */
	int within_block = o % blocksize;

	return within_block;
}
281
/*
 * vnread_shadow
 *
 * Satisfy a read against a shadowed vn unit.  The shadow map decides,
 * run by run, whether the requested blocks live in the shadow file
 * (written since the shadow was attached) or in the original backing
 * file, and the uio is re-pointed at the proper vnode/offset for each
 * run.  On return the uio's resid/offset reflect the caller-visible
 * progress, not the per-run shadow offsets used internally.
 * Returns 0 or an errno value from VNOP_READ.
 */
static int
vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
	      struct vfs_context * context_p)
{
	u_long		blocksize = vn->sc_secsize;
	int 		error = 0;
	off_t		offset;
	user_ssize_t	resid;
	off_t		orig_offset;
	user_ssize_t	orig_resid;

	orig_resid = resid = uio_resid(uio);
	orig_offset = offset = uio_offset(uio);

	while (resid > 0) {
		u_long		remainder;
		u_long		this_block_number;
		u_long		this_block_count;
		off_t		this_offset;
		user_ssize_t	this_resid;
		struct vnode *	vp;

		/* figure out which blocks to read */
		remainder = block_remainder(offset, blocksize);
		/* shadow_map_read() returns TRUE when the run is in the shadow file */
		if (shadow_map_read(vn->sc_shadow_map,
				    block_truncate(offset, blocksize),
				    block_round(resid + remainder, blocksize),
				    &this_block_number, &this_block_count)) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}

		/* read the blocks (or parts thereof) */
		this_offset = (off_t)this_block_number * blocksize + remainder;
		uio_setoffset(uio, this_offset);
		this_resid = this_block_count * blocksize - remainder;
		/* clamp the run to what the caller actually asked for */
		if (this_resid > resid) {
			this_resid = resid;
		}
		uio_setresid(uio, this_resid);
		error = VNOP_READ(vp, uio, ioflag, context_p);
		if (error) {
			break;
		}

		/* figure out how much we actually read */
		this_resid -= uio_resid(uio);
		if (this_resid == 0) {
			/* zero progress: bail rather than loop forever */
			printf("vn device: vnread_shadow zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
	}
	/* restore caller-visible uio state from our accumulated progress */
	uio_setresid(uio, resid);
	uio_setoffset(uio, offset);
	return (error);
}
342
/*
 * vncopy_block_to_shadow
 *
 * Copy one sc_secsize-sized block from the backing file (block index
 * file_block) into the shadow file (block index shadow_block) via a
 * temporary kernel buffer.  Used to preserve the untouched portion of
 * a block before a partial-block shadow write.
 * Returns 0 on success or an errno value.
 */
static int
vncopy_block_to_shadow(struct vn_softc * vn, struct vfs_context * context_p,
		       u_long file_block, u_long shadow_block)
{
	int 	error;
	char * 	tmpbuf;

	/* M_WAITOK may block; NULL is still checked defensively */
	tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK);
	if (tmpbuf == NULL) {
		return (ENOMEM);
	}
	/* read one block from file at file_block offset */
	error = file_io(vn->sc_vp, context_p, UIO_READ,
			tmpbuf, (off_t)file_block * vn->sc_secsize,
			vn->sc_secsize, NULL);
	if (error) {
		goto done;
	}
	/* write one block to shadow file at shadow_block offset */
	error = file_io(vn->sc_shadow_vp, context_p, UIO_WRITE,
			tmpbuf, (off_t)shadow_block * vn->sc_secsize,
			vn->sc_secsize, NULL);
 done:
	FREE(tmpbuf, M_TEMP);
	return (error);
}
369
/* partial-block markers used by vnwrite_shadow() */
enum {
	FLAGS_FIRST_BLOCK_PARTIAL = 0x1,	/* write begins mid-block */
	FLAGS_LAST_BLOCK_PARTIAL = 0x2		/* write ends mid-block */
};
374
375 static int
376 vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
377 struct vfs_context * context_p)
378 {
379 u_long blocksize = vn->sc_secsize;
380 int error = 0;
381 user_ssize_t resid;
382 off_t offset;
383
384 resid = uio_resid(uio);
385 offset = uio_offset(uio);
386
387 while (resid > 0) {
388 int flags = 0;
389 u_long offset_block_number;
390 u_long remainder;
391 u_long resid_block_count;
392 u_long shadow_block_count;
393 u_long shadow_block_number;
394 user_ssize_t this_resid;
395
396 /* figure out which blocks to write */
397 offset_block_number = block_truncate(offset, blocksize);
398 remainder = block_remainder(offset, blocksize);
399 resid_block_count = block_round(resid + remainder, blocksize);
400 /* figure out if the first or last blocks are partial writes */
401 if (remainder > 0
402 && !shadow_map_is_written(vn->sc_shadow_map,
403 offset_block_number)) {
404 /* the first block is a partial write */
405 flags |= FLAGS_FIRST_BLOCK_PARTIAL;
406 }
407 if (resid_block_count > 1
408 && !shadow_map_is_written(vn->sc_shadow_map,
409 offset_block_number
410 + resid_block_count - 1)
411 && block_remainder(offset + resid, blocksize) > 0) {
412 /* the last block is a partial write */
413 flags |= FLAGS_LAST_BLOCK_PARTIAL;
414 }
415 if (shadow_map_write(vn->sc_shadow_map,
416 offset_block_number, resid_block_count,
417 &shadow_block_number,
418 &shadow_block_count)) {
419 /* shadow file is growing */
420 #if 0
421 /* truncate the file to its new length before write */
422 off_t size;
423 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
424 * vn->sc_secsize;
425 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC,
426 context_p);
427 #endif 0
428 }
429 /* write the blocks (or parts thereof) */
430 uio_setoffset(uio, (off_t)
431 shadow_block_number * blocksize + remainder);
432 this_resid = (off_t)shadow_block_count * blocksize - remainder;
433 if (this_resid >= resid) {
434 this_resid = resid;
435 if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) {
436 /* copy the last block to the shadow */
437 u_long d;
438 u_long s;
439
440 s = offset_block_number
441 + resid_block_count - 1;
442 d = shadow_block_number
443 + shadow_block_count - 1;
444 error = vncopy_block_to_shadow(vn, context_p,
445 s, d);
446 if (error) {
447 printf("vnwrite_shadow: failed to copy"
448 " block %d to shadow block %d\n",
449 s, d);
450 break;
451 }
452 }
453 }
454 uio_setresid(uio, this_resid);
455 if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) {
456 /* copy the first block to the shadow */
457 error = vncopy_block_to_shadow(vn, context_p,
458 offset_block_number,
459 shadow_block_number);
460 if (error) {
461 printf("vnwrite_shadow: failed to"
462 " copy block %d to shadow block %d\n",
463 offset_block_number,
464 shadow_block_number);
465 break;
466 }
467 }
468 error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, context_p);
469 if (error) {
470 break;
471 }
472 /* figure out how much we actually wrote */
473 this_resid -= uio_resid(uio);
474 if (this_resid == 0) {
475 printf("vn device: vnwrite_shadow zero length write\n");
476 break;
477 }
478 resid -= this_resid;
479 offset += this_resid;
480 }
481 uio_setresid(uio, resid);
482 uio_setoffset(uio, offset);
483 return (error);
484 }
485
486 static int
487 vnread(dev_t dev, struct uio *uio, int ioflag)
488 {
489 struct vfs_context context;
490 int error = 0;
491 boolean_t funnel_state;
492 off_t offset;
493 struct proc * p;
494 user_ssize_t resid;
495 struct vn_softc * vn;
496 int unit;
497
498 unit = vnunit(dev);
499 if (vnunit(dev) >= NVNDEVICE) {
500 return (ENXIO);
501 }
502 p = current_proc();
503 funnel_state = thread_funnel_set(kernel_flock, TRUE);
504 vn = vn_table + unit;
505 if ((vn->sc_flags & VNF_INITED) == 0) {
506 error = ENXIO;
507 goto done;
508 }
509 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
510 if (error != 0) {
511 /* the vnode is no longer available, abort */
512 error = ENXIO;
513 vnclear(vn, p);
514 goto done;
515 }
516
517 resid = uio_resid(uio);
518 offset = uio_offset(uio);
519
520 /*
521 * If out of bounds return an error. If at the EOF point,
522 * simply read less.
523 */
524 if (offset >= (off_t)vn->sc_fsize) {
525 if (offset > (off_t)vn->sc_fsize) {
526 error = EINVAL;
527 }
528 goto done;
529 }
530 /*
531 * If the request crosses EOF, truncate the request.
532 */
533 if ((offset + resid) > (off_t)vn->sc_fsize) {
534 resid = vn->sc_fsize - offset;
535 uio_setresid(uio, resid);
536 }
537
538 context.vc_proc = p;
539 context.vc_ucred = vn->sc_cred;
540 if (vn->sc_shadow_vp != NULL) {
541 error = vnode_getwithvid(vn->sc_shadow_vp,
542 vn->sc_shadow_vid);
543 if (error != 0) {
544 /* the vnode is no longer available, abort */
545 error = ENXIO;
546 vnode_put(vn->sc_vp);
547 vnclear(vn, p);
548 goto done;
549 }
550 error = vnread_shadow(vn, uio, ioflag, &context);
551 vnode_put(vn->sc_shadow_vp);
552 } else {
553 error = VNOP_READ(vn->sc_vp, uio, ioflag, &context);
554 }
555 vnode_put(vn->sc_vp);
556 done:
557 (void) thread_funnel_set(kernel_flock, funnel_state);
558 return (error);
559 }
560
561 static int
562 vnwrite(dev_t dev, struct uio *uio, int ioflag)
563 {
564 struct vfs_context context;
565 int error;
566 boolean_t funnel_state;
567 off_t offset;
568 struct proc * p;
569 user_ssize_t resid;
570 struct vn_softc * vn;
571 int unit;
572
573 unit = vnunit(dev);
574 if (vnunit(dev) >= NVNDEVICE) {
575 return (ENXIO);
576 }
577 p = current_proc();
578 funnel_state = thread_funnel_set(kernel_flock, TRUE);
579 vn = vn_table + unit;
580 if ((vn->sc_flags & VNF_INITED) == 0) {
581 error = ENXIO;
582 goto done;
583 }
584 if (vn->sc_flags & VNF_READONLY) {
585 error = EROFS;
586 goto done;
587 }
588 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
589 if (error != 0) {
590 /* the vnode is no longer available, abort */
591 error = ENXIO;
592 vnclear(vn, p);
593 goto done;
594 }
595 resid = uio_resid(uio);
596 offset = uio_offset(uio);
597
598 /*
599 * If out of bounds return an error. If at the EOF point,
600 * simply write less.
601 */
602 if (offset >= (off_t)vn->sc_fsize) {
603 if (offset > (off_t)vn->sc_fsize) {
604 error = EINVAL;
605 }
606 goto done;
607 }
608 /*
609 * If the request crosses EOF, truncate the request.
610 */
611 if ((offset + resid) > (off_t)vn->sc_fsize) {
612 resid = (off_t)vn->sc_fsize - offset;
613 uio_setresid(uio, resid);
614 }
615
616 context.vc_proc = p;
617 context.vc_ucred = vn->sc_cred;
618
619 if (vn->sc_shadow_vp != NULL) {
620 error = vnode_getwithvid(vn->sc_shadow_vp,
621 vn->sc_shadow_vid);
622 if (error != 0) {
623 /* the vnode is no longer available, abort */
624 error = ENXIO;
625 vnode_put(vn->sc_vp);
626 vnclear(vn, p);
627 goto done;
628 }
629 error = vnwrite_shadow(vn, uio, ioflag, &context);
630 vnode_put(vn->sc_shadow_vp);
631 } else {
632 error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context);
633 }
634 vnode_put(vn->sc_vp);
635 done:
636 (void) thread_funnel_set(kernel_flock, funnel_state);
637 return (error);
638 }
639
/*
 * shadow_read
 *
 * Strategy-path read for a shadowed unit.  Offsets and counts are in
 * sc_secsize blocks (buf_blkno / buf_resid).  For each run, the shadow
 * map decides whether to read from the shadow file or the original
 * backing file, and file_io() performs the transfer into the mapped
 * buffer.  buf_resid is updated with what remains untransferred.
 * Returns 0 or an errno value.
 */
static int
shadow_read(struct vn_softc * vn, struct buf * bp, char * base, struct proc * p)
{
	u_long		blocksize = vn->sc_secsize;
	struct vfs_context context;
	int 		error = 0;
	u_long		offset;
	boolean_t	read_shadow;
	u_long		resid;
	u_long		start = 0;

	context.vc_proc = p;
	context.vc_ucred = vn->sc_cred;
	/* offset/resid are in blocks, not bytes */
	offset = buf_blkno(bp);
	resid = buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_long		this_offset;
		u_long		this_resid;
		struct vnode *	vp;

		/* TRUE when the run lives in the shadow file */
		read_shadow = shadow_map_read(vn->sc_shadow_map,
					      offset, resid,
					      &this_offset, &this_resid);
		if (read_shadow) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}
		error = file_io(vp, &context, UIO_READ, base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize,
				&temp_resid);
		if (error) {
			break;
		}
		/* convert the byte residual back to whole blocks transferred */
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			/* zero progress: bail rather than loop forever */
			printf("vn device: shadow_read zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
689
/*
 * shadow_write
 *
 * Strategy-path write for a shadowed unit.  Offsets and counts are in
 * sc_secsize blocks.  All writes land in the shadow file; the shadow
 * map allocates shadow blocks for each run as needed.  buf_resid is
 * updated with what remains untransferred.  Returns 0 or an errno
 * value.  Note: unlike vnwrite_shadow(), this path does not preserve
 * partial-block content — strategy transfers are whole blocks.
 */
static int
shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
	     struct proc * p)
{
	u_long		blocksize = vn->sc_secsize;
	struct vfs_context context;
	int 		error = 0;
	u_long		offset;
	boolean_t	shadow_grew;
	u_long		resid;
	u_long		start = 0;

	context.vc_proc = p;
	context.vc_ucred = vn->sc_cred;
	/* offset/resid are in blocks, not bytes */
	offset = buf_blkno(bp);
	resid = buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_long		this_offset;
		u_long		this_resid;

		shadow_grew = shadow_map_write(vn->sc_shadow_map,
					       offset, resid,
					       &this_offset, &this_resid);
		if (shadow_grew) {
#if 0
			off_t	size;
			/* truncate the file to its new length before write */
			size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
				* blocksize;
			vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC,
				      &context);
#endif
		}
		error = file_io(vn->sc_shadow_vp, &context, UIO_WRITE,
				base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize,
				&temp_resid);
		if (error) {
			break;
		}
		/* convert the byte residual back to whole blocks transferred */
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			/* zero progress: bail rather than loop forever */
			printf("vn device: shadow_write zero length write\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
744
/*
 * vn_readwrite_io
 *
 * Perform the actual data transfer for a strategy request: map the
 * buf's data pages into the kernel, then either do a direct file_io()
 * against the backing vnode or dispatch to the shadow read/write
 * helpers.  Returns 0 or an errno value; buf_resid is updated by the
 * helpers / from the file_io residual.
 */
static int
vn_readwrite_io(struct vn_softc * vn, struct buf * bp, struct proc * p)
{
	int 		error = 0;
	char *		iov_base;
	caddr_t		vaddr;


	/* map the buffer's pages into kernel virtual memory */
	if (buf_map(bp, &vaddr))
		panic("vn device: buf_map failed");
	iov_base = (char *)vaddr;

	if (vn->sc_shadow_vp == NULL) {
		struct vfs_context  	context;
		user_ssize_t		temp_resid;

		context.vc_proc = p;
		context.vc_ucred = vn->sc_cred;

		/* direct transfer against the backing file */
		error = file_io(vn->sc_vp, &context,
				buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
				iov_base,
				(off_t)buf_blkno(bp) * vn->sc_secsize,
				buf_resid(bp), &temp_resid);
		buf_setresid(bp, temp_resid);
	}
	else {
		/* shadowed unit: route through the shadow map */
		if (buf_flags(bp) & B_READ)
			error = shadow_read(vn, bp, iov_base, p);
		else
			error = shadow_write(vn, bp, iov_base, p);
	}
	buf_unmap(bp);

	return (error);
}
781
/*
 * vnstrategy
 *
 * Block-device strategy entry point.  Validates alignment and bounds
 * (in sc_secsize units), pins the backing and shadow vnodes with
 * iocounts, performs the transfer via vn_readwrite_io(), and always
 * completes the buf with buf_biodone() — setting the error on the buf
 * when the request failed.
 */
static void
vnstrategy(struct buf *bp)
{
	struct vn_softc *vn;
	int error = 0;
	long sz;	/* in sc_secsize chunks */
	daddr64_t blk_num;
	boolean_t   		funnel_state;
	struct proc * 		p = current_proc();
	struct vnode *		shadow_vp = NULL;
	struct vnode *		vp = NULL;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + vnunit(buf_device(bp));
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}

	buf_setresid(bp, buf_count(bp));
	/*
	 * Check for required alignment.  Transfers must be a valid
	 * multiple of the sector size.
	 */
	blk_num = buf_blkno(bp);
	if (buf_count(bp) % vn->sc_secsize != 0) {
		error = EINVAL;
		goto done;
	}
	sz = howmany(buf_count(bp), vn->sc_secsize);

	/*
	 * If out of bounds return an error. If at the EOF point,
	 * simply read or write less.
	 */
	if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
		if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
			error = EINVAL;
		}
		/* at exactly EOF: complete with zero bytes transferred */
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
		buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
		buf_setresid(bp, buf_count(bp));
	}
	vp = vn->sc_vp;
	if (vp == NULL) {
		error = ENXIO;
		goto done;
	}
	/* take iocounts; the vid checks detect recycled vnodes */
	error = vnode_getwithvid(vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, p);
		goto done;
	}
	shadow_vp = vn->sc_shadow_vp;
	if (shadow_vp != NULL) {
		error = vnode_getwithvid(shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, p);
			goto done;
		}
	}
	error = vn_readwrite_io(vn, bp, p);
	vnode_put(vp);
	if (shadow_vp != NULL) {
		vnode_put(shadow_vp);
	}

 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	if (error) {
		buf_seterror(bp, error);
	}
	/* always complete the buf, success or failure */
	buf_biodone(bp);
	return;
}
868
869 /* ARGSUSED */
/*
 * vnioctl
 *
 * Common ioctl handler for both the block and character nodes
 * (is_char distinguishes them; some operations are block-device-only).
 * Requires superuser.  Handles the DKIOC* disk-information queries,
 * block-size changes, attach/detach of the backing file, shadow
 * attach, and the global/per-unit option sets.  Returns 0 or errno.
 */
static int
vnioctl(dev_t dev, u_long cmd, caddr_t data,
	__unused int flag, struct proc *p,
	int is_char)
{
	struct vn_softc *vn;
	struct user_vn_ioctl *viop;
	int error;
	u_int32_t *f;
	u_int64_t * o;
	int unit;
	struct vfsioattr ioattr;
	struct user_vn_ioctl user_vnio;
	boolean_t   		funnel_state;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + unit;
	/* all vn ioctls are privileged */
	error = proc_suser(p);
	if (error) {
		goto done;
	}

	/* the ioctl argument is interpreted per-command via these aliases */
	viop = (struct user_vn_ioctl *)data;
	f = (u_int32_t *)data;
	o = (u_int64_t *)data;
	switch (cmd) {
	/* these commands require an attached (VNF_INITED) unit */
	case VNIOCDETACH:
	case VNIOCDETACH64:
	case DKIOCGETBLOCKSIZE:
	case DKIOCSETBLOCKSIZE:
	case DKIOCGETMAXBLOCKCOUNTREAD:
	case DKIOCGETMAXBLOCKCOUNTWRITE:
	case DKIOCGETMAXSEGMENTCOUNTREAD:
	case DKIOCGETMAXSEGMENTCOUNTWRITE:
	case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
	case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
	case DKIOCGETBLOCKCOUNT:
	case DKIOCGETBLOCKCOUNT32:
		if ((vn->sc_flags & VNF_INITED) == 0) {
			error = ENXIO;
			goto done;
		}
		break;
	default:
		break;
	}

	/* pull I/O attributes from the backing file's mount, if attached */
	if (vn->sc_vp != NULL)
		vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr);
	else
		bzero(&ioattr, sizeof(ioattr));

	switch (cmd) {
	case DKIOCISVIRTUAL:
		*f = 1;
		break;
	case DKIOCGETMAXBLOCKCOUNTREAD:
		*o = ioattr.io_maxreadcnt / vn->sc_secsize;
		break;
	case DKIOCGETMAXBLOCKCOUNTWRITE:
		*o = ioattr.io_maxwritecnt / vn->sc_secsize;
		break;
	case DKIOCGETMAXBYTECOUNTREAD:
		*o = ioattr.io_maxreadcnt;
		break;
	case DKIOCGETMAXBYTECOUNTWRITE:
		*o = ioattr.io_maxwritecnt;
		break;
	case DKIOCGETMAXSEGMENTCOUNTREAD:
		*o = ioattr.io_segreadcnt;
		break;
	case DKIOCGETMAXSEGMENTCOUNTWRITE:
		*o = ioattr.io_segwritecnt;
		break;
	case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
		*o = ioattr.io_maxsegreadsize;
		break;
	case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
		*o = ioattr.io_maxsegwritesize;
		break;
	case DKIOCGETBLOCKSIZE:
		*f = vn->sc_secsize;
		break;
	case DKIOCSETBLOCKSIZE:
		if (is_char) {
			/* can only set block size on block device */
			error = ENODEV;
			break;
		}
		if (*f < DEV_BSIZE) {
			error = EINVAL;
			break;
		}
		if (vn->sc_shadow_vp != NULL) {
			if (*f == (unsigned)vn->sc_secsize) {
				break;
			}
			/* can't change the block size if already shadowing */
			error = EBUSY;
			break;
		}
		vn->sc_secsize = *f;
		/* recompute the size in terms of the new blocksize */
		vn->sc_size = vn->sc_fsize / vn->sc_secsize;
		break;
	case DKIOCISWRITABLE:
		*f = 1;
		break;
	case DKIOCGETBLOCKCOUNT32:
		*f = vn->sc_size;
		break;
	case DKIOCGETBLOCKCOUNT:
		*o = vn->sc_size;
		break;
	case VNIOCSHADOW:
	case VNIOCSHADOW64:
		if (vn->sc_shadow_vp != NULL) {
			error = EBUSY;
			break;
		}
		if (vn->sc_vp == NULL) {
			/* must be attached before we can shadow */
			error = EINVAL;
			break;
		}
		if (!proc_is64bit(p)) {
			/* downstream code expects LP64 version of vn_ioctl structure */
			vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio);
			viop = &user_vnio;
		}
		if (viop->vn_file == USER_ADDR_NULL) {
			error = EINVAL;
			break;
		}
		error = vniocattach_shadow(vn, viop, dev, 0, p);
		break;

	case VNIOCATTACH:
	case VNIOCATTACH64:
		if (is_char) {
			/* attach only on block device */
			error = ENODEV;
			break;
		}
		if (vn->sc_flags & VNF_INITED) {
			error = EBUSY;
			break;
		}
		if (!proc_is64bit(p)) {
			/* downstream code expects LP64 version of vn_ioctl structure */
			vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio);
			viop = &user_vnio;
		}
		if (viop->vn_file == USER_ADDR_NULL) {
			error = EINVAL;
			break;
		}
		error = vniocattach_file(vn, viop, dev, 0, p);
		break;

	case VNIOCDETACH:
	case VNIOCDETACH64:
		if (is_char) {
			/* detach only on block device */
			error = ENODEV;
			break;
		}
		/* Note: spec_open won't open a mounted block device */

		/*
		 * XXX handle i/o in progress.  Return EBUSY, or wait, or
		 * flush the i/o.
		 * XXX handle multiple opens of the device.  Return EBUSY,
		 * or revoke the fd's.
		 * How are these problems handled for removable and failing
		 * hardware devices? (Hint: They are not)
		 */
		vnclear(vn, p);
		break;

	case VNIOCGSET:
		vn_options |= *f;
		*f = vn_options;
		break;

	case VNIOCGCLEAR:
		vn_options &= ~(*f);
		*f = vn_options;
		break;

	case VNIOCUSET:
		vn->sc_options |= *f;
		*f = vn->sc_options;
		break;

	case VNIOCUCLEAR:
		vn->sc_options &= ~(*f);
		*f = vn->sc_options;
		break;

	default:
		error = ENOTTY;
		break;
	}
 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return(error);
}
1083
1084 static int
1085 vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1086 {
1087 return (vnioctl(dev, cmd, data, flag, p, TRUE));
1088 }
1089
1090 static int
1091 vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1092 {
1093 return (vnioctl(dev, cmd, data, flag, p, FALSE));
1094 }
1095
1096 /*
1097 * vniocattach_file:
1098 *
1099 * Attach a file to a VN partition. Return the size in the vn_size
1100 * field.
1101 */
1102
/*
 * vniocattach_file
 *
 * Attach a regular file to this vn unit: open it (read/write if
 * permitted, falling back to read-only), record its size and vnode,
 * take a credential reference used for all subsequent I/O, and create
 * the character devfs node.  Returns 0 or errno.  in_kernel selects
 * the address space the path is read from.
 */
static int
vniocattach_file(struct vn_softc *vn,
		 struct user_vn_ioctl *vniop,
		 dev_t dev,
		 int in_kernel,
		 struct proc *p)
{
	dev_t	cdev;
	struct vfs_context context;
	kauth_cred_t cred;
	struct nameidata nd;
	off_t file_size;
	int error, flags;

	context.vc_proc = p;
	context.vc_ucred = proc_ucred(p);

	flags = FREAD|FWRITE;
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, &context);
	}
	else {
		NDINIT(&nd, LOOKUP, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, &context);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/* permission failure: retry the open read-only */
		if (error != EACCES && error != EPERM && error != EROFS)
			return (error);
		flags &= ~FWRITE;
		if (in_kernel) {
			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32,
			       vniop->vn_file, &context);
		}
		else {
			NDINIT(&nd, LOOKUP, FOLLOW,
			       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			       vniop->vn_file, &context);
		}
		error = vn_open(&nd, flags, 0);
		if (error)
			return (error);
	}
	/* only regular files may back a vn device */
	if (nd.ni_vp->v_type != VREG) {
		error = EINVAL;
	}
	else {
		error = vnode_size(nd.ni_vp, &file_size, &context);
	}
	if (error != 0) {
		(void) vn_close(nd.ni_vp, flags, proc_ucred(p), p);
		vnode_put(nd.ni_vp);
		return (error);
	}
	cred = kauth_cred_proc_ref(p);
	/* bypass the buffer cache for I/O against the backing file */
	nd.ni_vp->v_flag |= VNOCACHE_DATA;
	error = setcred(nd.ni_vp, p, cred);
	if (error) {
		(void)vn_close(nd.ni_vp, flags, proc_ucred(p), p);
		vnode_put(nd.ni_vp);
		kauth_cred_rele(cred);
		return(error);
	}
	vn->sc_secsize = DEV_BSIZE;
	vn->sc_fsize = file_size;
	vn->sc_size = file_size / vn->sc_secsize;
	vn->sc_vp = nd.ni_vp;
	vn->sc_vid = vnode_vid(nd.ni_vp);
	vn->sc_open_flags = flags;
	vn->sc_cred = cred;
	cdev = makedev(vndevice_cdev_major, minor(dev));
	vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
				      UID_ROOT, GID_OPERATOR,
				      0600, "rvn%d",
				      minor(dev));
	vn->sc_flags |= VNF_INITED;
	if (flags == FREAD)
		vn->sc_flags |= VNF_READONLY;
	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
1187
/*
 * vniocattach_shadow
 *	Attach a shadow (copy-on-write) file to an already-configured vn
 *	device.  Subsequent writes to the device are redirected to the
 *	shadow file through the shadow map, leaving the original backing
 *	file untouched.
 *
 *	Returns 0 on success, otherwise an errno:
 *	  EINVAL  the shadow path does not name a regular file
 *	  ENOMEM  the shadow map could not be allocated
 *	  other   errors propagated from vn_open()/vnode_size()
 */
static int
vniocattach_shadow(struct vn_softc *vn, struct user_vn_ioctl *vniop,
		   __unused int dev, int in_kernel, struct proc *p)
{
	struct vfs_context context;
	struct nameidata nd;
	int error, flags;
	shadow_map_t * map;
	off_t file_size;

	context.vc_proc = p;
	context.vc_ucred = proc_ucred(p);

	/* the shadow must be writable; there is no read-only fallback */
	flags = FREAD|FWRITE;
	if (in_kernel) {
		/* path string lives in kernel address space */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, &context);
	}
	else {
		/* path string lives in the calling process's address space */
		NDINIT(&nd, LOOKUP, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, &context);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/* shadow MUST be writable! */
		return (error);
	}
	/* only a regular file of known size may back a shadow */
	if (nd.ni_vp->v_type != VREG
	    || (error = vnode_size(nd.ni_vp, &file_size, &context))) {
		(void)vn_close(nd.ni_vp, flags, proc_ucred(p), p);
		vnode_put(nd.ni_vp);
		return (error ? error : EINVAL);
	}
	map = shadow_map_create(vn->sc_fsize, file_size,
				0, vn->sc_secsize);
	if (map == NULL) {
		(void)vn_close(nd.ni_vp, flags, proc_ucred(p), p);
		vnode_put(nd.ni_vp);
		vn->sc_shadow_vp = NULL;
		return (ENOMEM);
	}
	vn->sc_shadow_vp = nd.ni_vp;
	vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
	/* bypass the buffer cache for shadow-file I/O */
	vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
	vn->sc_shadow_map = map;
	vn->sc_flags &= ~VNF_READONLY;	/* we're now read/write */

	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
1240
1241 int
1242 vndevice_root_image(char * path, char devname[], dev_t * dev_p)
1243 {
1244 int error = 0;
1245 struct vn_softc * vn;
1246 struct user_vn_ioctl vnio;
1247
1248 vnio.vn_file = CAST_USER_ADDR_T(path);
1249 vnio.vn_size = 0;
1250
1251 vn = vn_table + ROOT_IMAGE_UNIT;
1252 *dev_p = makedev(vndevice_bdev_major,
1253 ROOT_IMAGE_UNIT);
1254 sprintf(devname, "vn%d", ROOT_IMAGE_UNIT);
1255 error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc());
1256 return (error);
1257 }
1258
1259 /*
1260 * Duplicate the current processes' credentials. Since we are called only
1261 * as the result of a SET ioctl and only root can do that, any future access
1262 * to this "disk" is essentially as root. Note that credentials may change
1263 * if some other uid can write directly to the mapped file (NFS).
1264 */
1265 static int
1266 setcred(struct vnode * vp, struct proc * p, kauth_cred_t cred)
1267 {
1268 char *tmpbuf;
1269 int error = 0;
1270 struct vfs_context context;
1271
1272 /*
1273 * Horrible kludge to establish credentials for NFS XXX.
1274 */
1275 context.vc_proc = p;
1276 context.vc_ucred = cred;
1277 tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK);
1278 error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL);
1279 FREE(tmpbuf, M_TEMP);
1280 return (error);
1281 }
1282
/*
 * vnclear
 *	Tear down a configured vn device: close the backing and shadow
 *	vnodes (releasing the long-term references taken at attach time),
 *	free the shadow map, drop the cached credential, and remove the
 *	devfs character node.  Leaves the softc ready for a new attach.
 *
 *	Note: sc_cred is released only after both vn_close() calls,
 *	because those closes are performed with that credential.
 */
void
vnclear(struct vn_softc *vn, struct proc * p)
{
	if (vn->sc_vp != NULL) {
		/* release long-term reference */
		(void)vn_close(vn->sc_vp, vn->sc_open_flags, vn->sc_cred, p);
		vn->sc_vp = NULL;
	}
	if (vn->sc_shadow_vp != NULL) {
		/* release long-term reference */
		(void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE,
			       vn->sc_cred, p);
		vn->sc_shadow_vp = NULL;
	}
	if (vn->sc_shadow_map != NULL) {
		shadow_map_free(vn->sc_shadow_map);
		vn->sc_shadow_map = NULL;
	}
	/* mark the unit unconfigured and writable-by-default again */
	vn->sc_flags &= ~(VNF_INITED | VNF_READONLY);
	if (vn->sc_cred) {
		kauth_cred_rele(vn->sc_cred);
		vn->sc_cred = NULL;
	}
	vn->sc_size = 0;
	vn->sc_fsize = 0;
	if (vn->sc_cdev) {
		/* remove the character node created at attach time */
		devfs_remove(vn->sc_cdev);
		vn->sc_cdev = NULL;
	}
}
1313
1314 static int
1315 vnsize(dev_t dev)
1316 {
1317 int secsize;
1318 struct vn_softc *vn;
1319 int unit;
1320 boolean_t funnel_state;
1321
1322 unit = vnunit(dev);
1323 if (vnunit(dev) >= NVNDEVICE) {
1324 return (-1);
1325 }
1326
1327 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1328 vn = vn_table + unit;
1329 if ((vn->sc_flags & VNF_INITED) == 0)
1330 secsize = -1;
1331 else
1332 secsize = vn->sc_secsize;
1333 (void) thread_funnel_set(kernel_flock, funnel_state);
1334 return (secsize);
1335 }
1336
1337 #define CDEV_MAJOR -1
1338 #define BDEV_MAJOR -1
1339 static int vndevice_inited = 0;
1340
1341 void
1342 vndevice_init(void)
1343 {
1344 int i;
1345
1346 if (vndevice_inited)
1347 return;
1348 vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw);
1349
1350 if (vndevice_bdev_major < 0) {
1351 printf("vndevice_init: bdevsw_add() returned %d\n",
1352 vndevice_bdev_major);
1353 return;
1354 }
1355 vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw,
1356 vndevice_bdev_major);
1357 if (vndevice_cdev_major < 0) {
1358 printf("vndevice_init: cdevsw_add() returned %d\n",
1359 vndevice_cdev_major);
1360 return;
1361 }
1362 for (i = 0; i < NVNDEVICE; i++) {
1363 dev_t dev = makedev(vndevice_bdev_major, i);
1364 vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK,
1365 UID_ROOT, GID_OPERATOR,
1366 0600, "vn%d",
1367 i);
1368 if (vn_table[i].sc_bdev == NULL)
1369 printf("vninit: devfs_make_node failed!\n");
1370 }
1371 }
1372
1373 static void
1374 vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to)
1375 {
1376 to->vn_file = CAST_USER_ADDR_T(from->vn_file);
1377 to->vn_size = from->vn_size;
1378 to->vn_control = from->vn_control;
1379 }
1380
1381 #endif /* NVNDEVICE */