]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/vn.c
xnu-2782.10.72.tar.gz
[apple/xnu.git] / bsd / dev / vn / vn.c
1 /*
2 * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (c) 1988 University of Utah.
31 * Copyright (c) 1990, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * the Systems Programming Group of the University of Utah Computer
36 * Science Department.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * from: Utah Hdr: vn.c 1.13 94/04/02
67 *
68 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
69 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
70 */
71
72 /*
73 * Vnode disk driver.
74 *
75 * Block/character interface to a vnode. Allows one to treat a file
76 * as a disk (e.g. build a filesystem in it, mount it, etc.).
77 *
78 * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode
79 * instead of a simple VOP_RDWR. We do this to avoid distorting the
80 * local buffer cache.
81 *
82 * NOTE 2: There is a security issue involved with this driver.
83 * Once mounted all access to the contents of the "mapped" file via
84 * the special file is controlled by the permissions on the special
85 * file, the protection of the mapped file is ignored (effectively,
86 * by using root credentials in all transactions).
87 *
88 * NOTE 3: Doesn't interact with leases, should it?
89 */
90
91 #include "vndevice.h"
92
93 #if NVNDEVICE > 0
94
95 #include <sys/param.h>
96 #include <sys/systm.h>
97 #include <sys/kernel.h>
98 #include <sys/mount.h>
99 #include <sys/namei.h>
100 #include <sys/proc.h>
101 #include <sys/kauth.h>
102 #include <sys/buf.h>
103 #include <sys/malloc.h>
104 #include <sys/vnode_internal.h>
105 #include <sys/fcntl.h>
106 #include <sys/conf.h>
107 #include <sys/disk.h>
108 #include <sys/stat.h>
109 #include <sys/conf.h>
110 #include <sys/uio_internal.h>
111
112 #include <sys/vnioctl.h>
113
114 #include <sys/vm.h>
115
116 #include <vm/vm_pager.h>
117 #include <mach/memory_object_types.h>
118
119 #include <miscfs/devfs/devfs.h>
120
121
122 #include "shadow.h"
123 static void
124 vndevice_do_init(void);
125
126 static ioctl_fcn_t vnioctl_chr;
127 static ioctl_fcn_t vnioctl_blk;
128 static open_close_fcn_t vnopen;
129 static open_close_fcn_t vnclose;
130 static psize_fcn_t vnsize;
131 static strategy_fcn_t vnstrategy;
132 static read_write_fcn_t vnread;
133 static read_write_fcn_t vnwrite;
134
135 static int vndevice_bdev_major;
136 static int vndevice_cdev_major;
137
138 /*
139 * cdevsw
140 * D_DISK we want to look like a disk
141 * D_CANFREE We support B_FREEBUF
142 */
143
144 static struct bdevsw vn_bdevsw = {
145 /* open */ vnopen,
146 /* close */ vnclose,
147 /* strategy */ vnstrategy,
148 /* ioctl */ vnioctl_blk,
149 /* dump */ eno_dump,
150 /* psize */ vnsize,
151 /* flags */ D_DISK,
152 };
153
154 static struct cdevsw vn_cdevsw = {
155 /* open */ vnopen,
156 /* close */ vnclose,
157 /* read */ vnread,
158 /* write */ vnwrite,
159 /* ioctl */ vnioctl_chr,
160 /* stop */ eno_stop,
161 /* reset */ eno_reset,
162 /* ttys */ NULL,
163 /* select */ eno_select,
164 /* mmap */ eno_mmap,
165 /* strategy */ eno_strat,
166 /* getc */ eno_getc,
167 /* putc */ eno_putc,
168 /* flags */ D_DISK,
169 };
170
171 struct vn_softc {
172 u_int64_t sc_fsize; /* file size in bytes */
173 u_int64_t sc_size; /* size of vn, sc_secsize scale */
174 int sc_flags; /* flags */
175 u_int32_t sc_secsize; /* sector size */
176 struct vnode *sc_vp; /* vnode if not NULL */
177 uint32_t sc_vid;
178 int sc_open_flags;
179 struct vnode *sc_shadow_vp; /* shadow vnode if not NULL */
180 uint32_t sc_shadow_vid;
181 shadow_map_t * sc_shadow_map; /* shadow map if not NULL */
182 kauth_cred_t sc_cred; /* credentials */
183 u_int32_t sc_options; /* options */
184 void * sc_bdev;
185 void * sc_cdev;
186 } vn_table[NVNDEVICE];
187
188 #define ROOT_IMAGE_UNIT 0
189
190 /* sc_flags */
191 #define VNF_INITED 0x01
192 #define VNF_READONLY 0x02
193
194 static u_int32_t vn_options;
195
196 #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
197 #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
198
199 static int setcred(struct vnode * vp, kauth_cred_t cred);
200 static void vnclear (struct vn_softc *vn, vfs_context_t ctx);
201 static void vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to);
202 void vndevice_init(void);
203 int vndevice_root_image(char * path, char devname[], dev_t * dev_p);
204
205 static int
206 vniocattach_file(struct vn_softc *vn,
207 struct vn_ioctl_64 *vniop,
208 dev_t dev,
209 int in_kernel,
210 proc_t p);
211 static int
212 vniocattach_shadow(struct vn_softc * vn,
213 struct vn_ioctl_64 *vniop,
214 dev_t dev,
215 int in_kernel,
216 proc_t p);
/*
 * Map a device number to its vn unit: the unit is the minor number.
 */
static __inline__ int
vnunit(dev_t dev)
{
	int unit = minor(dev);

	return (unit);
}
222
223 static int
224 vnclose(__unused dev_t dev, __unused int flags,
225 __unused int devtype, __unused proc_t p)
226 {
227 return (0);
228 }
229
230 static int
231 vnopen(dev_t dev, int flags, __unused int devtype, __unused proc_t p)
232 {
233 struct vn_softc *vn;
234 int unit;
235
236 unit = vnunit(dev);
237 if (vnunit(dev) >= NVNDEVICE) {
238 return (ENXIO);
239 }
240 vn = vn_table + unit;
241 if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
242 return (EACCES);
243
244 return(0);
245 }
246
247 static int
248 file_io(struct vnode * vp, vfs_context_t ctx,
249 enum uio_rw op, char * base, off_t offset, user_ssize_t count,
250 user_ssize_t * resid)
251 {
252 uio_t auio;
253 int error;
254 char uio_buf[UIO_SIZEOF(1)];
255
256 auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op,
257 &uio_buf[0], sizeof(uio_buf));
258 uio_addiov(auio, CAST_USER_ADDR_T(base), count);
259 if (op == UIO_READ)
260 error = VNOP_READ(vp, auio, IO_SYNC, ctx);
261 else
262 error = VNOP_WRITE(vp, auio, IO_SYNC, ctx);
263
264 if (resid != NULL) {
265 *resid = uio_resid(auio);
266 }
267 return (error);
268 }
269
/*
 * Number of blocksize-sized blocks needed to hold `o' bytes (round up).
 */
static __inline__ off_t
block_round(off_t o, int blocksize)
{
	off_t padded = o + blocksize - 1;

	return (padded / blocksize);
}
275
/*
 * Index of the block that contains byte offset `o' (round down).
 */
static __inline__ off_t
block_truncate(off_t o, int blocksize)
{
	off_t block_index = o / blocksize;

	return (block_index);
}
281
/*
 * Offset of byte `o' within its block.
 */
static __inline__ int
block_remainder(off_t o, int blocksize)
{
	off_t within = o % blocksize;

	return (within);
}
287
288 static int
289 vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
290 vfs_context_t ctx)
291 {
292 u_int32_t blocksize = vn->sc_secsize;
293 int error = 0;
294 off_t offset;
295 user_ssize_t resid;
296 off_t orig_offset;
297 user_ssize_t orig_resid;
298
299 orig_resid = resid = uio_resid(uio);
300 orig_offset = offset = uio_offset(uio);
301
302 while (resid > 0) {
303 u_int32_t remainder;
304 u_int32_t this_block_number;
305 u_int32_t this_block_count;
306 off_t this_offset;
307 user_ssize_t this_resid;
308 struct vnode * vp;
309
310 /* figure out which blocks to read */
311 remainder = block_remainder(offset, blocksize);
312 if (shadow_map_read(vn->sc_shadow_map,
313 block_truncate(offset, blocksize),
314 block_round(resid + remainder, blocksize),
315 &this_block_number, &this_block_count)) {
316 vp = vn->sc_shadow_vp;
317 }
318 else {
319 vp = vn->sc_vp;
320 }
321
322 /* read the blocks (or parts thereof) */
323 this_offset = (off_t)this_block_number * blocksize + remainder;
324 uio_setoffset(uio, this_offset);
325 this_resid = this_block_count * blocksize - remainder;
326 if (this_resid > resid) {
327 this_resid = resid;
328 }
329 uio_setresid(uio, this_resid);
330 error = VNOP_READ(vp, uio, ioflag, ctx);
331 if (error) {
332 break;
333 }
334
335 /* figure out how much we actually read */
336 this_resid -= uio_resid(uio);
337 if (this_resid == 0) {
338 printf("vn device: vnread_shadow zero length read\n");
339 break;
340 }
341 resid -= this_resid;
342 offset += this_resid;
343 }
344 uio_setresid(uio, resid);
345 uio_setoffset(uio, offset);
346 return (error);
347 }
348
349 static int
350 vncopy_block_to_shadow(struct vn_softc * vn, vfs_context_t ctx,
351 u_int32_t file_block, u_int32_t shadow_block)
352 {
353 int error;
354 char * tmpbuf;
355
356 tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK);
357 if (tmpbuf == NULL) {
358 return (ENOMEM);
359 }
360 /* read one block from file at file_block offset */
361 error = file_io(vn->sc_vp, ctx, UIO_READ,
362 tmpbuf, (off_t)file_block * vn->sc_secsize,
363 vn->sc_secsize, NULL);
364 if (error) {
365 goto done;
366 }
367 /* write one block to shadow file at shadow_block offset */
368 error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
369 tmpbuf, (off_t)shadow_block * vn->sc_secsize,
370 vn->sc_secsize, NULL);
371 done:
372 FREE(tmpbuf, M_TEMP);
373 return (error);
374 }
375
376 enum {
377 FLAGS_FIRST_BLOCK_PARTIAL = 0x1,
378 FLAGS_LAST_BLOCK_PARTIAL = 0x2
379 };
380
381 static int
382 vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
383 vfs_context_t ctx)
384 {
385 u_int32_t blocksize = vn->sc_secsize;
386 int error = 0;
387 user_ssize_t resid;
388 off_t offset;
389
390 resid = uio_resid(uio);
391 offset = uio_offset(uio);
392
393 while (resid > 0) {
394 int flags = 0;
395 u_int32_t offset_block_number;
396 u_int32_t remainder;
397 u_int32_t resid_block_count;
398 u_int32_t shadow_block_count;
399 u_int32_t shadow_block_number;
400 user_ssize_t this_resid;
401
402 /* figure out which blocks to write */
403 offset_block_number = block_truncate(offset, blocksize);
404 remainder = block_remainder(offset, blocksize);
405 resid_block_count = block_round(resid + remainder, blocksize);
406 /* figure out if the first or last blocks are partial writes */
407 if (remainder > 0
408 && !shadow_map_is_written(vn->sc_shadow_map,
409 offset_block_number)) {
410 /* the first block is a partial write */
411 flags |= FLAGS_FIRST_BLOCK_PARTIAL;
412 }
413 if (resid_block_count > 1
414 && !shadow_map_is_written(vn->sc_shadow_map,
415 offset_block_number
416 + resid_block_count - 1)
417 && block_remainder(offset + resid, blocksize) > 0) {
418 /* the last block is a partial write */
419 flags |= FLAGS_LAST_BLOCK_PARTIAL;
420 }
421 if (shadow_map_write(vn->sc_shadow_map,
422 offset_block_number, resid_block_count,
423 &shadow_block_number,
424 &shadow_block_count)) {
425 /* shadow file is growing */
426 #if 0
427 /* truncate the file to its new length before write */
428 off_t size;
429 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
430 * vn->sc_secsize;
431 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
432 #endif
433 }
434 /* write the blocks (or parts thereof) */
435 uio_setoffset(uio, (off_t)
436 shadow_block_number * blocksize + remainder);
437 this_resid = (off_t)shadow_block_count * blocksize - remainder;
438 if (this_resid >= resid) {
439 this_resid = resid;
440 if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) {
441 /* copy the last block to the shadow */
442 u_int32_t d;
443 u_int32_t s;
444
445 s = offset_block_number
446 + resid_block_count - 1;
447 d = shadow_block_number
448 + shadow_block_count - 1;
449 error = vncopy_block_to_shadow(vn, ctx, s, d);
450 if (error) {
451 printf("vnwrite_shadow: failed to copy"
452 " block %u to shadow block %u\n",
453 s, d);
454 break;
455 }
456 }
457 }
458 uio_setresid(uio, this_resid);
459 if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) {
460 /* copy the first block to the shadow */
461 error = vncopy_block_to_shadow(vn, ctx,
462 offset_block_number,
463 shadow_block_number);
464 if (error) {
465 printf("vnwrite_shadow: failed to"
466 " copy block %u to shadow block %u\n",
467 offset_block_number,
468 shadow_block_number);
469 break;
470 }
471 }
472 error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, ctx);
473 if (error) {
474 break;
475 }
476 /* figure out how much we actually wrote */
477 this_resid -= uio_resid(uio);
478 if (this_resid == 0) {
479 printf("vn device: vnwrite_shadow zero length write\n");
480 break;
481 }
482 resid -= this_resid;
483 offset += this_resid;
484 }
485 uio_setresid(uio, resid);
486 uio_setoffset(uio, offset);
487 return (error);
488 }
489
490 static int
491 vnread(dev_t dev, struct uio *uio, int ioflag)
492 {
493 struct vfs_context context;
494 int error = 0;
495 off_t offset;
496 proc_t p;
497 user_ssize_t resid;
498 struct vn_softc * vn;
499 int unit;
500
501 unit = vnunit(dev);
502 if (vnunit(dev) >= NVNDEVICE) {
503 return (ENXIO);
504 }
505 p = current_proc();
506 vn = vn_table + unit;
507 if ((vn->sc_flags & VNF_INITED) == 0) {
508 error = ENXIO;
509 goto done;
510 }
511
512 context.vc_thread = current_thread();
513 context.vc_ucred = vn->sc_cred;
514
515 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
516 if (error != 0) {
517 /* the vnode is no longer available, abort */
518 error = ENXIO;
519 vnclear(vn, &context);
520 goto done;
521 }
522
523 resid = uio_resid(uio);
524 offset = uio_offset(uio);
525
526 /*
527 * If out of bounds return an error. If at the EOF point,
528 * simply read less.
529 */
530 if (offset >= (off_t)vn->sc_fsize) {
531 if (offset > (off_t)vn->sc_fsize) {
532 error = EINVAL;
533 }
534 goto done;
535 }
536 /*
537 * If the request crosses EOF, truncate the request.
538 */
539 if ((offset + resid) > (off_t)vn->sc_fsize) {
540 resid = vn->sc_fsize - offset;
541 uio_setresid(uio, resid);
542 }
543
544 if (vn->sc_shadow_vp != NULL) {
545 error = vnode_getwithvid(vn->sc_shadow_vp,
546 vn->sc_shadow_vid);
547 if (error != 0) {
548 /* the vnode is no longer available, abort */
549 error = ENXIO;
550 vnode_put(vn->sc_vp);
551 vnclear(vn, &context);
552 goto done;
553 }
554 error = vnread_shadow(vn, uio, ioflag, &context);
555 vnode_put(vn->sc_shadow_vp);
556 } else {
557 error = VNOP_READ(vn->sc_vp, uio, ioflag, &context);
558 }
559 vnode_put(vn->sc_vp);
560 done:
561 return (error);
562 }
563
564 static int
565 vnwrite(dev_t dev, struct uio *uio, int ioflag)
566 {
567 struct vfs_context context;
568 int error;
569 off_t offset;
570 proc_t p;
571 user_ssize_t resid;
572 struct vn_softc * vn;
573 int unit;
574
575 unit = vnunit(dev);
576 if (vnunit(dev) >= NVNDEVICE) {
577 return (ENXIO);
578 }
579 p = current_proc();
580 vn = vn_table + unit;
581 if ((vn->sc_flags & VNF_INITED) == 0) {
582 error = ENXIO;
583 goto done;
584 }
585 if (vn->sc_flags & VNF_READONLY) {
586 error = EROFS;
587 goto done;
588 }
589
590 context.vc_thread = current_thread();
591 context.vc_ucred = vn->sc_cred;
592
593 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
594 if (error != 0) {
595 /* the vnode is no longer available, abort */
596 error = ENXIO;
597 vnclear(vn, &context);
598 goto done;
599 }
600 resid = uio_resid(uio);
601 offset = uio_offset(uio);
602
603 /*
604 * If out of bounds return an error. If at the EOF point,
605 * simply write less.
606 */
607 if (offset >= (off_t)vn->sc_fsize) {
608 if (offset > (off_t)vn->sc_fsize) {
609 error = EINVAL;
610 }
611 goto done;
612 }
613 /*
614 * If the request crosses EOF, truncate the request.
615 */
616 if ((offset + resid) > (off_t)vn->sc_fsize) {
617 resid = (off_t)vn->sc_fsize - offset;
618 uio_setresid(uio, resid);
619 }
620
621 if (vn->sc_shadow_vp != NULL) {
622 error = vnode_getwithvid(vn->sc_shadow_vp,
623 vn->sc_shadow_vid);
624 if (error != 0) {
625 /* the vnode is no longer available, abort */
626 error = ENXIO;
627 vnode_put(vn->sc_vp);
628 vnclear(vn, &context);
629 goto done;
630 }
631 error = vnwrite_shadow(vn, uio, ioflag, &context);
632 vnode_put(vn->sc_shadow_vp);
633 } else {
634 error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context);
635 }
636 vnode_put(vn->sc_vp);
637 done:
638 return (error);
639 }
640
641 static int
642 shadow_read(struct vn_softc * vn, struct buf * bp, char * base,
643 vfs_context_t ctx)
644 {
645 u_int32_t blocksize = vn->sc_secsize;
646 int error = 0;
647 u_int32_t offset;
648 boolean_t read_shadow;
649 u_int32_t resid;
650 u_int32_t start = 0;
651
652 offset = buf_blkno(bp);
653 resid = buf_resid(bp) / blocksize;
654 while (resid > 0) {
655 user_ssize_t temp_resid;
656 u_int32_t this_offset;
657 u_int32_t this_resid;
658 struct vnode * vp;
659
660 read_shadow = shadow_map_read(vn->sc_shadow_map,
661 offset, resid,
662 &this_offset, &this_resid);
663 if (read_shadow) {
664 vp = vn->sc_shadow_vp;
665 }
666 else {
667 vp = vn->sc_vp;
668 }
669 error = file_io(vp, ctx, UIO_READ, base + start,
670 (off_t)this_offset * blocksize,
671 (user_ssize_t)this_resid * blocksize,
672 &temp_resid);
673 if (error) {
674 break;
675 }
676 this_resid -= (temp_resid / blocksize);
677 if (this_resid == 0) {
678 printf("vn device: shadow_read zero length read\n");
679 break;
680 }
681 resid -= this_resid;
682 offset += this_resid;
683 start += this_resid * blocksize;
684 }
685 buf_setresid(bp, resid * blocksize);
686 return (error);
687 }
688
689 static int
690 shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
691 vfs_context_t ctx)
692 {
693 u_int32_t blocksize = vn->sc_secsize;
694 int error = 0;
695 u_int32_t offset;
696 boolean_t shadow_grew;
697 u_int32_t resid;
698 u_int32_t start = 0;
699
700 offset = buf_blkno(bp);
701 resid = buf_resid(bp) / blocksize;
702 while (resid > 0) {
703 user_ssize_t temp_resid;
704 u_int32_t this_offset;
705 u_int32_t this_resid;
706
707 shadow_grew = shadow_map_write(vn->sc_shadow_map,
708 offset, resid,
709 &this_offset, &this_resid);
710 if (shadow_grew) {
711 #if 0
712 off_t size;
713 /* truncate the file to its new length before write */
714 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
715 * blocksize;
716 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
717 #endif
718 }
719 error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
720 base + start,
721 (off_t)this_offset * blocksize,
722 (user_ssize_t)this_resid * blocksize,
723 &temp_resid);
724 if (error) {
725 break;
726 }
727 this_resid -= (temp_resid / blocksize);
728 if (this_resid == 0) {
729 printf("vn device: shadow_write zero length write\n");
730 break;
731 }
732 resid -= this_resid;
733 offset += this_resid;
734 start += this_resid * blocksize;
735 }
736 buf_setresid(bp, resid * blocksize);
737 return (error);
738 }
739
740 static int
741 vn_readwrite_io(struct vn_softc * vn, struct buf * bp, vfs_context_t ctx)
742 {
743 int error = 0;
744 char * iov_base;
745 caddr_t vaddr;
746
747 if (buf_map(bp, &vaddr))
748 panic("vn device: buf_map failed");
749 iov_base = (char *)vaddr;
750
751 if (vn->sc_shadow_vp == NULL) {
752 user_ssize_t temp_resid;
753
754 error = file_io(vn->sc_vp, ctx,
755 buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
756 iov_base,
757 (off_t)buf_blkno(bp) * vn->sc_secsize,
758 buf_resid(bp), &temp_resid);
759 buf_setresid(bp, temp_resid);
760 }
761 else {
762 if (buf_flags(bp) & B_READ)
763 error = shadow_read(vn, bp, iov_base, ctx);
764 else
765 error = shadow_write(vn, bp, iov_base, ctx);
766 }
767 buf_unmap(bp);
768
769 return (error);
770 }
771
772 static void
773 vnstrategy(struct buf *bp)
774 {
775 struct vn_softc *vn;
776 int error = 0;
777 long sz; /* in sc_secsize chunks */
778 daddr64_t blk_num;
779 struct vnode * shadow_vp = NULL;
780 struct vnode * vp = NULL;
781 struct vfs_context context;
782
783 vn = vn_table + vnunit(buf_device(bp));
784 if ((vn->sc_flags & VNF_INITED) == 0) {
785 error = ENXIO;
786 goto done;
787 }
788
789 context.vc_thread = current_thread();
790 context.vc_ucred = vn->sc_cred;
791
792 buf_setresid(bp, buf_count(bp));
793 /*
794 * Check for required alignment. Transfers must be a valid
795 * multiple of the sector size.
796 */
797 blk_num = buf_blkno(bp);
798 if (buf_count(bp) % vn->sc_secsize != 0) {
799 error = EINVAL;
800 goto done;
801 }
802 sz = howmany(buf_count(bp), vn->sc_secsize);
803
804 /*
805 * If out of bounds return an error. If at the EOF point,
806 * simply read or write less.
807 */
808 if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
809 if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
810 error = EINVAL;
811 }
812 goto done;
813 }
814 /*
815 * If the request crosses EOF, truncate the request.
816 */
817 if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
818 buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
819 buf_setresid(bp, buf_count(bp));
820 }
821 vp = vn->sc_vp;
822 if (vp == NULL) {
823 error = ENXIO;
824 goto done;
825 }
826
827 error = vnode_getwithvid(vp, vn->sc_vid);
828 if (error != 0) {
829 /* the vnode is no longer available, abort */
830 error = ENXIO;
831 vnclear(vn, &context);
832 goto done;
833 }
834 shadow_vp = vn->sc_shadow_vp;
835 if (shadow_vp != NULL) {
836 error = vnode_getwithvid(shadow_vp,
837 vn->sc_shadow_vid);
838 if (error != 0) {
839 /* the vnode is no longer available, abort */
840 error = ENXIO;
841 vnode_put(vn->sc_vp);
842 vnclear(vn, &context);
843 goto done;
844 }
845 }
846
847 error = vn_readwrite_io(vn, bp, &context);
848 vnode_put(vp);
849 if (shadow_vp != NULL) {
850 vnode_put(shadow_vp);
851 }
852
853 done:
854 if (error) {
855 buf_seterror(bp, error);
856 }
857 buf_biodone(bp);
858 return;
859 }
860
861 /* ARGSUSED */
862 static int
863 vnioctl(dev_t dev, u_long cmd, caddr_t data,
864 __unused int flag, proc_t p,
865 int is_char)
866 {
867 struct vn_softc *vn;
868 struct vn_ioctl_64 *viop;
869 int error;
870 u_int32_t *f;
871 u_int64_t * o;
872 int unit;
873 struct vfsioattr ioattr;
874 struct vn_ioctl_64 user_vnio;
875 struct vfs_context context;
876
877 unit = vnunit(dev);
878 if (vnunit(dev) >= NVNDEVICE) {
879 return (ENXIO);
880 }
881
882 vn = vn_table + unit;
883 error = proc_suser(p);
884 if (error) {
885 goto done;
886 }
887
888 context.vc_thread = current_thread();
889 context.vc_ucred = vn->sc_cred;
890
891 viop = (struct vn_ioctl_64 *)data;
892 f = (u_int32_t *)data;
893 o = (u_int64_t *)data;
894 switch (cmd) {
895 #ifdef __LP64__
896 case VNIOCDETACH32:
897 case VNIOCDETACH:
898 #else
899 case VNIOCDETACH:
900 case VNIOCDETACH64:
901 #endif
902 case DKIOCGETBLOCKSIZE:
903 case DKIOCSETBLOCKSIZE:
904 case DKIOCGETMAXBLOCKCOUNTREAD:
905 case DKIOCGETMAXBLOCKCOUNTWRITE:
906 case DKIOCGETMAXSEGMENTCOUNTREAD:
907 case DKIOCGETMAXSEGMENTCOUNTWRITE:
908 case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
909 case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
910 case DKIOCGETBLOCKCOUNT:
911 case DKIOCGETBLOCKCOUNT32:
912 if ((vn->sc_flags & VNF_INITED) == 0) {
913 error = ENXIO;
914 goto done;
915 }
916 break;
917 default:
918 break;
919 }
920
921 if (vn->sc_vp != NULL)
922 vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr);
923 else
924 bzero(&ioattr, sizeof(ioattr));
925
926 switch (cmd) {
927 case DKIOCISVIRTUAL:
928 *f = 1;
929 break;
930 case DKIOCGETMAXBLOCKCOUNTREAD:
931 *o = ioattr.io_maxreadcnt / vn->sc_secsize;
932 break;
933 case DKIOCGETMAXBLOCKCOUNTWRITE:
934 *o = ioattr.io_maxwritecnt / vn->sc_secsize;
935 break;
936 case DKIOCGETMAXBYTECOUNTREAD:
937 *o = ioattr.io_maxreadcnt;
938 break;
939 case DKIOCGETMAXBYTECOUNTWRITE:
940 *o = ioattr.io_maxwritecnt;
941 break;
942 case DKIOCGETMAXSEGMENTCOUNTREAD:
943 *o = ioattr.io_segreadcnt;
944 break;
945 case DKIOCGETMAXSEGMENTCOUNTWRITE:
946 *o = ioattr.io_segwritecnt;
947 break;
948 case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
949 *o = ioattr.io_maxsegreadsize;
950 break;
951 case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
952 *o = ioattr.io_maxsegwritesize;
953 break;
954 case DKIOCGETBLOCKSIZE:
955 *f = vn->sc_secsize;
956 break;
957 case DKIOCSETBLOCKSIZE:
958 if (is_char) {
959 /* can only set block size on block device */
960 error = ENODEV;
961 break;
962 }
963 if (*f < DEV_BSIZE) {
964 error = EINVAL;
965 break;
966 }
967 if (vn->sc_shadow_vp != NULL) {
968 if (*f == (unsigned)vn->sc_secsize) {
969 break;
970 }
971 /* can't change the block size if already shadowing */
972 error = EBUSY;
973 break;
974 }
975 vn->sc_secsize = *f;
976 /* recompute the size in terms of the new blocksize */
977 vn->sc_size = vn->sc_fsize / vn->sc_secsize;
978 break;
979 case DKIOCISWRITABLE:
980 *f = 1;
981 break;
982 case DKIOCGETBLOCKCOUNT32:
983 *f = vn->sc_size;
984 break;
985 case DKIOCGETBLOCKCOUNT:
986 *o = vn->sc_size;
987 break;
988 #ifdef __LP64__
989 case VNIOCSHADOW32:
990 case VNIOCSHADOW:
991 #else
992 case VNIOCSHADOW:
993 case VNIOCSHADOW64:
994 #endif
995 if (vn->sc_shadow_vp != NULL) {
996 error = EBUSY;
997 break;
998 }
999 if (vn->sc_vp == NULL) {
1000 /* much be attached before we can shadow */
1001 error = EINVAL;
1002 break;
1003 }
1004 if (!proc_is64bit(p)) {
1005 /* downstream code expects LP64 version of vn_ioctl structure */
1006 vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio);
1007 viop = &user_vnio;
1008 }
1009 if (viop->vn_file == USER_ADDR_NULL) {
1010 error = EINVAL;
1011 break;
1012 }
1013 error = vniocattach_shadow(vn, viop, dev, 0, p);
1014 break;
1015
1016 #ifdef __LP64__
1017 case VNIOCATTACH32:
1018 case VNIOCATTACH:
1019 #else
1020 case VNIOCATTACH:
1021 case VNIOCATTACH64:
1022 #endif
1023 if (is_char) {
1024 /* attach only on block device */
1025 error = ENODEV;
1026 break;
1027 }
1028 if (vn->sc_flags & VNF_INITED) {
1029 error = EBUSY;
1030 break;
1031 }
1032 if (!proc_is64bit(p)) {
1033 /* downstream code expects LP64 version of vn_ioctl structure */
1034 vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio);
1035 viop = &user_vnio;
1036 }
1037 if (viop->vn_file == USER_ADDR_NULL) {
1038 error = EINVAL;
1039 break;
1040 }
1041 error = vniocattach_file(vn, viop, dev, 0, p);
1042 break;
1043
1044 #ifdef __LP64__
1045 case VNIOCDETACH32:
1046 case VNIOCDETACH:
1047 #else
1048 case VNIOCDETACH:
1049 case VNIOCDETACH64:
1050 #endif
1051 if (is_char) {
1052 /* detach only on block device */
1053 error = ENODEV;
1054 break;
1055 }
1056 /* Note: spec_open won't open a mounted block device */
1057
1058 /*
1059 * XXX handle i/o in progress. Return EBUSY, or wait, or
1060 * flush the i/o.
1061 * XXX handle multiple opens of the device. Return EBUSY,
1062 * or revoke the fd's.
1063 * How are these problems handled for removable and failing
1064 * hardware devices? (Hint: They are not)
1065 */
1066 vnclear(vn, &context);
1067 break;
1068
1069 case VNIOCGSET:
1070 vn_options |= *f;
1071 *f = vn_options;
1072 break;
1073
1074 case VNIOCGCLEAR:
1075 vn_options &= ~(*f);
1076 *f = vn_options;
1077 break;
1078
1079 case VNIOCUSET:
1080 vn->sc_options |= *f;
1081 *f = vn->sc_options;
1082 break;
1083
1084 case VNIOCUCLEAR:
1085 vn->sc_options &= ~(*f);
1086 *f = vn->sc_options;
1087 break;
1088
1089 default:
1090 error = ENOTTY;
1091 break;
1092 }
1093 done:
1094 return(error);
1095 }
1096
1097 static int
1098 vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
1099 {
1100 return (vnioctl(dev, cmd, data, flag, p, TRUE));
1101 }
1102
1103 static int
1104 vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
1105 {
1106 return (vnioctl(dev, cmd, data, flag, p, FALSE));
1107 }
1108
1109 /*
1110 * vniocattach_file:
1111 *
1112 * Attach a file to a VN partition. Return the size in the vn_size
1113 * field.
1114 */
1115
1116 static int
1117 vniocattach_file(struct vn_softc *vn,
1118 struct vn_ioctl_64 *vniop,
1119 dev_t dev,
1120 int in_kernel,
1121 proc_t p)
1122 {
1123 dev_t cdev;
1124 vfs_context_t ctx = vfs_context_current();
1125 kauth_cred_t cred;
1126 struct nameidata nd;
1127 off_t file_size;
1128 int error, flags;
1129
1130 flags = FREAD|FWRITE;
1131 if (in_kernel) {
1132 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
1133 }
1134 else {
1135 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
1136 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1137 vniop->vn_file, ctx);
1138 }
1139 /* vn_open gives both long- and short-term references */
1140 error = vn_open(&nd, flags, 0);
1141 if (error) {
1142 if (error != EACCES && error != EPERM && error != EROFS) {
1143 return (error);
1144 }
1145 flags &= ~FWRITE;
1146 if (in_kernel) {
1147 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
1148 vniop->vn_file, ctx);
1149 }
1150 else {
1151 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
1152 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1153 vniop->vn_file, ctx);
1154 }
1155 error = vn_open(&nd, flags, 0);
1156 if (error) {
1157 return (error);
1158 }
1159 }
1160 if (nd.ni_vp->v_type != VREG) {
1161 error = EINVAL;
1162 }
1163 else {
1164 error = vnode_size(nd.ni_vp, &file_size, ctx);
1165 }
1166 if (error != 0) {
1167 (void) vn_close(nd.ni_vp, flags, ctx);
1168 vnode_put(nd.ni_vp);
1169 return (error);
1170 }
1171 cred = kauth_cred_proc_ref(p);
1172 nd.ni_vp->v_flag |= VNOCACHE_DATA;
1173 error = setcred(nd.ni_vp, cred);
1174 if (error) {
1175 (void)vn_close(nd.ni_vp, flags, ctx);
1176 vnode_put(nd.ni_vp);
1177 kauth_cred_unref(&cred);
1178 return(error);
1179 }
1180 vn->sc_secsize = DEV_BSIZE;
1181 vn->sc_fsize = file_size;
1182 vn->sc_size = file_size / vn->sc_secsize;
1183 vn->sc_vp = nd.ni_vp;
1184 vn->sc_vid = vnode_vid(nd.ni_vp);
1185 vn->sc_open_flags = flags;
1186 vn->sc_cred = cred;
1187 cdev = makedev(vndevice_cdev_major, minor(dev));
1188 vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
1189 UID_ROOT, GID_OPERATOR,
1190 0600, "rvn%d",
1191 minor(dev));
1192 vn->sc_flags |= VNF_INITED;
1193 if (flags == FREAD)
1194 vn->sc_flags |= VNF_READONLY;
1195 /* lose the short-term reference */
1196 vnode_put(nd.ni_vp);
1197 return(0);
1198 }
1199
1200 static int
1201 vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop,
1202 __unused dev_t dev, int in_kernel, proc_t p)
1203 {
1204 vfs_context_t ctx = vfs_context_current();
1205 struct nameidata nd;
1206 int error, flags;
1207 shadow_map_t * map;
1208 off_t file_size;
1209
1210 flags = FREAD|FWRITE;
1211 if (in_kernel) {
1212 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
1213 }
1214 else {
1215 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
1216 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1217 vniop->vn_file, ctx);
1218 }
1219 /* vn_open gives both long- and short-term references */
1220 error = vn_open(&nd, flags, 0);
1221 if (error) {
1222 /* shadow MUST be writable! */
1223 return (error);
1224 }
1225 if (nd.ni_vp->v_type != VREG
1226 || (error = vnode_size(nd.ni_vp, &file_size, ctx))) {
1227 (void)vn_close(nd.ni_vp, flags, ctx);
1228 vnode_put(nd.ni_vp);
1229 return (error ? error : EINVAL);
1230 }
1231 map = shadow_map_create(vn->sc_fsize, file_size,
1232 0, vn->sc_secsize);
1233 if (map == NULL) {
1234 (void)vn_close(nd.ni_vp, flags, ctx);
1235 vnode_put(nd.ni_vp);
1236 vn->sc_shadow_vp = NULL;
1237 return (ENOMEM);
1238 }
1239 vn->sc_shadow_vp = nd.ni_vp;
1240 vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
1241 vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
1242 vn->sc_shadow_map = map;
1243 vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */
1244
1245 /* lose the short-term reference */
1246 vnode_put(nd.ni_vp);
1247 return(0);
1248 }
1249
1250 int
1251 vndevice_root_image(char * path, char devname[], dev_t * dev_p)
1252 {
1253 int error = 0;
1254 struct vn_softc * vn;
1255 struct vn_ioctl_64 vnio;
1256
1257 vnio.vn_file = CAST_USER_ADDR_T(path);
1258 vnio.vn_size = 0;
1259
1260 vn = vn_table + ROOT_IMAGE_UNIT;
1261 *dev_p = makedev(vndevice_bdev_major,
1262 ROOT_IMAGE_UNIT);
1263 snprintf(devname, 16, "vn%d", ROOT_IMAGE_UNIT);
1264 error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc());
1265 return (error);
1266 }
1267
1268 /*
1269 * Duplicate the current processes' credentials. Since we are called only
1270 * as the result of a SET ioctl and only root can do that, any future access
1271 * to this "disk" is essentially as root. Note that credentials may change
1272 * if some other uid can write directly to the mapped file (NFS).
1273 */
1274 static int
1275 setcred(struct vnode * vp, kauth_cred_t cred)
1276 {
1277 char *tmpbuf;
1278 int error = 0;
1279 struct vfs_context context;
1280
1281 /*
1282 * Horrible kludge to establish credentials for NFS XXX.
1283 */
1284 context.vc_thread = current_thread();
1285 context.vc_ucred = cred;
1286 tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK);
1287 error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL);
1288 FREE(tmpbuf, M_TEMP);
1289 return (error);
1290 }
1291
1292 void
1293 vnclear(struct vn_softc *vn, vfs_context_t ctx)
1294 {
1295 if (vn->sc_vp != NULL) {
1296 /* release long-term reference */
1297 (void)vn_close(vn->sc_vp, vn->sc_open_flags, ctx);
1298 vn->sc_vp = NULL;
1299 }
1300 if (vn->sc_shadow_vp != NULL) {
1301 /* release long-term reference */
1302 (void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE, ctx);
1303 vn->sc_shadow_vp = NULL;
1304 }
1305 if (vn->sc_shadow_map != NULL) {
1306 shadow_map_free(vn->sc_shadow_map);
1307 vn->sc_shadow_map = NULL;
1308 }
1309 vn->sc_flags &= ~(VNF_INITED | VNF_READONLY);
1310 if (vn->sc_cred) {
1311 kauth_cred_unref(&vn->sc_cred);
1312 }
1313 vn->sc_size = 0;
1314 vn->sc_fsize = 0;
1315 if (vn->sc_cdev) {
1316 devfs_remove(vn->sc_cdev);
1317 vn->sc_cdev = NULL;
1318 }
1319 }
1320
1321 static int
1322 vnsize(dev_t dev)
1323 {
1324 int secsize;
1325 struct vn_softc *vn;
1326 int unit;
1327
1328 unit = vnunit(dev);
1329 if (vnunit(dev) >= NVNDEVICE) {
1330 return (-1);
1331 }
1332
1333 vn = vn_table + unit;
1334 if ((vn->sc_flags & VNF_INITED) == 0)
1335 secsize = -1;
1336 else
1337 secsize = vn->sc_secsize;
1338
1339 return (secsize);
1340 }
1341
/*
 * Major numbers handed to bdevsw_add()/cdevsw_add_with_bdev().
 * NOTE(review): -1 presumably asks for any free slot -- confirm against
 * the bdevsw_add()/cdevsw_add() contract.
 */
#define CDEV_MAJOR 	-1
#define BDEV_MAJOR 	-1

/* non-zero once vndevice_init() has run its one-time initialization */
static int vndevice_inited = 0;

/*
 * vndevice_init
 *
 * One-time registration of the vn block/character devices.  Calls after
 * the first are no-ops.
 *
 * The flag is latched after the first attempt whether or not
 * vndevice_do_init() succeeded (it reports failures via printf and returns
 * void).  Previously the flag was tested but never set, so every call
 * re-ran bdevsw_add(), cdevsw_add_with_bdev() and the devfs node creation.
 */
void
vndevice_init(void)
{
	if (vndevice_inited) {
		return;
	}

	vndevice_do_init();
	vndevice_inited = 1;
}
1354
1355 static void
1356 vndevice_do_init( void )
1357 {
1358 int i;
1359
1360 vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw);
1361
1362 if (vndevice_bdev_major < 0) {
1363 printf("vndevice_init: bdevsw_add() returned %d\n",
1364 vndevice_bdev_major);
1365 return;
1366 }
1367 vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw,
1368 vndevice_bdev_major);
1369 if (vndevice_cdev_major < 0) {
1370 printf("vndevice_init: cdevsw_add() returned %d\n",
1371 vndevice_cdev_major);
1372 return;
1373 }
1374 for (i = 0; i < NVNDEVICE; i++) {
1375 dev_t dev = makedev(vndevice_bdev_major, i);
1376 vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK,
1377 UID_ROOT, GID_OPERATOR,
1378 0600, "vn%d",
1379 i);
1380 if (vn_table[i].sc_bdev == NULL)
1381 printf("vninit: devfs_make_node failed!\n");
1382 }
1383 }
1384
1385 static void
1386 vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to)
1387 {
1388 to->vn_file = CAST_USER_ADDR_T(from->vn_file);
1389 to->vn_size = from->vn_size;
1390 to->vn_control = from->vn_control;
1391 }
1392
1393 #endif /* NVNDEVICE */