]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/vn.c
xnu-792.2.4.tar.gz
[apple/xnu.git] / bsd / dev / vn / vn.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23 /*
24 * Copyright (c) 1988 University of Utah.
25 * Copyright (c) 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software contributed to Berkeley by
29 * the Systems Programming Group of the University of Utah Computer
30 * Science Department.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * from: Utah Hdr: vn.c 1.13 94/04/02
61 *
62 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
63 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
64 */
65
66 /*
67 * Vnode disk driver.
68 *
69 * Block/character interface to a vnode. Allows one to treat a file
70 * as a disk (e.g. build a filesystem in it, mount it, etc.).
71 *
72 * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode
73 * instead of a simple VOP_RDWR. We do this to avoid distorting the
74 * local buffer cache.
75 *
76 * NOTE 2: There is a security issue involved with this driver.
77 * Once mounted all access to the contents of the "mapped" file via
78 * the special file is controlled by the permissions on the special
79 * file, the protection of the mapped file is ignored (effectively,
80 * by using root credentials in all transactions).
81 *
82 * NOTE 3: Doesn't interact with leases, should it?
83 */
84
85 #include "vndevice.h"
86
87 #if NVNDEVICE > 0
88
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/kernel.h>
92 #include <sys/mount.h>
93 #include <sys/namei.h>
94 #include <sys/proc.h>
95 #include <sys/kauth.h>
96 #include <sys/buf.h>
97 #include <sys/malloc.h>
98 #include <sys/vnode_internal.h>
99 #include <sys/fcntl.h>
100 #include <sys/conf.h>
101 #include <sys/disk.h>
102 #include <sys/stat.h>
103 #include <sys/conf.h>
104 #include <sys/uio_internal.h>
105
106 #include <sys/vnioctl.h>
107
108 #include <sys/vm.h>
109
110 #include <vm/vm_pager.h>
111 #include <mach/memory_object_types.h>
112
113 #include <miscfs/devfs/devfs.h>
114
115
116 #include "shadow.h"
117
118 static ioctl_fcn_t vnioctl_chr;
119 static ioctl_fcn_t vnioctl_blk;
120 static open_close_fcn_t vnopen;
121 static open_close_fcn_t vnclose;
122 static psize_fcn_t vnsize;
123 static strategy_fcn_t vnstrategy;
124 static read_write_fcn_t vnread;
125 static read_write_fcn_t vnwrite;
126
127 static int vndevice_bdev_major;
128 static int vndevice_cdev_major;
129
/*
 * bdevsw
 *	D_DISK		we want to look like a disk
 *	(NOTE(review): the old comment also claimed D_CANFREE/B_FREEBUF
 *	support, but the flags field below only sets D_DISK)
 */

static struct bdevsw vn_bdevsw = {
	/* open */	vnopen,
	/* close */	vnclose,
	/* strategy */	vnstrategy,
	/* ioctl */	vnioctl_blk,
	/* dump */	eno_dump,
	/* psize */	vnsize,
	/* flags */	D_DISK,
};
145
/*
 * cdevsw
 *	Character ("raw") interface; read/write go through vnread/vnwrite,
 *	everything with no meaningful raw-device semantics is stubbed out
 *	with the eno_* error routines.
 */
static struct cdevsw vn_cdevsw = {
	/* open */	vnopen,
	/* close */	vnclose,
	/* read */	vnread,
	/* write */	vnwrite,
	/* ioctl */	vnioctl_chr,
	/* stop */	eno_stop,
	/* reset */	eno_reset,
	/* ttys */	0,
	/* select */	eno_select,
	/* mmap */	eno_mmap,
	/* strategy */	eno_strat,
	/* getc */	eno_getc,
	/* putc */	eno_putc,
	/* flags */	D_DISK,
};
162
/*
 * Per-unit soft state, one entry per minor number in vn_table.
 * When sc_shadow_vp is non-NULL the unit is shadowed: reads consult
 * sc_shadow_map to pick the backing or shadow file, writes go to the
 * shadow file only.
 */
struct vn_softc {
	u_int64_t	sc_fsize;	/* file size in bytes */
	u_int64_t	sc_size;	/* size of vn, sc_secsize scale */
	int		sc_flags;	/* flags (VNF_*) */
	u_long		sc_secsize;	/* sector size */
	struct vnode	*sc_vp;		/* vnode if not NULL */
	uint32_t	sc_vid;		/* vnode id to detect reuse of sc_vp */
	int		sc_open_flags;	/* FREAD/FWRITE used at attach time */
	struct vnode	*sc_shadow_vp;	/* shadow vnode if not NULL */
	uint32_t	sc_shadow_vid;	/* vnode id for sc_shadow_vp */
	shadow_map_t *	sc_shadow_map;	/* shadow map if not NULL */
	kauth_cred_t	sc_cred;	/* credentials used for all unit I/O */
	u_int32_t	sc_options;	/* options */
	void *		sc_bdev;	/* devfs node handle — presumably block; not set in visible code */
	void *		sc_cdev;	/* devfs char node (see devfs_make_node in attach) */
} vn_table[NVNDEVICE];
179
#define ROOT_IMAGE_UNIT	0	/* unit reserved for the root disk image */

/* sc_flags */
#define VNF_INITED	0x01	/* unit has a file attached */
#define VNF_READONLY	0x02	/* unit attached without write access */

static u_int32_t	vn_options;	/* global options, OR'd with per-unit sc_options */

/* test an option set either on this unit or globally */
#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
#define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
190
191 static int setcred(struct vnode * vp, struct proc * p,
192 kauth_cred_t cred);
193 static void vnclear (struct vn_softc *vn, struct proc * p);
194 static void vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to);
195 void vndevice_init(void);
196 int vndevice_root_image(char * path, char devname[], dev_t * dev_p);
197
198 static int
199 vniocattach_file(struct vn_softc *vn,
200 struct user_vn_ioctl *vniop,
201 dev_t dev,
202 int in_kernel,
203 struct proc *p);
204 static int
205 vniocattach_shadow(struct vn_softc * vn,
206 struct user_vn_ioctl *vniop,
207 dev_t dev,
208 int in_kernel,
209 struct proc *p);
/* Map a dev_t to its vn unit number; the minor number indexes vn_table. */
static __inline__ int
vnunit(dev_t dev)
{
	return (minor(dev));
}
215
/*
 * vnclose
 *	Close entry point for both device nodes.  No per-open state is
 *	kept, so there is nothing to tear down here.
 */
static int
vnclose(__unused dev_t dev, __unused int flags,
	__unused int devtype, __unused struct proc *p)
{
	return (0);
}
222
/*
 * vnopen
 *	Open entry point for both the block and character nodes.
 *	Validates the unit number and refuses a write-mode open of a
 *	unit that was attached read-only.
 */
static int
vnopen(dev_t dev, int flags, __unused int devtype, __unused struct proc *p)
{
	struct vn_softc *vn;
	int unit;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}
	vn = vn_table + unit;
	if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
		return (EACCES);

	return(0);
}
239
240 static int
241 file_io(struct vnode * vp, struct vfs_context * context_p,
242 enum uio_rw op, char * base, off_t offset, user_ssize_t count,
243 user_ssize_t * resid)
244 {
245 uio_t auio;
246 int error;
247 char uio_buf[UIO_SIZEOF(1)];
248
249 auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op,
250 &uio_buf[0], sizeof(uio_buf));
251 uio_addiov(auio, CAST_USER_ADDR_T(base), count);
252 if (op == UIO_READ)
253 error = VNOP_READ(vp, auio, IO_SYNC, context_p);
254 else
255 error = VNOP_WRITE(vp, auio, IO_SYNC, context_p);
256
257 if (resid != NULL) {
258 *resid = uio_resid(auio);
259 }
260 return (error);
261 }
262
/*
 * block_round
 *	Number of blocksize-sized blocks needed to cover byte offset o,
 *	rounding any partial trailing block up.
 */
static __inline__ off_t
block_round(off_t o, int blocksize)
{
	off_t nblocks;

	nblocks = (o + blocksize - 1) / blocksize;
	return nblocks;
}
268
/*
 * block_truncate
 *	Index of the block containing byte offset o (rounds down).
 */
static __inline__ off_t
block_truncate(off_t o, int blocksize)
{
	return o / blocksize;
}
274
/*
 * block_remainder
 *	Byte offset of o within its containing blocksize-sized block.
 */
static __inline__ int
block_remainder(off_t o, int blocksize)
{
	return o % blocksize;
}
280
/*
 * vnread_shadow
 *	Read from a shadowed unit.  The request is processed in chunks:
 *	for each chunk the shadow map decides whether the blocks have
 *	been written since shadowing began (read from the shadow file)
 *	or not (read from the original backing file).  The uio's
 *	offset/resid are temporarily retargeted per chunk and restored
 *	to the logical position before returning.
 */
static int
vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
	      struct vfs_context * context_p)
{
	u_long blocksize = vn->sc_secsize;
	int error = 0;
	off_t offset;
	user_ssize_t resid;
	off_t orig_offset;
	user_ssize_t orig_resid;

	orig_resid = resid = uio_resid(uio);
	orig_offset = offset = uio_offset(uio);

	while (resid > 0) {
		u_long remainder;
		u_long this_block_number;
		u_long this_block_count;
		off_t this_offset;
		user_ssize_t this_resid;
		struct vnode * vp;

		/* figure out which blocks to read */
		remainder = block_remainder(offset, blocksize);
		if (shadow_map_read(vn->sc_shadow_map,
				    block_truncate(offset, blocksize),
				    block_round(resid + remainder, blocksize),
				    &this_block_number, &this_block_count)) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}

		/* read the blocks (or parts thereof) */
		this_offset = (off_t)this_block_number * blocksize + remainder;
		uio_setoffset(uio, this_offset);
		this_resid = this_block_count * blocksize - remainder;
		if (this_resid > resid) {
			this_resid = resid;
		}
		uio_setresid(uio, this_resid);
		error = VNOP_READ(vp, uio, ioflag, context_p);
		if (error) {
			break;
		}

		/* figure out how much we actually read */
		this_resid -= uio_resid(uio);
		if (this_resid == 0) {
			/* no forward progress: bail rather than spin forever */
			printf("vn device: vnread_shadow zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
	}
	/* restore the caller-visible position/残 count in logical terms */
	uio_setresid(uio, resid);
	uio_setoffset(uio, offset);
	return (error);
}
341
/*
 * vncopy_block_to_shadow
 *	Copy one sc_secsize-sized block from the backing file (at block
 *	index file_block) into the shadow file (at shadow_block).  Used
 *	to fault blocks into the shadow before they are partially
 *	overwritten, so the untouched bytes are preserved.
 */
static int
vncopy_block_to_shadow(struct vn_softc * vn, struct vfs_context * context_p,
		       u_long file_block, u_long shadow_block)
{
	int error;
	char * tmpbuf;

	tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK);
	if (tmpbuf == NULL) {
		return (ENOMEM);
	}
	/* read one block from file at file_block offset */
	error = file_io(vn->sc_vp, context_p, UIO_READ,
			tmpbuf, (off_t)file_block * vn->sc_secsize,
			vn->sc_secsize, NULL);
	if (error) {
		goto done;
	}
	/* write one block to shadow file at shadow_block offset */
	error = file_io(vn->sc_shadow_vp, context_p, UIO_WRITE,
			tmpbuf, (off_t)shadow_block * vn->sc_secsize,
			vn->sc_secsize, NULL);
 done:
	FREE(tmpbuf, M_TEMP);
	return (error);
}
368
/* vnwrite_shadow: which end(s) of a write land mid-block and so need
 * the original block faulted into the shadow first */
enum {
	FLAGS_FIRST_BLOCK_PARTIAL = 0x1,
	FLAGS_LAST_BLOCK_PARTIAL = 0x2
};
373
374 static int
375 vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
376 struct vfs_context * context_p)
377 {
378 u_long blocksize = vn->sc_secsize;
379 int error = 0;
380 user_ssize_t resid;
381 off_t offset;
382
383 resid = uio_resid(uio);
384 offset = uio_offset(uio);
385
386 while (resid > 0) {
387 int flags = 0;
388 u_long offset_block_number;
389 u_long remainder;
390 u_long resid_block_count;
391 u_long shadow_block_count;
392 u_long shadow_block_number;
393 user_ssize_t this_resid;
394
395 /* figure out which blocks to write */
396 offset_block_number = block_truncate(offset, blocksize);
397 remainder = block_remainder(offset, blocksize);
398 resid_block_count = block_round(resid + remainder, blocksize);
399 /* figure out if the first or last blocks are partial writes */
400 if (remainder > 0
401 && !shadow_map_is_written(vn->sc_shadow_map,
402 offset_block_number)) {
403 /* the first block is a partial write */
404 flags |= FLAGS_FIRST_BLOCK_PARTIAL;
405 }
406 if (resid_block_count > 1
407 && !shadow_map_is_written(vn->sc_shadow_map,
408 offset_block_number
409 + resid_block_count - 1)
410 && block_remainder(offset + resid, blocksize) > 0) {
411 /* the last block is a partial write */
412 flags |= FLAGS_LAST_BLOCK_PARTIAL;
413 }
414 if (shadow_map_write(vn->sc_shadow_map,
415 offset_block_number, resid_block_count,
416 &shadow_block_number,
417 &shadow_block_count)) {
418 /* shadow file is growing */
419 #if 0
420 /* truncate the file to its new length before write */
421 off_t size;
422 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
423 * vn->sc_secsize;
424 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC,
425 context_p);
426 #endif 0
427 }
428 /* write the blocks (or parts thereof) */
429 uio_setoffset(uio, (off_t)
430 shadow_block_number * blocksize + remainder);
431 this_resid = (off_t)shadow_block_count * blocksize - remainder;
432 if (this_resid >= resid) {
433 this_resid = resid;
434 if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) {
435 /* copy the last block to the shadow */
436 u_long d;
437 u_long s;
438
439 s = offset_block_number
440 + resid_block_count - 1;
441 d = shadow_block_number
442 + shadow_block_count - 1;
443 error = vncopy_block_to_shadow(vn, context_p,
444 s, d);
445 if (error) {
446 printf("vnwrite_shadow: failed to copy"
447 " block %d to shadow block %d\n",
448 s, d);
449 break;
450 }
451 }
452 }
453 uio_setresid(uio, this_resid);
454 if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) {
455 /* copy the first block to the shadow */
456 error = vncopy_block_to_shadow(vn, context_p,
457 offset_block_number,
458 shadow_block_number);
459 if (error) {
460 printf("vnwrite_shadow: failed to"
461 " copy block %d to shadow block %d\n",
462 offset_block_number,
463 shadow_block_number);
464 break;
465 }
466 }
467 error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, context_p);
468 if (error) {
469 break;
470 }
471 /* figure out how much we actually wrote */
472 this_resid -= uio_resid(uio);
473 if (this_resid == 0) {
474 printf("vn device: vnwrite_shadow zero length write\n");
475 break;
476 }
477 resid -= this_resid;
478 offset += this_resid;
479 }
480 uio_setresid(uio, resid);
481 uio_setoffset(uio, offset);
482 return (error);
483 }
484
485 static int
486 vnread(dev_t dev, struct uio *uio, int ioflag)
487 {
488 struct vfs_context context;
489 int error = 0;
490 boolean_t funnel_state;
491 off_t offset;
492 struct proc * p;
493 user_ssize_t resid;
494 struct vn_softc * vn;
495 int unit;
496
497 unit = vnunit(dev);
498 if (vnunit(dev) >= NVNDEVICE) {
499 return (ENXIO);
500 }
501 p = current_proc();
502 funnel_state = thread_funnel_set(kernel_flock, TRUE);
503 vn = vn_table + unit;
504 if ((vn->sc_flags & VNF_INITED) == 0) {
505 error = ENXIO;
506 goto done;
507 }
508 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
509 if (error != 0) {
510 /* the vnode is no longer available, abort */
511 error = ENXIO;
512 vnclear(vn, p);
513 goto done;
514 }
515
516 resid = uio_resid(uio);
517 offset = uio_offset(uio);
518
519 /*
520 * If out of bounds return an error. If at the EOF point,
521 * simply read less.
522 */
523 if (offset >= (off_t)vn->sc_fsize) {
524 if (offset > (off_t)vn->sc_fsize) {
525 error = EINVAL;
526 }
527 goto done;
528 }
529 /*
530 * If the request crosses EOF, truncate the request.
531 */
532 if ((offset + resid) > (off_t)vn->sc_fsize) {
533 resid = vn->sc_fsize - offset;
534 uio_setresid(uio, resid);
535 }
536
537 context.vc_proc = p;
538 context.vc_ucred = vn->sc_cred;
539 if (vn->sc_shadow_vp != NULL) {
540 error = vnode_getwithvid(vn->sc_shadow_vp,
541 vn->sc_shadow_vid);
542 if (error != 0) {
543 /* the vnode is no longer available, abort */
544 error = ENXIO;
545 vnode_put(vn->sc_vp);
546 vnclear(vn, p);
547 goto done;
548 }
549 error = vnread_shadow(vn, uio, ioflag, &context);
550 vnode_put(vn->sc_shadow_vp);
551 } else {
552 error = VNOP_READ(vn->sc_vp, uio, ioflag, &context);
553 }
554 vnode_put(vn->sc_vp);
555 done:
556 (void) thread_funnel_set(kernel_flock, funnel_state);
557 return (error);
558 }
559
560 static int
561 vnwrite(dev_t dev, struct uio *uio, int ioflag)
562 {
563 struct vfs_context context;
564 int error;
565 boolean_t funnel_state;
566 off_t offset;
567 struct proc * p;
568 user_ssize_t resid;
569 struct vn_softc * vn;
570 int unit;
571
572 unit = vnunit(dev);
573 if (vnunit(dev) >= NVNDEVICE) {
574 return (ENXIO);
575 }
576 p = current_proc();
577 funnel_state = thread_funnel_set(kernel_flock, TRUE);
578 vn = vn_table + unit;
579 if ((vn->sc_flags & VNF_INITED) == 0) {
580 error = ENXIO;
581 goto done;
582 }
583 if (vn->sc_flags & VNF_READONLY) {
584 error = EROFS;
585 goto done;
586 }
587 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
588 if (error != 0) {
589 /* the vnode is no longer available, abort */
590 error = ENXIO;
591 vnclear(vn, p);
592 goto done;
593 }
594 resid = uio_resid(uio);
595 offset = uio_offset(uio);
596
597 /*
598 * If out of bounds return an error. If at the EOF point,
599 * simply write less.
600 */
601 if (offset >= (off_t)vn->sc_fsize) {
602 if (offset > (off_t)vn->sc_fsize) {
603 error = EINVAL;
604 }
605 goto done;
606 }
607 /*
608 * If the request crosses EOF, truncate the request.
609 */
610 if ((offset + resid) > (off_t)vn->sc_fsize) {
611 resid = (off_t)vn->sc_fsize - offset;
612 uio_setresid(uio, resid);
613 }
614
615 context.vc_proc = p;
616 context.vc_ucred = vn->sc_cred;
617
618 if (vn->sc_shadow_vp != NULL) {
619 error = vnode_getwithvid(vn->sc_shadow_vp,
620 vn->sc_shadow_vid);
621 if (error != 0) {
622 /* the vnode is no longer available, abort */
623 error = ENXIO;
624 vnode_put(vn->sc_vp);
625 vnclear(vn, p);
626 goto done;
627 }
628 error = vnwrite_shadow(vn, uio, ioflag, &context);
629 vnode_put(vn->sc_shadow_vp);
630 } else {
631 error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context);
632 }
633 vnode_put(vn->sc_vp);
634 done:
635 (void) thread_funnel_set(kernel_flock, funnel_state);
636 return (error);
637 }
638
/*
 * shadow_read
 *	Strategy-path read for a shadowed unit.  offset and resid are
 *	measured in sc_secsize blocks (not bytes); base is the mapped
 *	kernel address of the buf.  Each chunk is read from either the
 *	shadow file or the backing file as directed by the shadow map.
 */
static int
shadow_read(struct vn_softc * vn, struct buf * bp, char * base, struct proc * p)
{
	u_long		blocksize = vn->sc_secsize;
	struct vfs_context context;
	int 		error = 0;
	u_long		offset;
	boolean_t   	read_shadow;
	u_long		resid;
	u_long		start = 0;

	context.vc_proc = p;
	context.vc_ucred = vn->sc_cred;
	offset = buf_blkno(bp);
	resid = buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_long		this_offset;
		u_long		this_resid;
		struct vnode *	vp;

		read_shadow = shadow_map_read(vn->sc_shadow_map,
					      offset, resid,
					      &this_offset, &this_resid);
		if (read_shadow) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}
		error = file_io(vp, &context, UIO_READ, base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize,
				&temp_resid);
		if (error) {
			break;
		}
		/* convert the byte residual back into whole blocks */
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			/* no forward progress: bail rather than spin forever */
			printf("vn device: shadow_read zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
688
/*
 * shadow_write
 *	Strategy-path write for a shadowed unit.  offset and resid are
 *	measured in sc_secsize blocks; base is the mapped kernel address
 *	of the buf.  All data is written to the shadow file at locations
 *	assigned by the shadow map (the backing file is never modified).
 */
static int
shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
	     struct proc * p)
{
	u_long		blocksize = vn->sc_secsize;
	struct vfs_context context;
	int 		error = 0;
	u_long		offset;
	boolean_t	shadow_grew;
	u_long		resid;
	u_long		start = 0;

	context.vc_proc = p;
	context.vc_ucred = vn->sc_cred;
	offset = buf_blkno(bp);
	resid = buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_long		this_offset;
		u_long		this_resid;

		shadow_grew = shadow_map_write(vn->sc_shadow_map,
					       offset, resid,
					       &this_offset, &this_resid);
		if (shadow_grew) {
#if 0
			off_t	size;
			/* truncate the file to its new length before write */
			size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
				* blocksize;
			vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC,
				      &context);
#endif
		}
		error = file_io(vn->sc_shadow_vp, &context, UIO_WRITE,
				base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize,
				&temp_resid);
		if (error) {
			break;
		}
		/* convert the byte residual back into whole blocks */
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			/* no forward progress: bail rather than spin forever */
			printf("vn device: shadow_write zero length write\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
743
/*
 * vn_readwrite_io
 *	Carry out the transfer described by bp: map the buffer into the
 *	kernel, then either perform direct file I/O against the backing
 *	vnode, or route through the shadow read/write logic when the
 *	unit is shadowed.
 */
static int
vn_readwrite_io(struct vn_softc * vn, struct buf * bp, struct proc * p)
{
	int 		error = 0;
	char *		iov_base;
	caddr_t		vaddr;


	if (buf_map(bp, &vaddr))
	        panic("vn device: buf_map failed");
	iov_base = (char *)vaddr;

	if (vn->sc_shadow_vp == NULL) {
		struct vfs_context  	context;
		user_ssize_t		temp_resid;

		context.vc_proc = p;
		context.vc_ucred = vn->sc_cred;

		error = file_io(vn->sc_vp, &context,
				buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
				iov_base,
				(off_t)buf_blkno(bp) * vn->sc_secsize,
				buf_resid(bp), &temp_resid);
		buf_setresid(bp, temp_resid);
	}
	else {
		if (buf_flags(bp) & B_READ)
			error = shadow_read(vn, bp, iov_base, p);
		else
			error = shadow_write(vn, bp, iov_base, p);
	}
	buf_unmap(bp);

	return (error);
}
780
/*
 * vnstrategy
 *	Block-device strategy routine.  Validates sector alignment and
 *	bounds (in sc_secsize units), takes iocounts on the backing and
 *	(if present) shadow vnodes, performs the transfer synchronously
 *	via vn_readwrite_io, and always completes the buf with
 *	buf_biodone (setting the error on the buf when one occurred).
 */
static void
vnstrategy(struct buf *bp)
{
	struct vn_softc *vn;
	int error = 0;
	long sz;	/* in sc_secsize chunks */
	daddr64_t blk_num;
	boolean_t   		funnel_state;
	struct proc * 		p = current_proc();
	struct vnode *		shadow_vp = NULL;
	struct vnode *		vp = NULL;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + vnunit(buf_device(bp));
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}

	buf_setresid(bp, buf_count(bp));
	/*
	 * Check for required alignment.  Transfers must be a valid
	 * multiple of the sector size.
	 */
	blk_num = buf_blkno(bp);
	if (buf_count(bp) % vn->sc_secsize != 0) {
		error = EINVAL;
		goto done;
	}
	sz = howmany(buf_count(bp), vn->sc_secsize);

	/*
	 * If out of bounds return an error.  If at the EOF point,
	 * simply read or write less.
	 */
	if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
		if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
			error = EINVAL;
		}
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
		buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
		buf_setresid(bp, buf_count(bp));
	}
	vp = vn->sc_vp;
	if (vp == NULL) {
		error = ENXIO;
		goto done;
	}
	/* take an iocount; the vid check detects vnode reuse */
	error = vnode_getwithvid(vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, p);
		goto done;
	}
	shadow_vp = vn->sc_shadow_vp;
	if (shadow_vp != NULL) {
		error = vnode_getwithvid(shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, p);
			goto done;
		}
	}
	error = vn_readwrite_io(vn, bp, p);
	vnode_put(vp);
	if (shadow_vp != NULL) {
		vnode_put(shadow_vp);
	}

 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	if (error) {
	        buf_seterror(bp, error);
	}
	buf_biodone(bp);
	return;
}
867
/* ARGSUSED */
/*
 * vnioctl
 *	Common ioctl backend for the character (is_char == TRUE) and
 *	block (is_char == FALSE) entry points.  Requires super-user.
 *	Handles the DKIOC* disk queries, VNIOC* attach/detach/shadow
 *	controls, and global/per-unit option flags.
 */
static int
vnioctl(dev_t dev, u_long cmd, caddr_t data,
	__unused int flag, struct proc *p,
	int is_char)
{
	struct vn_softc *vn;
	struct user_vn_ioctl *viop;
	int error;
	u_int32_t *f;
	u_int64_t * o;
	int unit;
	struct vfsioattr ioattr;
	struct user_vn_ioctl user_vnio;
	boolean_t   		funnel_state;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + unit;
	error = proc_suser(p);
	if (error) {
		goto done;
	}

	/* the argument is viewed as a struct, a 32-bit or a 64-bit value
	 * depending on the command */
	viop = (struct user_vn_ioctl *)data;
	f = (u_int32_t *)data;
	o = (u_int64_t *)data;
	switch (cmd) {
	/* these commands are only meaningful once a file is attached */
	case VNIOCDETACH:
	case VNIOCDETACH64:
	case DKIOCGETBLOCKSIZE:
	case DKIOCSETBLOCKSIZE:
	case DKIOCGETMAXBLOCKCOUNTREAD:
	case DKIOCGETMAXBLOCKCOUNTWRITE:
	case DKIOCGETMAXSEGMENTCOUNTREAD:
	case DKIOCGETMAXSEGMENTCOUNTWRITE:
	case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
	case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
	case DKIOCGETBLOCKCOUNT:
	case DKIOCGETBLOCKCOUNT32:
		if ((vn->sc_flags & VNF_INITED) == 0) {
			error = ENXIO;
			goto done;
		}
		break;
	default:
		break;
	}

	/* fetch the I/O attributes of the underlying filesystem (zeros
	 * when nothing is attached) */
	if (vn->sc_vp != NULL)
		vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr);
	else
		bzero(&ioattr, sizeof(ioattr));

	switch (cmd) {
	case DKIOCISVIRTUAL:
		*f = 1;
		break;
	case DKIOCGETMAXBLOCKCOUNTREAD:
		*o = ioattr.io_maxreadcnt / vn->sc_secsize;
		break;
	case DKIOCGETMAXBLOCKCOUNTWRITE:
		*o = ioattr.io_maxwritecnt / vn->sc_secsize;
		break;
	case DKIOCGETMAXBYTECOUNTREAD:
		*o = ioattr.io_maxreadcnt;
		break;
	case DKIOCGETMAXBYTECOUNTWRITE:
		*o = ioattr.io_maxwritecnt;
		break;
	case DKIOCGETMAXSEGMENTCOUNTREAD:
		*o = ioattr.io_segreadcnt;
		break;
	case DKIOCGETMAXSEGMENTCOUNTWRITE:
		*o = ioattr.io_segwritecnt;
		break;
	case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
		*o = ioattr.io_maxsegreadsize;
		break;
	case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
		*o = ioattr.io_maxsegwritesize;
		break;
	case DKIOCGETBLOCKSIZE:
	        *f = vn->sc_secsize;
		break;
	case DKIOCSETBLOCKSIZE:
		if (is_char) {
			/* can only set block size on block device */
			error = ENODEV;
			break;
		}
		if (*f < DEV_BSIZE) {
			error = EINVAL;
			break;
		}
		if (vn->sc_shadow_vp != NULL) {
			if (*f == (unsigned)vn->sc_secsize) {
				break;
			}
			/* can't change the block size if already shadowing */
			error = EBUSY;
			break;
		}
		vn->sc_secsize = *f;
		/* recompute the size in terms of the new blocksize */
		vn->sc_size = vn->sc_fsize / vn->sc_secsize;
		break;
	case DKIOCISWRITABLE:
		/* NOTE(review): reports writable even when VNF_READONLY is
		 * set — confirm this is intended */
		*f = 1;
		break;
	case DKIOCGETBLOCKCOUNT32:
		*f = vn->sc_size;
		break;
	case DKIOCGETBLOCKCOUNT:
		*o = vn->sc_size;
		break;
	case VNIOCSHADOW:
	case VNIOCSHADOW64:
		if (vn->sc_shadow_vp != NULL) {
			error = EBUSY;
			break;
		}
		if (vn->sc_vp == NULL) {
			/* must be attached before we can shadow */
			error = EINVAL;
			break;
		}
		if (!proc_is64bit(p)) {
			/* downstream code expects LP64 version of vn_ioctl structure */
			vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio);
			viop = &user_vnio;
		}
		if (viop->vn_file == USER_ADDR_NULL) {
			error = EINVAL;
			break;
		}
		error = vniocattach_shadow(vn, viop, dev, 0, p);
		break;

	case VNIOCATTACH:
	case VNIOCATTACH64:
		if (is_char) {
			/* attach only on block device */
			error = ENODEV;
			break;
		}
		if (vn->sc_flags & VNF_INITED) {
			error = EBUSY;
			break;
		}
		if (!proc_is64bit(p)) {
			/* downstream code expects LP64 version of vn_ioctl structure */
			vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio);
			viop = &user_vnio;
		}
		if (viop->vn_file == USER_ADDR_NULL) {
			error = EINVAL;
			break;
		}
		error = vniocattach_file(vn, viop, dev, 0, p);
		break;

	case VNIOCDETACH:
	case VNIOCDETACH64:
		if (is_char) {
			/* detach only on block device */
			error = ENODEV;
			break;
		}
		/* Note: spec_open won't open a mounted block device */

		/*
		 * XXX handle i/o in progress.  Return EBUSY, or wait, or
		 * flush the i/o.
		 * XXX handle multiple opens of the device.  Return EBUSY,
		 * or revoke the fd's.
		 * How are these problems handled for removable and failing
		 * hardware devices? (Hint: They are not)
		 */
		vnclear(vn, p);
		break;

	case VNIOCGSET:
		vn_options |= *f;
		*f = vn_options;
		break;

	case VNIOCGCLEAR:
		vn_options &= ~(*f);
		*f = vn_options;
		break;

	case VNIOCUSET:
		vn->sc_options |= *f;
		*f = vn->sc_options;
		break;

	case VNIOCUCLEAR:
		vn->sc_options &= ~(*f);
		*f = vn->sc_options;
		break;

	default:
		error = ENOTTY;
		break;
	}
 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return(error);
}
1082
/* ioctl entry point for the character device node */
static int
vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (vnioctl(dev, cmd, data, flag, p, TRUE));
}
1088
/* ioctl entry point for the block device node */
static int
vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (vnioctl(dev, cmd, data, flag, p, FALSE));
}
1094
/*
 * vniocattach_file:
 *
 *	Attach a file to a VN partition.  Opens the file read/write
 *	(falling back to read-only when write permission is denied),
 *	requires it to be a regular file, records its size and the
 *	caller's credentials in the softc, and creates the character
 *	devfs node for the unit.
 */

static int
vniocattach_file(struct vn_softc *vn,
		 struct user_vn_ioctl *vniop,
		 dev_t dev,
		 int in_kernel,
		 struct proc *p)
{
	dev_t	cdev;
	struct vfs_context context;
	kauth_cred_t cred;
	struct nameidata nd;
	off_t file_size;
	int error, flags;

	context.vc_proc = p;
	context.vc_ucred = proc_ucred(p);

	flags = FREAD|FWRITE;
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, &context);
	}
	else {
		NDINIT(&nd, LOOKUP, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, &context);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/* retry read-only, unless the failure wasn't permission-related */
		if (error != EACCES && error != EPERM && error != EROFS)
			return (error);
		flags &= ~FWRITE;
		if (in_kernel) {
			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32,
			       vniop->vn_file, &context);
		}
		else {
			NDINIT(&nd, LOOKUP, FOLLOW,
			       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			       vniop->vn_file, &context);
		}
		error = vn_open(&nd, flags, 0);
		if (error)
			return (error);
	}
	if (nd.ni_vp->v_type != VREG) {
		error = EINVAL;
	}
	else {
		error = vnode_size(nd.ni_vp, &file_size, &context);
	}
	if (error != 0) {
		(void) vn_close(nd.ni_vp, flags, proc_ucred(p), p);
		vnode_put(nd.ni_vp);
		return (error);
	}
	cred = kauth_cred_proc_ref(p);
	/* bypass the buffer cache on the backing file */
	nd.ni_vp->v_flag |= VNOCACHE_DATA;
	error = setcred(nd.ni_vp, p, cred);
	if (error) {
		(void)vn_close(nd.ni_vp, flags, proc_ucred(p), p);
		vnode_put(nd.ni_vp);
		kauth_cred_rele(cred);
		return(error);
	}
	vn->sc_secsize = DEV_BSIZE;
	vn->sc_fsize = file_size;
	vn->sc_size = file_size / vn->sc_secsize;
	vn->sc_vp = nd.ni_vp;
	vn->sc_vid = vnode_vid(nd.ni_vp);
	vn->sc_open_flags = flags;
	vn->sc_cred = cred;
	cdev = makedev(vndevice_cdev_major, minor(dev));
	vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
				      UID_ROOT, GID_OPERATOR,
				      0600, "rvn%d",
				      minor(dev));
	vn->sc_flags |= VNF_INITED;
	if (flags == FREAD)
		vn->sc_flags |= VNF_READONLY;
	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
1186
1187 static int
1188 vniocattach_shadow(struct vn_softc *vn, struct user_vn_ioctl *vniop,
1189 __unused int dev, int in_kernel, struct proc *p)
1190 {
1191 struct vfs_context context;
1192 struct nameidata nd;
1193 int error, flags;
1194 shadow_map_t * map;
1195 off_t file_size;
1196
1197 context.vc_proc = p;
1198 context.vc_ucred = proc_ucred(p);
1199
1200 flags = FREAD|FWRITE;
1201 if (in_kernel) {
1202 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, &context);
1203 }
1204 else {
1205 NDINIT(&nd, LOOKUP, FOLLOW,
1206 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1207 vniop->vn_file, &context);
1208 }
1209 /* vn_open gives both long- and short-term references */
1210 error = vn_open(&nd, flags, 0);
1211 if (error) {
1212 /* shadow MUST be writable! */
1213 return (error);
1214 }
1215 if (nd.ni_vp->v_type != VREG
1216 || (error = vnode_size(nd.ni_vp, &file_size, &context))) {
1217 (void)vn_close(nd.ni_vp, flags, proc_ucred(p), p);
1218 vnode_put(nd.ni_vp);
1219 return (error ? error : EINVAL);
1220 }
1221 map = shadow_map_create(vn->sc_fsize, file_size,
1222 0, vn->sc_secsize);
1223 if (map == NULL) {
1224 (void)vn_close(nd.ni_vp, flags, proc_ucred(p), p);
1225 vnode_put(nd.ni_vp);
1226 vn->sc_shadow_vp = NULL;
1227 return (ENOMEM);
1228 }
1229 vn->sc_shadow_vp = nd.ni_vp;
1230 vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
1231 vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
1232 vn->sc_shadow_map = map;
1233 vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */
1234
1235 /* lose the short-term reference */
1236 vnode_put(nd.ni_vp);
1237 return(0);
1238 }
1239
1240 int
1241 vndevice_root_image(char * path, char devname[], dev_t * dev_p)
1242 {
1243 int error = 0;
1244 struct vn_softc * vn;
1245 struct user_vn_ioctl vnio;
1246
1247 vnio.vn_file = CAST_USER_ADDR_T(path);
1248 vnio.vn_size = 0;
1249
1250 vn = vn_table + ROOT_IMAGE_UNIT;
1251 *dev_p = makedev(vndevice_bdev_major,
1252 ROOT_IMAGE_UNIT);
1253 sprintf(devname, "vn%d", ROOT_IMAGE_UNIT);
1254 error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc());
1255 return (error);
1256 }
1257
1258 /*
1259 * Duplicate the current processes' credentials. Since we are called only
1260 * as the result of a SET ioctl and only root can do that, any future access
1261 * to this "disk" is essentially as root. Note that credentials may change
1262 * if some other uid can write directly to the mapped file (NFS).
1263 */
1264 static int
1265 setcred(struct vnode * vp, struct proc * p, kauth_cred_t cred)
1266 {
1267 char *tmpbuf;
1268 int error = 0;
1269 struct vfs_context context;
1270
1271 /*
1272 * Horrible kludge to establish credentials for NFS XXX.
1273 */
1274 context.vc_proc = p;
1275 context.vc_ucred = cred;
1276 tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK);
1277 error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL);
1278 FREE(tmpbuf, M_TEMP);
1279 return (error);
1280 }
1281
/*
 * Detach a vn unit: close and drop the long-term references on the
 * backing and shadow vnodes, free the shadow map, release the
 * credential taken at attach time, and remove the character devfs node.
 * Statement order matters: the vn_close() calls use vn->sc_cred, so the
 * credential is released only after both closes.
 */
void
vnclear(struct vn_softc *vn, struct proc * p)
{
	if (vn->sc_vp != NULL) {
		/* release long-term reference */
		(void)vn_close(vn->sc_vp, vn->sc_open_flags, vn->sc_cred, p);
		vn->sc_vp = NULL;
	}
	if (vn->sc_shadow_vp != NULL) {
		/* release long-term reference; shadow is always FREAD|FWRITE */
		(void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE,
			       vn->sc_cred, p);
		vn->sc_shadow_vp = NULL;
	}
	if (vn->sc_shadow_map != NULL) {
		shadow_map_free(vn->sc_shadow_map);
		vn->sc_shadow_map = NULL;
	}
	/* mark the unit unconfigured and writable-by-default again */
	vn->sc_flags &= ~(VNF_INITED | VNF_READONLY);
	if (vn->sc_cred) {
		kauth_cred_rele(vn->sc_cred);
		vn->sc_cred = NULL;
	}
	vn->sc_size = 0;
	vn->sc_fsize = 0;
	if (vn->sc_cdev) {
		/* remove the "rvnN" node created at attach time */
		devfs_remove(vn->sc_cdev);
		vn->sc_cdev = NULL;
	}
}
1312
1313 static int
1314 vnsize(dev_t dev)
1315 {
1316 int secsize;
1317 struct vn_softc *vn;
1318 int unit;
1319 boolean_t funnel_state;
1320
1321 unit = vnunit(dev);
1322 if (vnunit(dev) >= NVNDEVICE) {
1323 return (-1);
1324 }
1325
1326 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1327 vn = vn_table + unit;
1328 if ((vn->sc_flags & VNF_INITED) == 0)
1329 secsize = -1;
1330 else
1331 secsize = vn->sc_secsize;
1332 (void) thread_funnel_set(kernel_flock, funnel_state);
1333 return (secsize);
1334 }
1335
1336 #define CDEV_MAJOR -1
1337 #define BDEV_MAJOR -1
1338 static int vndevice_inited = 0;
1339
1340 void
1341 vndevice_init(void)
1342 {
1343 int i;
1344
1345 if (vndevice_inited)
1346 return;
1347 vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw);
1348
1349 if (vndevice_bdev_major < 0) {
1350 printf("vndevice_init: bdevsw_add() returned %d\n",
1351 vndevice_bdev_major);
1352 return;
1353 }
1354 vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw,
1355 vndevice_bdev_major);
1356 if (vndevice_cdev_major < 0) {
1357 printf("vndevice_init: cdevsw_add() returned %d\n",
1358 vndevice_cdev_major);
1359 return;
1360 }
1361 for (i = 0; i < NVNDEVICE; i++) {
1362 dev_t dev = makedev(vndevice_bdev_major, i);
1363 vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK,
1364 UID_ROOT, GID_OPERATOR,
1365 0600, "vn%d",
1366 i);
1367 if (vn_table[i].sc_bdev == NULL)
1368 printf("vninit: devfs_make_node failed!\n");
1369 }
1370 }
1371
1372 static void
1373 vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to)
1374 {
1375 to->vn_file = CAST_USER_ADDR_T(from->vn_file);
1376 to->vn_size = from->vn_size;
1377 to->vn_control = from->vn_control;
1378 }
1379
1380 #endif /* NVNDEVICE */