]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/vn.c
xnu-344.23.tar.gz
[apple/xnu.git] / bsd / dev / vn / vn.c
1
2 /*
3 * Copyright (c) 1988 University of Utah.
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: Utah Hdr: vn.c 1.13 94/04/02
40 *
41 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
42 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
43 */
44
45 /*
46 * Vnode disk driver.
47 *
48 * Block/character interface to a vnode. Allows one to treat a file
49 * as a disk (e.g. build a filesystem in it, mount it, etc.).
50 *
51 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
52 * instead of a simple VOP_RDWR. We do this to avoid distorting the
53 * local buffer cache.
54 *
55 * NOTE 2: There is a security issue involved with this driver.
56 * Once mounted all access to the contents of the "mapped" file via
57 * the special file is controlled by the permissions on the special
58 * file, the protection of the mapped file is ignored (effectively,
59 * by using root credentials in all transactions).
60 *
61 * NOTE 3: Doesn't interact with leases, should it?
62 */
63
64 #include "vndevice.h"
65
66 #if NVNDEVICE > 0
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/mount.h>
72 #include <sys/namei.h>
73 #include <sys/proc.h>
74 #include <sys/buf.h>
75 #include <sys/malloc.h>
76 #include <sys/mount.h>
77 #include <sys/vnode.h>
78 #include <sys/fcntl.h>
79 #include <sys/conf.h>
80 #include <dev/disk.h>
81 #include <sys/stat.h>
82 #include <sys/conf.h>
83
84 #include <sys/vnioctl.h>
85
86 #include <sys/vm.h>
87
88 #include <vm/vm_pager.h>
89 #include <vm/vm_pageout.h>
90 #include <mach/memory_object_types.h>
91
92 #include <miscfs/devfs/devfs.h>
93
94 #include "shadow.h"
95
96 static ioctl_fcn_t vnioctl_chr;
97 static ioctl_fcn_t vnioctl_blk;
98 static open_close_fcn_t vnopen;
99 static open_close_fcn_t vnclose;
100 static psize_fcn_t vnsize;
101 static strategy_fcn_t vnstrategy;
102 static read_write_fcn_t vnread;
103 static read_write_fcn_t vnwrite;
104
105 static int vndevice_bdev_major;
106 static int vndevice_cdev_major;
107
108 /*
109 * cdevsw
110 * D_DISK we want to look like a disk
111 * D_CANFREE We support B_FREEBUF
112 */
113
114 static struct bdevsw vn_bdevsw = {
115 /* open */ vnopen,
116 /* close */ vnclose,
117 /* strategy */ vnstrategy,
118 /* ioctl */ vnioctl_blk,
119 /* dump */ eno_dump,
120 /* psize */ vnsize,
121 /* flags */ D_DISK,
122 };
123
124 static struct cdevsw vn_cdevsw = {
125 /* open */ vnopen,
126 /* close */ vnclose,
127 /* read */ vnread,
128 /* write */ vnwrite,
129 /* ioctl */ vnioctl_chr,
130 /* stop */ eno_stop,
131 /* reset */ eno_reset,
132 /* ttys */ 0,
133 /* select */ eno_select,
134 /* mmap */ eno_mmap,
135 /* strategy */ eno_strat,
136 /* getc */ eno_getc,
137 /* putc */ eno_putc,
138 /* flags */ D_DISK,
139 };
140
141 struct vn_softc {
142 u_int64_t sc_fsize; /* file size in bytes */
143 u_int64_t sc_size; /* size of vn, sc_secsize scale */
144 int sc_flags; /* flags */
145 int sc_secsize; /* sector size */
146 struct vnode *sc_vp; /* vnode if not NULL */
147 int sc_open_flags;
148 struct vnode *sc_shadow_vp; /* shadow vnode if not NULL */
149 shadow_map_t * sc_shadow_map; /* shadow map if not NULL */
150 struct ucred *sc_cred; /* credentials */
151 u_long sc_options; /* options */
152 void * sc_bdev;
153 void * sc_cdev;
154 } vn_table[NVNDEVICE];
155
156 #define ROOT_IMAGE_UNIT 0
157
158 /* sc_flags */
159 #define VNF_INITED 0x01
160 #define VNF_READONLY 0x02
161
162 static u_long vn_options;
163
164 #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
165 #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
166
167 static int vnsetcred (struct vn_softc *vn, struct proc *p);
168 static void vnclear (struct vn_softc *vn);
169
170 static int
171 vniocattach_file(struct vn_softc *vn,
172 struct vn_ioctl *vio,
173 dev_t dev,
174 int in_kernel,
175 struct proc *p);
176 static int
177 vniocattach_shadow(struct vn_softc * vn,
178 struct vn_ioctl *vio,
179 dev_t dev,
180 int in_kernel,
181 struct proc *p);
/*
 * vnunit:
 *
 *	Map a device number to its vn unit, which is simply the minor
 *	number.  The return type is spelled out; the original relied on
 *	implicit int.
 */
static __inline__ int
vnunit(dev_t dev)
{
	return (minor(dev));
}
187
/*
 * vnclose:
 *
 *	Close entry point.  The driver keeps no per-open state, so every
 *	close succeeds.
 */
static int
vnclose(dev_t dev, int flags, int devtype, struct proc *p)
{
	int	status = 0;

	return (status);
}
193
194 static int
195 vnopen(dev_t dev, int flags, int devtype, struct proc *p)
196 {
197 struct vn_softc *vn;
198 int unit;
199
200 unit = vnunit(dev);
201 if (vnunit(dev) >= NVNDEVICE) {
202 return (ENXIO);
203 }
204 vn = vn_table + unit;
205 if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
206 return (EACCES);
207
208 return(0);
209 }
210
211 static int
212 vnread(dev_t dev, struct uio *uio, int ioflag)
213 {
214 struct proc * p = current_proc();
215 int status;
216 struct vn_softc * vn;
217 int unit;
218
219 unit = vnunit(dev);
220 if (vnunit(dev) >= NVNDEVICE) {
221 return (ENXIO);
222 }
223 vn = vn_table + unit;
224 if ((vn->sc_flags & VNF_INITED) == 0) {
225 return (ENXIO);
226 }
227 if (vn->sc_shadow_vp != NULL) {
228 return (ENODEV);
229 }
230 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
231 status = VOP_READ(vn->sc_vp, uio, ioflag, vn->sc_cred);
232 VOP_UNLOCK(vn->sc_vp, 0, p);
233
234 return (status);
235 }
236
237 static int
238 vnwrite(dev_t dev, struct uio *uio, int ioflag)
239 {
240 struct proc * p = current_proc();
241 int status;
242 struct vn_softc * vn;
243 int unit;
244
245 unit = vnunit(dev);
246 if (vnunit(dev) >= NVNDEVICE) {
247 return (ENXIO);
248 }
249 vn = vn_table + unit;
250 if ((vn->sc_flags & VNF_INITED) == 0) {
251 return (ENXIO);
252 }
253 if (vn->sc_shadow_vp != NULL) {
254 return (ENODEV);
255 }
256 if (vn->sc_flags & VNF_READONLY) {
257 return (EROFS);
258 }
259
260 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
261 status = VOP_WRITE(vn->sc_vp, uio, ioflag, vn->sc_cred);
262 VOP_UNLOCK(vn->sc_vp, 0, p);
263
264 return (status);
265 }
266
267 static boolean_t
268 bp_is_mapped(struct buf * bp, vm_offset_t * vaddr)
269 {
270 boolean_t is_mapped = FALSE;
271
272 if (bp->b_flags & B_NEED_IODONE) {
273 struct buf * real_bp = (struct buf *)bp->b_real_bp;
274
275 if (real_bp && real_bp->b_data) {
276 *vaddr = (vm_offset_t)real_bp->b_data;
277 is_mapped = TRUE;
278 }
279 }
280 return (is_mapped);
281 }
282
283 static __inline__ int
284 file_io(struct vnode * vp, struct ucred * cred,
285 enum uio_rw op, char * base, off_t offset, long count,
286 struct proc * p, long * resid)
287 {
288 struct uio auio;
289 struct iovec aiov;
290 int error;
291
292 bzero(&auio, sizeof(auio));
293 aiov.iov_base = base;
294 aiov.iov_len = count;
295 auio.uio_iov = &aiov;
296 auio.uio_iovcnt = 1;
297 auio.uio_segflg = UIO_SYSSPACE;
298 auio.uio_offset = offset;
299 auio.uio_rw = op;
300 auio.uio_resid = count;
301 auio.uio_procp = p;
302 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
303 if (op == UIO_READ)
304 error = VOP_READ(vp, &auio, IO_SYNC, cred);
305 else
306 error = VOP_WRITE(vp, &auio, IO_SYNC, cred);
307 VOP_UNLOCK(vp, 0, p);
308 *resid = auio.uio_resid;
309 return (error);
310 }
311
312 static int
313 shadow_read(struct vn_softc * vn, struct buf * bp, char * base, struct proc * p)
314 {
315 int error = 0;
316 u_long offset;
317 boolean_t read_shadow;
318 u_long resid;
319 u_long start = 0;
320
321 offset = bp->b_blkno;
322 resid = bp->b_bcount / vn->sc_secsize;
323
324 while (resid > 0) {
325 u_long temp_resid;
326 u_long this_offset;
327 u_long this_resid;
328 struct vnode * vp;
329
330 read_shadow = shadow_map_read(vn->sc_shadow_map,
331 offset, resid,
332 &this_offset, &this_resid);
333 if (read_shadow) {
334 vp = vn->sc_shadow_vp;
335 }
336 else {
337 vp = vn->sc_vp;
338 }
339 error = file_io(vp, vn->sc_cred, UIO_READ, base + start,
340 (off_t)this_offset * vn->sc_secsize,
341 this_resid * vn->sc_secsize, p, &temp_resid);
342 if (error)
343 break;
344 temp_resid = this_resid - temp_resid / vn->sc_secsize;
345 if (temp_resid == 0) {
346 static int printed = 0;
347 printf("vn device: shadow_write zero length read (printed %d)\n", printed);
348 printed++;
349 break;
350 }
351 resid -= temp_resid;
352 offset += temp_resid;
353 start += temp_resid * vn->sc_secsize;;
354 }
355 bp->b_resid = resid * vn->sc_secsize;
356 return (error);
357 }
358
359 static int
360 shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
361 struct proc * p)
362 {
363 int error = 0;
364 u_long offset;
365 boolean_t shadow_grew;
366 u_long resid;
367 u_long start = 0;
368
369 offset = bp->b_blkno;
370 resid = bp->b_bcount / vn->sc_secsize;
371
372 while (resid > 0) {
373 u_long temp_resid;
374 u_long this_offset;
375 u_long this_resid;
376 struct vnode * vp;
377
378 shadow_grew = shadow_map_write(vn->sc_shadow_map,
379 offset, resid,
380 &this_offset, &this_resid);
381 if (shadow_grew) {
382 #if 0
383 off_t size;
384 /* truncate the file to its new length before write */
385 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
386 * vn->sc_secsize;
387 vn_lock(vn->sc_shadow_vp, LK_EXCLUSIVE | LK_RETRY, p);
388 VOP_TRUNCATE(vn->sc_shadow_vp, size,
389 IO_SYNC, vn->sc_cred, p);
390 VOP_UNLOCK(vn->sc_shadow_vp, 0, p);
391 #endif 0
392 }
393 error = file_io(vn->sc_shadow_vp, vn->sc_cred, UIO_WRITE,
394 base + start,
395 (off_t)this_offset * vn->sc_secsize,
396 this_resid * vn->sc_secsize, p, &temp_resid);
397 if (error) {
398 break;
399 }
400 temp_resid = this_resid - temp_resid / vn->sc_secsize;
401 if (temp_resid == 0) {
402 static int printed = 0;
403 printf("vn device: shadow_write zero length write (printed %d)\n", printed);
404 printed++;
405 break;
406 }
407 resid -= temp_resid;
408 offset += temp_resid;
409 start += temp_resid * vn->sc_secsize;;
410 }
411 bp->b_resid = resid * vn->sc_secsize;
412 return (error);
413 }
414
415 static int
416 vn_readwrite_io(struct vn_softc * vn, struct buf * bp)
417 {
418 int error = 0;
419 char * iov_base;
420 boolean_t need_unmap = FALSE;
421 struct proc * p = current_proc();
422 vm_offset_t vaddr = NULL;
423
424 if (bp->b_flags & B_VECTORLIST) {
425 if (bp_is_mapped(bp, &vaddr) == FALSE) {
426 if (ubc_upl_map(bp->b_pagelist, &vaddr)
427 != KERN_SUCCESS) {
428 panic("vn device: ubc_upl_map failed");
429 }
430 else {
431 need_unmap = TRUE;
432 }
433 }
434 }
435 if (error)
436 return (error);
437
438 if (vaddr != NULL)
439 iov_base = (caddr_t)(vaddr + bp->b_uploffset);
440 else
441 iov_base = bp->b_data;
442 if (vn->sc_shadow_vp == NULL) {
443 error = file_io(vn->sc_vp, vn->sc_cred,
444 bp->b_flags & B_READ ? UIO_READ : UIO_WRITE,
445 iov_base, (off_t)bp->b_blkno * vn->sc_secsize,
446 bp->b_bcount, p, &bp->b_resid);
447 }
448 else {
449 if (bp->b_flags & B_READ)
450 error = shadow_read(vn, bp, iov_base, p);
451 else
452 error = shadow_write(vn, bp, iov_base, p);
453 if (error == 0)
454 bp->b_resid = 0;
455
456 }
457 if (need_unmap) {
458 ubc_upl_unmap(bp->b_pagelist);
459 }
460 return (error);
461 }
462
463 static void
464 vnstrategy(struct buf *bp)
465 {
466 struct vn_softc *vn;
467 int error = 0;
468 long sz; /* in sc_secsize chunks */
469
470 vn = vn_table + vnunit(bp->b_dev);
471 if ((vn->sc_flags & VNF_INITED) == 0) {
472 bp->b_error = ENXIO;
473 bp->b_flags |= B_ERROR;
474 biodone(bp);
475 return;
476 }
477
478 bp->b_resid = bp->b_bcount;
479 /*
480 * Check for required alignment. Transfers must be a valid
481 * multiple of the sector size.
482 */
483 if (bp->b_bcount % vn->sc_secsize != 0 ||
484 bp->b_blkno % (vn->sc_secsize / DEV_BSIZE) != 0) {
485 bp->b_error = EINVAL;
486 bp->b_flags |= B_ERROR | B_INVAL;
487 biodone(bp);
488 return;
489 }
490 sz = howmany(bp->b_bcount, vn->sc_secsize);
491
492 /*
493 * If out of bounds return an error. If at the EOF point,
494 * simply read or write less.
495 */
496 if (bp->b_blkno >= vn->sc_size) {
497 bp->b_error = EINVAL;
498 bp->b_flags |= B_ERROR | B_INVAL;
499 biodone(bp);
500 return;
501 }
502 /*
503 * If the request crosses EOF, truncate the request.
504 */
505 if ((bp->b_blkno + sz) > vn->sc_size) {
506 bp->b_bcount = (vn->sc_size - bp->b_blkno) * vn->sc_secsize;
507 bp->b_resid = bp->b_bcount;
508 }
509
510 if (vn->sc_vp) {
511 error = vn_readwrite_io(vn, bp);
512 if (error) {
513 bp->b_error = error;
514 bp->b_flags |= B_ERROR;
515 }
516 biodone(bp);
517 }
518 else {
519 bp->b_flags |= B_ERROR;
520 bp->b_error = EINVAL;
521 biodone(bp);
522 }
523 }
524
525 /* ARGSUSED */
526 static int
527 vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p,
528 int is_char)
529 {
530 struct vn_softc *vn;
531 struct vn_ioctl *vio;
532 int error;
533 u_long *f;
534 u_int64_t * o;
535 int unit;
536
537 unit = vnunit(dev);
538 if (vnunit(dev) >= NVNDEVICE) {
539 return (ENXIO);
540 }
541 vn = vn_table + unit;
542 error = suser(p->p_ucred, &p->p_acflag);
543 if (error)
544 return (error);
545
546 vio = (struct vn_ioctl *)data;
547 f = (u_long*)data;
548 o = (u_int64_t *)data;
549 switch (cmd) {
550 case VNIOCDETACH:
551 case DKIOCGETMAXBLOCKCOUNTREAD:
552 case DKIOCGETMAXBLOCKCOUNTWRITE:
553 case DKIOCGETMAXSEGMENTCOUNTREAD:
554 case DKIOCGETMAXSEGMENTCOUNTWRITE:
555 case DKIOCGETBLOCKCOUNT32:
556 if ((vn->sc_flags & VNF_INITED) == 0) {
557 return (ENXIO);
558 }
559 break;
560 default:
561 break;
562 }
563 switch (cmd) {
564 case DKIOCGETMAXBLOCKCOUNTREAD:
565 *o = vn->sc_vp->v_mount->mnt_maxreadcnt / vn->sc_secsize;
566 break;
567 case DKIOCGETMAXBLOCKCOUNTWRITE:
568 *o = vn->sc_vp->v_mount->mnt_maxwritecnt / vn->sc_secsize;
569 break;
570 case DKIOCGETMAXSEGMENTCOUNTREAD:
571 *o = vn->sc_vp->v_mount->mnt_segreadcnt;
572 break;
573 case DKIOCGETMAXSEGMENTCOUNTWRITE:
574 *o = vn->sc_vp->v_mount->mnt_segwritecnt;
575 break;
576 case DKIOCGETBLOCKSIZE:
577 *f = vn->sc_secsize;
578 break;
579 case DKIOCSETBLOCKSIZE:
580 if (is_char) {
581 /* can only set block size on block device */
582 return (ENODEV);
583 }
584 if (vn->sc_shadow_vp != NULL) {
585 /* can't set the block size if already shadowing */
586 return (EBUSY);
587 }
588 if (*f < DEV_BSIZE) {
589 return (EINVAL);
590 }
591 vn->sc_secsize = *f;
592 /* recompute the size in terms of the new blocksize */
593 vn->sc_size = vn->sc_fsize / vn->sc_secsize;
594 break;
595 case DKIOCISWRITABLE:
596 *f = 1;
597 break;
598 case DKIOCGETBLOCKCOUNT32:
599 *f = vn->sc_size;
600 break;
601 case DKIOCGETBLOCKCOUNT64:
602 *o = vn->sc_size;
603 break;
604 case VNIOCSHADOW:
605 if (vn->sc_shadow_vp != NULL) {
606 return (EBUSY);
607 }
608 if (vn->sc_vp == NULL) {
609 /* much be attached before we can shadow */
610 return (EINVAL);
611 }
612 if (vio->vn_file == NULL) {
613 return (EINVAL);
614 }
615 error = vniocattach_shadow(vn, vio, dev, 0, p);
616 break;
617
618 case VNIOCATTACH:
619 if (is_char) {
620 /* attach only on block device */
621 return (ENODEV);
622 }
623 if (vn->sc_flags & VNF_INITED) {
624 return (EBUSY);
625 }
626 if (vio->vn_file == NULL) {
627 return (EINVAL);
628 }
629 error = vniocattach_file(vn, vio, dev, 0, p);
630 break;
631
632 case VNIOCDETACH:
633 if (is_char) {
634 /* detach only on block device */
635 return (ENODEV);
636 }
637 /* Note: spec_open won't open a mounted block device */
638
639 /*
640 * XXX handle i/o in progress. Return EBUSY, or wait, or
641 * flush the i/o.
642 * XXX handle multiple opens of the device. Return EBUSY,
643 * or revoke the fd's.
644 * How are these problems handled for removable and failing
645 * hardware devices? (Hint: They are not)
646 */
647 vnclear(vn);
648 break;
649
650 case VNIOCGSET:
651 vn_options |= *f;
652 *f = vn_options;
653 break;
654
655 case VNIOCGCLEAR:
656 vn_options &= ~(*f);
657 *f = vn_options;
658 break;
659
660 case VNIOCUSET:
661 vn->sc_options |= *f;
662 *f = vn->sc_options;
663 break;
664
665 case VNIOCUCLEAR:
666 vn->sc_options &= ~(*f);
667 *f = vn->sc_options;
668 break;
669
670 default:
671 error = ENOTTY;
672 break;
673 }
674 return(error);
675 }
676
677 static int
678 vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
679 {
680 return (vnioctl(dev, cmd, data, flag, p, TRUE));
681 }
682
683 static int
684 vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
685 {
686 return (vnioctl(dev, cmd, data, flag, p, FALSE));
687 }
688
689 /*
690 * vniocattach_file:
691 *
692 * Attach a file to a VN partition. Return the size in the vn_size
693 * field.
694 */
695
696 static int
697 vniocattach_file(struct vn_softc *vn,
698 struct vn_ioctl *vio,
699 dev_t dev,
700 int in_kernel,
701 struct proc *p)
702 {
703 struct vattr vattr;
704 struct nameidata nd;
705 int error, flags;
706
707 flags = FREAD|FWRITE;
708 if (in_kernel) {
709 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vio->vn_file, p);
710 }
711 else {
712 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
713 }
714 error = vn_open(&nd, flags, 0);
715 if (error) {
716 if (error != EACCES && error != EPERM && error != EROFS)
717 return (error);
718 flags &= ~FWRITE;
719 if (in_kernel) {
720 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE,
721 vio->vn_file, p);
722 }
723 else {
724 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
725 vio->vn_file, p);
726 }
727 error = vn_open(&nd, flags, 0);
728 if (error)
729 return (error);
730 }
731 if (nd.ni_vp->v_type != VREG) {
732 error = EINVAL;
733 }
734 else if (ubc_isinuse(nd.ni_vp, 1)) {
735 error = EBUSY;
736 }
737 else {
738 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
739 }
740 if (error != 0) {
741 VOP_UNLOCK(nd.ni_vp, 0, p);
742 (void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
743 return (error);
744 }
745 vn->sc_vp = nd.ni_vp;
746 vn->sc_vp->v_flag |= VNOCACHE_DATA;
747 VOP_UNLOCK(nd.ni_vp, 0, p);
748
749 vn->sc_open_flags = flags;
750
751 /*
752 * If the size is specified, override the file attributes. Note that
753 * the vn_size argument is in PAGE_SIZE sized blocks.
754 */
755 #if 0
756 if (vio->vn_size)
757 vn->sc_size = (quad_t)vio->vn_size * PAGE_SIZE / vn->sc_secsize;
758 else
759 vn->sc_size = vattr.va_size / vn->sc_secsize;
760 #endif 0
761 vn->sc_secsize = DEV_BSIZE;
762 vn->sc_fsize = vattr.va_size;
763 vn->sc_size = vattr.va_size / vn->sc_secsize;
764 error = vnsetcred(vn, p);
765 if (error) {
766 (void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
767 return(error);
768 }
769 {
770 dev_t cdev = makedev(vndevice_cdev_major,
771 minor(dev));
772 vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
773 UID_ROOT, GID_OPERATOR,
774 0600, "rvn%d",
775 minor(dev));
776 }
777 vn->sc_flags |= VNF_INITED;
778 if (flags == FREAD)
779 vn->sc_flags |= VNF_READONLY;
780 return(0);
781 }
782
783 static int
784 vniocattach_shadow(vn, vio, dev, in_kernel, p)
785 struct vn_softc *vn;
786 struct vn_ioctl *vio;
787 dev_t dev;
788 int in_kernel;
789 struct proc *p;
790 {
791 struct vattr vattr;
792 struct nameidata nd;
793 int error, flags;
794 shadow_map_t * map;
795
796 flags = FREAD|FWRITE;
797 if (in_kernel) {
798 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vio->vn_file, p);
799 }
800 else {
801 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
802 }
803 error = vn_open(&nd, flags, 0);
804 if (error) {
805 /* shadow MUST be writable! */
806 return (error);
807 }
808 if (nd.ni_vp->v_type != VREG ||
809 (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p))) {
810 VOP_UNLOCK(nd.ni_vp, 0, p);
811 (void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
812 return (error ? error : EINVAL);
813 }
814 vn->sc_shadow_vp = nd.ni_vp;
815 vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
816 VOP_UNLOCK(nd.ni_vp, 0, p);
817
818 map = shadow_map_create(vn->sc_fsize, vattr.va_size,
819 0, vn->sc_secsize);
820 if (map == NULL) {
821 (void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
822 vn->sc_shadow_vp = NULL;
823 return (ENOMEM);
824 }
825 vn->sc_shadow_map = map;
826 vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */
827 return(0);
828 }
829
830 int
831 vndevice_root_image(char * path, char devname[], dev_t * dev_p)
832 {
833 int error = 0;
834 int flags;
835 struct vn_softc * vn;
836 struct vn_ioctl vio;
837
838 vio.vn_file = path;
839 vio.vn_size = 0;
840
841 vn = vn_table + ROOT_IMAGE_UNIT;
842 *dev_p = makedev(vndevice_bdev_major,
843 ROOT_IMAGE_UNIT);
844 sprintf(devname, "vn%d", ROOT_IMAGE_UNIT);
845 error = vniocattach_file(vn, &vio, *dev_p, 1, current_proc());
846 return (error);
847 }
848
849 /*
850 * Duplicate the current processes' credentials. Since we are called only
851 * as the result of a SET ioctl and only root can do that, any future access
852 * to this "disk" is essentially as root. Note that credentials may change
853 * if some other uid can write directly to the mapped file (NFS).
854 */
855 int
856 vnsetcred(struct vn_softc *vn, struct proc * p)
857 {
858 char *tmpbuf;
859 int error = 0;
860 struct proc * current_proc();
861 struct ucred * cred = p->p_ucred;
862
863 /*
864 * Set credits in our softc
865 */
866
867 if (vn->sc_cred)
868 crfree(vn->sc_cred);
869 vn->sc_cred = crdup(cred);
870
871 /*
872 * Horrible kludge to establish credentials for NFS XXX.
873 */
874
875 if (vn->sc_vp) {
876 struct uio auio;
877 struct iovec aiov;
878
879 tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK);
880 bzero(&auio, sizeof(auio));
881
882 aiov.iov_base = tmpbuf;
883 aiov.iov_len = vn->sc_secsize;
884 auio.uio_iov = &aiov;
885 auio.uio_iovcnt = 1;
886 auio.uio_offset = 0;
887 auio.uio_rw = UIO_READ;
888 auio.uio_segflg = UIO_SYSSPACE;
889 auio.uio_resid = aiov.iov_len;
890 vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
891 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
892 VOP_UNLOCK(vn->sc_vp, 0, p);
893 FREE(tmpbuf, M_TEMP);
894 }
895 return (error);
896 }
897
898 void
899 vnclear(struct vn_softc *vn)
900 {
901 int flags;
902 struct proc * p = current_proc(); /* XXX */
903
904 if (vn->sc_vp != NULL) {
905 (void)vn_close(vn->sc_vp, vn->sc_open_flags, vn->sc_cred, p);
906 vn->sc_vp = NULL;
907 }
908 if (vn->sc_shadow_vp != NULL) {
909 (void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE,
910 vn->sc_cred, p);
911 vn->sc_shadow_vp = NULL;
912 }
913 if (vn->sc_shadow_map != NULL) {
914 shadow_map_free(vn->sc_shadow_map);
915 vn->sc_shadow_map = NULL;
916 }
917 vn->sc_flags = ~(VNF_INITED | VNF_READONLY);
918 if (vn->sc_cred) {
919 crfree(vn->sc_cred);
920 vn->sc_cred = NULL;
921 }
922 vn->sc_size = 0;
923 vn->sc_fsize = 0;
924 if (vn->sc_cdev) {
925 devfs_remove(vn->sc_cdev);
926 vn->sc_cdev = NULL;
927 }
928 }
929
930 static int
931 vnsize(dev_t dev)
932 {
933 struct vn_softc *vn;
934 int unit;
935
936 unit = vnunit(dev);
937 if (vnunit(dev) >= NVNDEVICE) {
938 return (ENXIO);
939 }
940 vn = vn_table + unit;
941
942 if ((vn->sc_flags & VNF_INITED) == 0)
943 return(-1);
944
945 return(vn->sc_secsize);
946 }
947
/*
 * Major numbers requested at registration time.  The numbers actually
 * used are whatever the registration calls return -- presumably -1
 * means "pick any free slot"; confirm against bdevsw_add().
 */
#define BDEV_MAJOR	-1
#define CDEV_MAJOR	-1

/* Tested on entry to vndevice_init(). */
static int	vndevice_inited = 0;
951
952 void
953 vndevice_init()
954 {
955 int i;
956
957 if (vndevice_inited)
958 return;
959 vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw);
960
961 if (vndevice_bdev_major < 0) {
962 printf("vndevice_init: bdevsw_add() returned %d\n",
963 vndevice_bdev_major);
964 return;
965 }
966 vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw,
967 vndevice_bdev_major);
968 if (vndevice_cdev_major < 0) {
969 printf("vndevice_init: cdevsw_add() returned %d\n",
970 vndevice_cdev_major);
971 return;
972 }
973 for (i = 0; i < NVNDEVICE; i++) {
974 dev_t dev = makedev(vndevice_bdev_major, i);
975 vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK,
976 UID_ROOT, GID_OPERATOR,
977 0600, "vn%d",
978 i);
979 if (vn_table[i].sc_bdev == NULL)
980 printf("vninit: devfs_make_node failed!\n");
981 }
982 }
983 #endif NVNDEVICE