/*
 * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah Hdr: vn.c 1.13 94/04/02
 *
 *	from: @(#)vn.c	8.6 (Berkeley) 4/1/94
 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
 */
/*
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
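/*
 * Rough userland sketch (illustrative only, not part of this driver): the
 * usual way to use the vn device is to attach a backing file through the
 * attach ioctl.  The request name (VNIOCATTACH) and the vn_ioctl layout
 * (vn_file/vn_size/vn_control, mirrored below by vn_ioctl_32/vn_ioctl_64)
 * are assumed to come from <sys/vnioctl.h>; the device node names are
 * examples only.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/vnioctl.h>
 *
 *	struct vn_ioctl vio = { 0 };
 *	vio.vn_file = "/tmp/disk.img";		// regular file to map
 *	int fd = open("/dev/vn0", O_RDONLY);	// attach is accepted on the
 *						// block node only; caller
 *						// must be root (proc_suser)
 *	if (fd >= 0 && ioctl(fd, VNIOCATTACH, &vio) == 0) {
 *		// the file now appears as a disk behind the vn0 device nodes
 *	}
 */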
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/vnode_internal.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/uio_internal.h>

#include <sys/vnioctl.h>

#include <vm/vm_pager.h>
#include <mach/memory_object_types.h>

#include <miscfs/devfs/devfs.h>
static void
vndevice_do_init(void) __attribute__((section("__TEXT, initcode")));

static	ioctl_fcn_t		vnioctl_chr;
static	ioctl_fcn_t		vnioctl_blk;
static	open_close_fcn_t	vnopen;
static	open_close_fcn_t	vnclose;
static	psize_fcn_t		vnsize;
static	strategy_fcn_t		vnstrategy;
static	read_write_fcn_t	vnread;
static	read_write_fcn_t	vnwrite;

static int	vndevice_bdev_major;
static int	vndevice_cdev_major;
/*
 * D_DISK	we want to look like a disk
 * D_CANFREE	We support B_FREEBUF
 */

static struct bdevsw vn_bdevsw = {
	/* strategy */	vnstrategy,
	/* ioctl */	vnioctl_blk,
	/* ... */
};

static struct cdevsw vn_cdevsw = {
	/* ioctl */	vnioctl_chr,
	/* reset */	eno_reset,
	/* select */	eno_select,
	/* strategy */	eno_strat,
	/* ... */
};
static struct vn_softc {
	u_int64_t	sc_fsize;	/* file size in bytes		*/
	u_int64_t	sc_size;	/* size of vn, sc_secsize scale	*/
	int		sc_flags;	/* flags			*/
	u_int32_t	sc_secsize;	/* sector size			*/
	struct vnode	*sc_vp;		/* vnode if not NULL		*/
	uint32_t	sc_vid;
	int		sc_open_flags;
	struct vnode	*sc_shadow_vp;	/* shadow vnode if not NULL	*/
	uint32_t	sc_shadow_vid;
	shadow_map_t	*sc_shadow_map;	/* shadow map if not NULL	*/
	kauth_cred_t	sc_cred;	/* credentials			*/
	u_int32_t	sc_options;	/* options			*/
	void		*sc_bdev;
	void		*sc_cdev;
} vn_table[NVNDEVICE];
#define ROOT_IMAGE_UNIT	0

#define VNF_INITED	0x01
#define VNF_READONLY	0x02

static u_int32_t	vn_options;

#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
#define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt))
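/*
 * Usage note (illustrative): the option macros fold together the per-unit
 * and the global option bits, e.g.
 *
 *	IFOPT(vn, SOME_OPTION) {
 *		// runs when the bit is set in sc_options or vn_options
 *	}
 *
 * SOME_OPTION stands for any option bit; it is a placeholder here, not a
 * name defined by this driver.
 */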
static	int	setcred(struct vnode * vp, kauth_cred_t cred);
static	void	vnclear(struct vn_softc *vn, vfs_context_t ctx);
static	void	vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to);
void		vndevice_init(void);
int		vndevice_root_image(char * path, char devname[], dev_t * dev_p);
static int
vniocattach_file(struct vn_softc *vn,
		 struct vn_ioctl_64 *vniop,
		 dev_t dev, int in_kernel, proc_t p);
static int
vniocattach_shadow(struct vn_softc * vn,
		   struct vn_ioctl_64 *vniop,
		   dev_t dev, int in_kernel, proc_t p);
static __inline__ int
vnunit(dev_t dev)
{
	return (minor(dev));
}

static	int
vnclose(__unused dev_t dev, __unused int flags,
	__unused int devtype, __unused proc_t p)
{
	return (0);
}

static	int
vnopen(dev_t dev, int flags, __unused int devtype, __unused proc_t p)
{
	struct vn_softc *vn;
	int unit;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}
	vn = vn_table + unit;
	if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
		return (EACCES);

	return (0);
}
static int
file_io(struct vnode * vp, vfs_context_t ctx,
	enum uio_rw op, char * base, off_t offset, user_ssize_t count,
	user_ssize_t * resid)
{
	uio_t		auio;
	int		error;
	char		uio_buf[UIO_SIZEOF(1)];

	auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op,
				    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(base), count);
	if (op == UIO_READ)
		error = VNOP_READ(vp, auio, IO_SYNC, ctx);
	else
		error = VNOP_WRITE(vp, auio, IO_SYNC, ctx);

	if (resid != NULL) {
		*resid = uio_resid(auio);
	}
	return (error);
}
static __inline__ off_t
block_round(off_t o, int blocksize)
{
	return ((o + blocksize - 1) / blocksize);
}

static __inline__ off_t
block_truncate(off_t o, int blocksize)
{
	return (o / blocksize);
}

static __inline__ int
block_remainder(off_t o, int blocksize)
{
	return (o % blocksize);
}
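/*
 * Quick numeric check of the helpers above (values are illustrative):
 * with blocksize = 512, an I/O of 100 bytes at offset 1000 starts in
 * block block_truncate(1000, 512) = 1, begins block_remainder(1000, 512)
 * = 488 bytes into that block, and spans block_round(100 + 488, 512) = 2
 * blocks in total.
 */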
static int
vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
	      vfs_context_t ctx)
{
	u_int32_t	blocksize = vn->sc_secsize;
	int		error = 0;
	off_t		offset;
	user_ssize_t	resid;
	off_t		orig_offset;
	user_ssize_t	orig_resid;

	orig_resid = resid = uio_resid(uio);
	orig_offset = offset = uio_offset(uio);

	while (resid > 0) {
		u_int32_t	remainder;
		u_int32_t	this_block_number;
		u_int32_t	this_block_count;
		off_t		this_offset;
		user_ssize_t	this_resid;
		struct vnode *	vp;

		/* figure out which blocks to read */
		remainder = block_remainder(offset, blocksize);
		if (shadow_map_read(vn->sc_shadow_map,
				    block_truncate(offset, blocksize),
				    block_round(resid + remainder, blocksize),
				    &this_block_number, &this_block_count)) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}

		/* read the blocks (or parts thereof) */
		this_offset = (off_t)this_block_number * blocksize + remainder;
		uio_setoffset(uio, this_offset);
		this_resid = this_block_count * blocksize - remainder;
		if (this_resid > resid) {
			this_resid = resid;
		}
		uio_setresid(uio, this_resid);
		error = VNOP_READ(vp, uio, ioflag, ctx);
		if (error) {
			break;
		}

		/* figure out how much we actually read */
		this_resid -= uio_resid(uio);
		if (this_resid == 0) {
			printf("vn device: vnread_shadow zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
	}
	uio_setresid(uio, resid);
	uio_setoffset(uio, offset);
	return (error);
}
static int
vncopy_block_to_shadow(struct vn_softc * vn, vfs_context_t ctx,
		       u_int32_t file_block, u_int32_t shadow_block)
{
	int	error;
	char *	tmpbuf;

	tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK);
	if (tmpbuf == NULL) {
		return (ENOMEM);
	}
	/* read one block from file at file_block offset */
	error = file_io(vn->sc_vp, ctx, UIO_READ,
			tmpbuf, (off_t)file_block * vn->sc_secsize,
			vn->sc_secsize, NULL);
	/* write one block to shadow file at shadow_block offset */
	error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
			tmpbuf, (off_t)shadow_block * vn->sc_secsize,
			vn->sc_secsize, NULL);
	FREE(tmpbuf, M_TEMP);
	return (error);
}
enum {
	FLAGS_FIRST_BLOCK_PARTIAL = 0x1,
	FLAGS_LAST_BLOCK_PARTIAL = 0x2
};
static int
vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
	       vfs_context_t ctx)
{
	u_int32_t	blocksize = vn->sc_secsize;
	int		error = 0;
	off_t		offset;
	user_ssize_t	resid;

	resid = uio_resid(uio);
	offset = uio_offset(uio);

	while (resid > 0) {
		int		flags = 0;
		u_int32_t	offset_block_number;
		u_int32_t	remainder;
		u_int32_t	resid_block_count;
		u_int32_t	shadow_block_count;
		u_int32_t	shadow_block_number;
		user_ssize_t	this_resid;

		/* figure out which blocks to write */
		offset_block_number = block_truncate(offset, blocksize);
		remainder = block_remainder(offset, blocksize);
		resid_block_count = block_round(resid + remainder, blocksize);
		/* figure out if the first or last blocks are partial writes */
		if (remainder > 0
		    && !shadow_map_is_written(vn->sc_shadow_map,
					      offset_block_number)) {
			/* the first block is a partial write */
			flags |= FLAGS_FIRST_BLOCK_PARTIAL;
		}
		if (resid_block_count > 1
		    && !shadow_map_is_written(vn->sc_shadow_map,
					      offset_block_number
					      + resid_block_count - 1)
		    && block_remainder(offset + resid, blocksize) > 0) {
			/* the last block is a partial write */
			flags |= FLAGS_LAST_BLOCK_PARTIAL;
		}
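		/*
		 * Example (hypothetical numbers): with a 512-byte blocksize,
		 * a 600-byte write at offset 1000 covers blocks 1-3 of the
		 * shadow range; block 1 (the write starts 488 bytes into it)
		 * and block 3 (the write ends 64 bytes into it) are only
		 * partially overwritten, so their current contents must be
		 * copied from the backing file into the shadow before the
		 * new data goes down.
		 */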
		if (shadow_map_write(vn->sc_shadow_map,
				     offset_block_number, resid_block_count,
				     &shadow_block_number,
				     &shadow_block_count)) {
			/* shadow file is growing */
			/* truncate the file to its new length before write */
			off_t	size;

			size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
				* blocksize;
			vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
		}
		/* write the blocks (or parts thereof) */
		uio_setoffset(uio, (off_t)
			      shadow_block_number * blocksize + remainder);
		this_resid = (off_t)shadow_block_count * blocksize - remainder;
		if (this_resid >= resid) {
			this_resid = resid;
			if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) {
				/* copy the last block to the shadow */
				u_int32_t	d;
				u_int32_t	s;

				s = offset_block_number
					+ resid_block_count - 1;
				d = shadow_block_number
					+ shadow_block_count - 1;
				error = vncopy_block_to_shadow(vn, ctx, s, d);
				if (error) {
					printf("vnwrite_shadow: failed to copy"
					       " block %u to shadow block %u\n",
					       s, d);
					break;
				}
			}
		}
		uio_setresid(uio, this_resid);
		if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) {
			/* copy the first block to the shadow */
			error = vncopy_block_to_shadow(vn, ctx,
						       offset_block_number,
						       shadow_block_number);
			if (error) {
				printf("vnwrite_shadow: failed to"
				       " copy block %u to shadow block %u\n",
				       offset_block_number,
				       shadow_block_number);
				break;
			}
		}
		error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, ctx);
		if (error) {
			break;
		}

		/* figure out how much we actually wrote */
		this_resid -= uio_resid(uio);
		if (this_resid == 0) {
			printf("vn device: vnwrite_shadow zero length write\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
	}
	uio_setresid(uio, resid);
	uio_setoffset(uio, offset);
	return (error);
}
static	int
vnread(dev_t dev, struct uio *uio, int ioflag)
{
	struct vfs_context	context;
	int			error = 0;
	boolean_t		funnel_state;
	off_t			offset;
	user_ssize_t		resid;
	struct vn_softc *	vn;
	int			unit;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + unit;
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = vn->sc_cred;

	error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, &context);
		goto done;
	}

	resid = uio_resid(uio);
	offset = uio_offset(uio);

	/*
	 * If out of bounds return an error. If at the EOF point,
	 * simply read or write less.
	 */
	if (offset >= (off_t)vn->sc_fsize) {
		if (offset > (off_t)vn->sc_fsize) {
			error = EINVAL;
		}
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((offset + resid) > (off_t)vn->sc_fsize) {
		resid = vn->sc_fsize - offset;
		uio_setresid(uio, resid);
	}

	if (vn->sc_shadow_vp != NULL) {
		error = vnode_getwithvid(vn->sc_shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, &context);
			goto done;
		}
		error = vnread_shadow(vn, uio, ioflag, &context);
		vnode_put(vn->sc_shadow_vp);
	} else {
		error = VNOP_READ(vn->sc_vp, uio, ioflag, &context);
	}
	vnode_put(vn->sc_vp);
 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return (error);
}
static	int
vnwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct vfs_context	context;
	int			error = 0;
	boolean_t		funnel_state;
	off_t			offset;
	user_ssize_t		resid;
	struct vn_softc *	vn;
	int			unit;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + unit;
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}
	if (vn->sc_flags & VNF_READONLY) {
		error = EROFS;
		goto done;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = vn->sc_cred;

	error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, &context);
		goto done;
	}

	resid = uio_resid(uio);
	offset = uio_offset(uio);

	/*
	 * If out of bounds return an error. If at the EOF point,
	 * simply read or write less.
	 */
	if (offset >= (off_t)vn->sc_fsize) {
		if (offset > (off_t)vn->sc_fsize) {
			error = EINVAL;
		}
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((offset + resid) > (off_t)vn->sc_fsize) {
		resid = (off_t)vn->sc_fsize - offset;
		uio_setresid(uio, resid);
	}

	if (vn->sc_shadow_vp != NULL) {
		error = vnode_getwithvid(vn->sc_shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, &context);
			goto done;
		}
		error = vnwrite_shadow(vn, uio, ioflag, &context);
		vnode_put(vn->sc_shadow_vp);
	} else {
		error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context);
	}
	vnode_put(vn->sc_vp);
 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return (error);
}
static int
shadow_read(struct vn_softc * vn, struct buf * bp, char * base,
	    vfs_context_t ctx)
{
	u_int32_t	blocksize = vn->sc_secsize;
	int		error = 0;
	u_int32_t	offset;
	boolean_t	read_shadow;
	u_int32_t	resid;
	u_int32_t	start = 0;

	offset = buf_blkno(bp);
	resid = buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_int32_t	this_offset;
		u_int32_t	this_resid;
		struct vnode *	vp;

		read_shadow = shadow_map_read(vn->sc_shadow_map,
					      offset, resid,
					      &this_offset, &this_resid);
		if (read_shadow) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}
		error = file_io(vp, ctx, UIO_READ, base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize,
				&temp_resid);
		if (error) {
			break;
		}
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			printf("vn device: shadow_read zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
static int
shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
	     vfs_context_t ctx)
{
	u_int32_t	blocksize = vn->sc_secsize;
	int		error = 0;
	u_int32_t	offset;
	boolean_t	shadow_grew;
	u_int32_t	resid;
	u_int32_t	start = 0;

	offset = buf_blkno(bp);
	resid = buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_int32_t	this_offset;
		u_int32_t	this_resid;

		shadow_grew = shadow_map_write(vn->sc_shadow_map,
					       offset, resid,
					       &this_offset, &this_resid);
		if (shadow_grew) {
			/* truncate the file to its new length before write */
			off_t	size;

			size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
				* blocksize;
			vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
		}
		error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
				base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize,
				&temp_resid);
		if (error) {
			break;
		}
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			printf("vn device: shadow_write zero length write\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
static int
vn_readwrite_io(struct vn_softc * vn, struct buf * bp, vfs_context_t ctx)
{
	int		error = 0;
	char *		iov_base;
	caddr_t		vaddr;

	if (buf_map(bp, &vaddr))
		panic("vn device: buf_map failed");
	iov_base = (char *)vaddr;

	if (vn->sc_shadow_vp == NULL) {
		user_ssize_t	temp_resid;

		error = file_io(vn->sc_vp, ctx,
				buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
				iov_base,
				(off_t)buf_blkno(bp) * vn->sc_secsize,
				buf_resid(bp), &temp_resid);
		buf_setresid(bp, temp_resid);
	}
	else {
		if (buf_flags(bp) & B_READ)
			error = shadow_read(vn, bp, iov_base, ctx);
		else
			error = shadow_write(vn, bp, iov_base, ctx);
	}
	buf_unmap(bp);

	return (error);
}
static void
vnstrategy(struct buf *bp)
{
	struct vn_softc *	vn;
	int			error = 0;
	long			sz;	/* in sc_secsize chunks */
	daddr64_t		blk_num;
	boolean_t		funnel_state;
	struct vnode *		shadow_vp = NULL;
	struct vnode *		vp = NULL;
	struct vfs_context	context;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + vnunit(buf_device(bp));
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = vn->sc_cred;

	buf_setresid(bp, buf_count(bp));
	/*
	 * Check for required alignment.  Transfers must be a valid
	 * multiple of the sector size.
	 */
	blk_num = buf_blkno(bp);
	if (buf_count(bp) % vn->sc_secsize != 0) {
		error = EINVAL;
		goto done;
	}
	sz = howmany(buf_count(bp), vn->sc_secsize);

	/*
	 * If out of bounds return an error. If at the EOF point,
	 * simply read or write less.
	 */
	if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
		if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
			error = EINVAL;
		}
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
		buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
		buf_setresid(bp, buf_count(bp));
	}
	vp = vn->sc_vp;
	if (vp == NULL) {
		error = ENXIO;
		goto done;
	}

	error = vnode_getwithvid(vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, &context);
		goto done;
	}
	shadow_vp = vn->sc_shadow_vp;
	if (shadow_vp != NULL) {
		error = vnode_getwithvid(shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, &context);
			goto done;
		}
	}

	error = vn_readwrite_io(vn, bp, &context);
	vnode_put(vp);
	if (shadow_vp != NULL) {
		vnode_put(shadow_vp);
	}

 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	if (error) {
		buf_seterror(bp, error);
	}
	buf_biodone(bp);
}
static	int
vnioctl(dev_t dev, u_long cmd, caddr_t data,
	__unused int flag, proc_t p,
	boolean_t is_char)
{
	struct vn_softc		*vn;
	struct vn_ioctl_64	*viop;
	int			error;
	u_int32_t		*f;
	u_int64_t		*o;
	int			unit;
	struct vfsioattr	ioattr;
	struct vn_ioctl_64	user_vnio;
	boolean_t		funnel_state;
	struct vfs_context	context;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + unit;
	error = proc_suser(p);
	if (error) {
		goto done;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = vn->sc_cred;

	viop = (struct vn_ioctl_64 *)data;
	f = (u_int32_t *)data;
	o = (u_int64_t *)data;

	switch (cmd) {
	case DKIOCGETBLOCKSIZE:
	case DKIOCSETBLOCKSIZE:
	case DKIOCGETMAXBLOCKCOUNTREAD:
	case DKIOCGETMAXBLOCKCOUNTWRITE:
	case DKIOCGETMAXSEGMENTCOUNTREAD:
	case DKIOCGETMAXSEGMENTCOUNTWRITE:
	case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
	case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
	case DKIOCGETBLOCKCOUNT:
	case DKIOCGETBLOCKCOUNT32:
		if ((vn->sc_flags & VNF_INITED) == 0) {
			error = ENXIO;
			goto done;
		}
		break;
	default:
		break;
	}

	if (vn->sc_vp != NULL)
		vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr);
	else
		bzero(&ioattr, sizeof(ioattr));

	switch (cmd) {
	case DKIOCGETMAXBLOCKCOUNTREAD:
		*o = ioattr.io_maxreadcnt / vn->sc_secsize;
		break;
	case DKIOCGETMAXBLOCKCOUNTWRITE:
		*o = ioattr.io_maxwritecnt / vn->sc_secsize;
		break;
	case DKIOCGETMAXBYTECOUNTREAD:
		*o = ioattr.io_maxreadcnt;
		break;
	case DKIOCGETMAXBYTECOUNTWRITE:
		*o = ioattr.io_maxwritecnt;
		break;
	case DKIOCGETMAXSEGMENTCOUNTREAD:
		*o = ioattr.io_segreadcnt;
		break;
	case DKIOCGETMAXSEGMENTCOUNTWRITE:
		*o = ioattr.io_segwritecnt;
		break;
	case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
		*o = ioattr.io_maxsegreadsize;
		break;
	case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
		*o = ioattr.io_maxsegwritesize;
		break;
	case DKIOCGETBLOCKSIZE:
		*f = vn->sc_secsize;
		break;
	case DKIOCSETBLOCKSIZE:
		if (is_char) {
			/* can only set block size on block device */
			error = ENODEV;
			break;
		}
		if (*f < DEV_BSIZE) {
			error = EINVAL;
			break;
		}
		if (vn->sc_shadow_vp != NULL) {
			if (*f == (unsigned)vn->sc_secsize) {
				break;
			}
			/* can't change the block size if already shadowing */
			error = EBUSY;
			break;
		}
		vn->sc_secsize = *f;
		/* recompute the size in terms of the new blocksize */
		vn->sc_size = vn->sc_fsize / vn->sc_secsize;
		break;
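		/*
		 * e.g. (illustrative): a 1 MiB backing file exposed with
		 * 512-byte sectors reports sc_size = 2048 blocks; setting
		 * the block size to 4096 changes that to 256.
		 */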
	case DKIOCISWRITABLE:
		*f = 1;
		break;
	case DKIOCGETBLOCKCOUNT32:
		*f = vn->sc_size;
		break;
	case DKIOCGETBLOCKCOUNT:
		*o = vn->sc_size;
		break;
	case VNIOCSHADOW:
		if (vn->sc_shadow_vp != NULL) {
			error = EBUSY;
			break;
		}
		if (vn->sc_vp == NULL) {
			/* must be attached before we can shadow */
			error = EINVAL;
			break;
		}
		if (!proc_is64bit(p)) {
			/* downstream code expects LP64 version of vn_ioctl structure */
			vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio);
			viop = &user_vnio;
		}
		if (viop->vn_file == USER_ADDR_NULL) {
			error = EINVAL;
			break;
		}
		error = vniocattach_shadow(vn, viop, dev, 0, p);
		break;
	case VNIOCATTACH:
		if (is_char) {
			/* attach only on block device */
			error = ENODEV;
			break;
		}
		if (vn->sc_flags & VNF_INITED) {
			error = EBUSY;
			break;
		}
		if (!proc_is64bit(p)) {
			/* downstream code expects LP64 version of vn_ioctl structure */
			vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio);
			viop = &user_vnio;
		}
		if (viop->vn_file == USER_ADDR_NULL) {
			error = EINVAL;
			break;
		}
		error = vniocattach_file(vn, viop, dev, 0, p);
		break;
	case VNIOCDETACH:
		if (is_char) {
			/* detach only on block device */
			error = ENODEV;
			break;
		}
		/* Note: spec_open won't open a mounted block device */

		/*
		 * XXX handle i/o in progress.  Return EBUSY, or wait, or
		 * flush the i/o.
		 * XXX handle multiple opens of the device.  Return EBUSY,
		 * or revoke the fd's.
		 * How are these problems handled for removable and failing
		 * hardware devices? (Hint: They are not)
		 */
		vnclear(vn, &context);
		break;
	case VNIOCGCLEAR:
		vn_options &= ~(*f);
		*f = vn_options;
		break;
	case VNIOCUSET:
		vn->sc_options |= *f;
		*f = vn->sc_options;
		break;
	case VNIOCUCLEAR:
		vn->sc_options &= ~(*f);
		*f = vn->sc_options;
		break;
	default:
		error = ENOTTY;
		break;
	}
 done:
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return (error);
}
static int
vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
{
	return (vnioctl(dev, cmd, data, flag, p, TRUE));
}

static int
vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
{
	return (vnioctl(dev, cmd, data, flag, p, FALSE));
}
/*
 * Attach a file to a VN partition.  Return the size in the vn_size
 * field.
 */
static int
vniocattach_file(struct vn_softc *vn,
		 struct vn_ioctl_64 *vniop,
		 dev_t dev,
		 int in_kernel,
		 proc_t p)
{
	dev_t		cdev;
	vfs_context_t	ctx = vfs_context_current();
	kauth_cred_t	cred;
	struct nameidata nd;
	off_t		file_size;
	int		error, flags;

	flags = FREAD|FWRITE;
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
	}
	else {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, ctx);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		if (error != EACCES && error != EPERM && error != EROFS) {
			return (error);
		}
		flags &= ~FWRITE;
		if (in_kernel) {
			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
			       vniop->vn_file, ctx);
		}
		else {
			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
			       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			       vniop->vn_file, ctx);
		}
		error = vn_open(&nd, flags, 0);
		if (error) {
			return (error);
		}
	}
	if (nd.ni_vp->v_type != VREG) {
		error = EINVAL;
	}
	else {
		error = vnode_size(nd.ni_vp, &file_size, ctx);
	}
	if (error != 0) {
		(void) vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		return (error);
	}
	cred = kauth_cred_proc_ref(p);
	nd.ni_vp->v_flag |= VNOCACHE_DATA;
	error = setcred(nd.ni_vp, cred);
	if (error) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		kauth_cred_unref(&cred);
		return (error);
	}
	vn->sc_secsize = DEV_BSIZE;
	vn->sc_fsize = file_size;
	vn->sc_size = file_size / vn->sc_secsize;
	vn->sc_vp = nd.ni_vp;
	vn->sc_vid = vnode_vid(nd.ni_vp);
	vn->sc_open_flags = flags;
	vn->sc_cred = cred;
	cdev = makedev(vndevice_cdev_major, minor(dev));
	vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
				      UID_ROOT, GID_OPERATOR,
				      0600, "rvn%d",
				      minor(dev));
	vn->sc_flags |= VNF_INITED;
	if (flags == FREAD)
		vn->sc_flags |= VNF_READONLY;
	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return (0);
}
static int
vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop,
		   __unused dev_t dev, int in_kernel, proc_t p)
{
	vfs_context_t	ctx = vfs_context_current();
	struct nameidata nd;
	shadow_map_t *	map;
	off_t		file_size;
	int		error, flags;

	flags = FREAD|FWRITE;
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
	}
	else {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, ctx);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/* shadow MUST be writable! */
		return (error);
	}
	if (nd.ni_vp->v_type != VREG
	    || (error = vnode_size(nd.ni_vp, &file_size, ctx))) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		return (error ? error : EINVAL);
	}
	map = shadow_map_create(vn->sc_fsize, file_size,
				0, vn->sc_secsize);
	if (map == NULL) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		vn->sc_shadow_vp = NULL;
		return (ENOMEM);
	}
	vn->sc_shadow_vp = nd.ni_vp;
	vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
	vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
	vn->sc_shadow_map = map;
	vn->sc_flags &= ~VNF_READONLY;	/* we're now read/write */

	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return (0);
}
int
vndevice_root_image(char * path, char devname[], dev_t * dev_p)
{
	int			error = 0;
	struct vn_softc *	vn;
	struct vn_ioctl_64	vnio;

	vnio.vn_file = CAST_USER_ADDR_T(path);

	vn = vn_table + ROOT_IMAGE_UNIT;
	*dev_p = makedev(vndevice_bdev_major,
			 ROOT_IMAGE_UNIT);
	snprintf(devname, 16, "vn%d", ROOT_IMAGE_UNIT);
	error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc());
	return (error);
}
/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
setcred(struct vnode * vp, kauth_cred_t cred)
{
	char *			tmpbuf;
	int			error = 0;
	struct vfs_context	context;

	/*
	 * Horrible kludge to establish credentials for NFS  XXX.
	 */
	context.vc_thread = current_thread();
	context.vc_ucred = cred;
	tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK);
	error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL);
	FREE(tmpbuf, M_TEMP);
	return (error);
}
static void
vnclear(struct vn_softc *vn, vfs_context_t ctx)
{
	if (vn->sc_vp != NULL) {
		/* release long-term reference */
		(void)vn_close(vn->sc_vp, vn->sc_open_flags, ctx);
		vn->sc_vp = NULL;
	}
	if (vn->sc_shadow_vp != NULL) {
		/* release long-term reference */
		(void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE, ctx);
		vn->sc_shadow_vp = NULL;
	}
	if (vn->sc_shadow_map != NULL) {
		shadow_map_free(vn->sc_shadow_map);
		vn->sc_shadow_map = NULL;
	}
	vn->sc_flags &= ~(VNF_INITED | VNF_READONLY);
	if (vn->sc_cred != NULL) {
		kauth_cred_unref(&vn->sc_cred);
	}
	if (vn->sc_cdev != NULL) {
		devfs_remove(vn->sc_cdev);
		vn->sc_cdev = NULL;
	}
}
static	int
vnsize(dev_t dev)
{
	int			secsize;
	struct vn_softc		*vn;
	int			unit;
	boolean_t		funnel_state;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (-1);
	}

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	vn = vn_table + unit;
	if ((vn->sc_flags & VNF_INITED) == 0)
		secsize = -1;
	else
		secsize = vn->sc_secsize;
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return (secsize);
}
#define CDEV_MAJOR	-1
#define BDEV_MAJOR	-1

static int vndevice_inited = 0;

void
vndevice_init(void)
{
	if (vndevice_inited)
		return;

	vndevice_do_init();
}
static void
vndevice_do_init( void )
{
	int	i;

	vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw);

	if (vndevice_bdev_major < 0) {
		printf("vndevice_init: bdevsw_add() returned %d\n",
		       vndevice_bdev_major);
		return;
	}
	vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw,
						   vndevice_bdev_major);
	if (vndevice_cdev_major < 0) {
		printf("vndevice_init: cdevsw_add() returned %d\n",
		       vndevice_cdev_major);
		return;
	}
	for (i = 0; i < NVNDEVICE; i++) {
		dev_t dev = makedev(vndevice_bdev_major, i);
		vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK,
						      UID_ROOT, GID_OPERATOR,
						      0600, "vn%d",
						      i);
		if (vn_table[i].sc_bdev == NULL)
			printf("vninit: devfs_make_node failed!\n");
	}
}
static void
vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to)
{
	to->vn_file = CAST_USER_ADDR_T(from->vn_file);
	to->vn_size = from->vn_size;
	to->vn_control = from->vn_control;
}

#endif /* NVNDEVICE */