]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/vn/vn.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / dev / vn / vn.c
CommitLineData
91447636 1/*
2d21ac55 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636 27 */
9bccf70c
A
28
29/*
30 * Copyright (c) 1988 University of Utah.
31 * Copyright (c) 1990, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * the Systems Programming Group of the University of Utah Computer
36 * Science Department.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * from: Utah Hdr: vn.c 1.13 94/04/02
67 *
68 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
69 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $
70 */
71
72/*
73 * Vnode disk driver.
74 *
75 * Block/character interface to a vnode. Allows one to treat a file
76 * as a disk (e.g. build a filesystem in it, mount it, etc.).
77 *
91447636 78 * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode
9bccf70c
A
79 * instead of a simple VOP_RDWR. We do this to avoid distorting the
80 * local buffer cache.
81 *
82 * NOTE 2: There is a security issue involved with this driver.
83 * Once mounted all access to the contents of the "mapped" file via
84 * the special file is controlled by the permissions on the special
85 * file, the protection of the mapped file is ignored (effectively,
86 * by using root credentials in all transactions).
87 *
88 * NOTE 3: Doesn't interact with leases, should it?
89 */
90
91#include "vndevice.h"
92
93#if NVNDEVICE > 0
94
95#include <sys/param.h>
96#include <sys/systm.h>
97#include <sys/kernel.h>
98#include <sys/mount.h>
99#include <sys/namei.h>
100#include <sys/proc.h>
91447636 101#include <sys/kauth.h>
9bccf70c
A
102#include <sys/buf.h>
103#include <sys/malloc.h>
91447636 104#include <sys/vnode_internal.h>
9bccf70c
A
105#include <sys/fcntl.h>
106#include <sys/conf.h>
55e303ae 107#include <sys/disk.h>
9bccf70c
A
108#include <sys/stat.h>
109#include <sys/conf.h>
91447636 110#include <sys/uio_internal.h>
9bccf70c
A
111
112#include <sys/vnioctl.h>
113
114#include <sys/vm.h>
115
116#include <vm/vm_pager.h>
9bccf70c
A
117#include <mach/memory_object_types.h>
118
119#include <miscfs/devfs/devfs.h>
120
55e303ae 121
9bccf70c 122#include "shadow.h"
2d21ac55
A
123static void
124vndevice_do_init(void) __attribute__((section("__TEXT, initcode")));
9bccf70c
A
125
126static ioctl_fcn_t vnioctl_chr;
127static ioctl_fcn_t vnioctl_blk;
128static open_close_fcn_t vnopen;
129static open_close_fcn_t vnclose;
130static psize_fcn_t vnsize;
131static strategy_fcn_t vnstrategy;
132static read_write_fcn_t vnread;
133static read_write_fcn_t vnwrite;
134
135static int vndevice_bdev_major;
136static int vndevice_cdev_major;
137
138/*
139 * cdevsw
140 * D_DISK we want to look like a disk
141 * D_CANFREE We support B_FREEBUF
142 */
143
144static struct bdevsw vn_bdevsw = {
145 /* open */ vnopen,
146 /* close */ vnclose,
147 /* strategy */ vnstrategy,
148 /* ioctl */ vnioctl_blk,
149 /* dump */ eno_dump,
150 /* psize */ vnsize,
151 /* flags */ D_DISK,
152};
153
154static struct cdevsw vn_cdevsw = {
155 /* open */ vnopen,
156 /* close */ vnclose,
157 /* read */ vnread,
158 /* write */ vnwrite,
159 /* ioctl */ vnioctl_chr,
160 /* stop */ eno_stop,
161 /* reset */ eno_reset,
2d21ac55 162 /* ttys */ NULL,
9bccf70c
A
163 /* select */ eno_select,
164 /* mmap */ eno_mmap,
165 /* strategy */ eno_strat,
166 /* getc */ eno_getc,
167 /* putc */ eno_putc,
168 /* flags */ D_DISK,
169};
170
171struct vn_softc {
172 u_int64_t sc_fsize; /* file size in bytes */
173 u_int64_t sc_size; /* size of vn, sc_secsize scale */
174 int sc_flags; /* flags */
b0d623f7 175 u_int32_t sc_secsize; /* sector size */
9bccf70c 176 struct vnode *sc_vp; /* vnode if not NULL */
91447636 177 uint32_t sc_vid;
9bccf70c
A
178 int sc_open_flags;
179 struct vnode *sc_shadow_vp; /* shadow vnode if not NULL */
91447636 180 uint32_t sc_shadow_vid;
9bccf70c 181 shadow_map_t * sc_shadow_map; /* shadow map if not NULL */
2d21ac55 182 kauth_cred_t sc_cred; /* credentials */
91447636 183 u_int32_t sc_options; /* options */
9bccf70c
A
184 void * sc_bdev;
185 void * sc_cdev;
186} vn_table[NVNDEVICE];
187
#define ROOT_IMAGE_UNIT	0

/* bits kept in vn_softc.sc_flags */
#define VNF_INITED	0x01
#define VNF_READONLY	0x02

/* driver-wide option bits; OR-ed with each unit's sc_options when tested */
static u_int32_t	vn_options;

/* non-zero when 'opt' is set either driver-wide or on unit 'vn' */
#define TESTOPT(vn,opt)	(((vn)->sc_options|vn_options) & (opt))
/* opens an 'if' whose condition is TESTOPT(vn,opt) */
#define IFOPT(vn,opt)	if (TESTOPT(vn,opt))
198
2d21ac55
A
199static int setcred(struct vnode * vp, kauth_cred_t cred);
200static void vnclear (struct vn_softc *vn, vfs_context_t ctx);
b0d623f7 201static void vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to);
91447636
A
202void vndevice_init(void);
203int vndevice_root_image(char * path, char devname[], dev_t * dev_p);
9bccf70c
A
204
205static int
206vniocattach_file(struct vn_softc *vn,
b0d623f7 207 struct vn_ioctl_64 *vniop,
9bccf70c
A
208 dev_t dev,
209 int in_kernel,
2d21ac55 210 proc_t p);
9bccf70c
A
211static int
212vniocattach_shadow(struct vn_softc * vn,
b0d623f7 213 struct vn_ioctl_64 *vniop,
9bccf70c
A
214 dev_t dev,
215 int in_kernel,
2d21ac55 216 proc_t p);
91447636 217static __inline__ int
9bccf70c
A
218vnunit(dev_t dev)
219{
220 return (minor(dev));
221}
222
223static int
91447636 224vnclose(__unused dev_t dev, __unused int flags,
2d21ac55 225 __unused int devtype, __unused proc_t p)
9bccf70c
A
226{
227 return (0);
228}
229
230static int
2d21ac55 231vnopen(dev_t dev, int flags, __unused int devtype, __unused proc_t p)
9bccf70c
A
232{
233 struct vn_softc *vn;
234 int unit;
235
236 unit = vnunit(dev);
237 if (vnunit(dev) >= NVNDEVICE) {
238 return (ENXIO);
239 }
240 vn = vn_table + unit;
241 if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY))
242 return (EACCES);
243
244 return(0);
245}
246
91447636 247static int
2d21ac55 248file_io(struct vnode * vp, vfs_context_t ctx,
91447636
A
249 enum uio_rw op, char * base, off_t offset, user_ssize_t count,
250 user_ssize_t * resid)
251{
252 uio_t auio;
253 int error;
254 char uio_buf[UIO_SIZEOF(1)];
255
256 auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op,
257 &uio_buf[0], sizeof(uio_buf));
258 uio_addiov(auio, CAST_USER_ADDR_T(base), count);
259 if (op == UIO_READ)
2d21ac55 260 error = VNOP_READ(vp, auio, IO_SYNC, ctx);
91447636 261 else
2d21ac55 262 error = VNOP_WRITE(vp, auio, IO_SYNC, ctx);
91447636
A
263
264 if (resid != NULL) {
265 *resid = uio_resid(auio);
266 }
267 return (error);
268}
269
/*
 * Number of 'blocksize'-byte blocks needed to hold 'o' bytes
 * (division rounded up).
 */
static __inline__ off_t
block_round(off_t o, int blocksize)
{
	off_t	padded = o + blocksize - 1;

	return (padded / blocksize);
}
275
/*
 * Index of the 'blocksize'-byte block that contains byte offset 'o'
 * (division rounded toward zero).
 */
static __inline__ off_t
block_truncate(off_t o, int blocksize)
{
	off_t	block_index = o / blocksize;

	return (block_index);
}
281
/*
 * Byte position of offset 'o' within its 'blocksize'-byte block.
 */
static __inline__ int
block_remainder(off_t o, int blocksize)
{
	off_t	within_block = o % blocksize;

	return (within_block);
}
287
288static int
289vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
2d21ac55 290 vfs_context_t ctx)
91447636 291{
b0d623f7 292 u_int32_t blocksize = vn->sc_secsize;
91447636
A
293 int error = 0;
294 off_t offset;
295 user_ssize_t resid;
296 off_t orig_offset;
297 user_ssize_t orig_resid;
298
299 orig_resid = resid = uio_resid(uio);
300 orig_offset = offset = uio_offset(uio);
301
302 while (resid > 0) {
b0d623f7
A
303 u_int32_t remainder;
304 u_int32_t this_block_number;
305 u_int32_t this_block_count;
91447636
A
306 off_t this_offset;
307 user_ssize_t this_resid;
308 struct vnode * vp;
309
310 /* figure out which blocks to read */
311 remainder = block_remainder(offset, blocksize);
312 if (shadow_map_read(vn->sc_shadow_map,
313 block_truncate(offset, blocksize),
314 block_round(resid + remainder, blocksize),
315 &this_block_number, &this_block_count)) {
316 vp = vn->sc_shadow_vp;
317 }
318 else {
319 vp = vn->sc_vp;
320 }
321
322 /* read the blocks (or parts thereof) */
323 this_offset = (off_t)this_block_number * blocksize + remainder;
324 uio_setoffset(uio, this_offset);
325 this_resid = this_block_count * blocksize - remainder;
326 if (this_resid > resid) {
327 this_resid = resid;
328 }
329 uio_setresid(uio, this_resid);
2d21ac55 330 error = VNOP_READ(vp, uio, ioflag, ctx);
91447636
A
331 if (error) {
332 break;
333 }
334
335 /* figure out how much we actually read */
336 this_resid -= uio_resid(uio);
337 if (this_resid == 0) {
338 printf("vn device: vnread_shadow zero length read\n");
339 break;
340 }
341 resid -= this_resid;
342 offset += this_resid;
343 }
344 uio_setresid(uio, resid);
345 uio_setoffset(uio, offset);
346 return (error);
347}
348
349static int
2d21ac55 350vncopy_block_to_shadow(struct vn_softc * vn, vfs_context_t ctx,
b0d623f7 351 u_int32_t file_block, u_int32_t shadow_block)
91447636
A
352{
353 int error;
354 char * tmpbuf;
355
356 tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK);
357 if (tmpbuf == NULL) {
358 return (ENOMEM);
359 }
360 /* read one block from file at file_block offset */
2d21ac55 361 error = file_io(vn->sc_vp, ctx, UIO_READ,
91447636
A
362 tmpbuf, (off_t)file_block * vn->sc_secsize,
363 vn->sc_secsize, NULL);
364 if (error) {
365 goto done;
366 }
367 /* write one block to shadow file at shadow_block offset */
2d21ac55 368 error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
91447636
A
369 tmpbuf, (off_t)shadow_block * vn->sc_secsize,
370 vn->sc_secsize, NULL);
371 done:
372 FREE(tmpbuf, M_TEMP);
373 return (error);
374}
375
/* per-iteration flags used by vnwrite_shadow() */
enum {
	FLAGS_FIRST_BLOCK_PARTIAL	= (1 << 0),	/* first block only partly overwritten */
	FLAGS_LAST_BLOCK_PARTIAL	= (1 << 1)	/* last block only partly overwritten */
};
380
381static int
382vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag,
2d21ac55 383 vfs_context_t ctx)
91447636 384{
b0d623f7 385 u_int32_t blocksize = vn->sc_secsize;
91447636
A
386 int error = 0;
387 user_ssize_t resid;
388 off_t offset;
389
390 resid = uio_resid(uio);
391 offset = uio_offset(uio);
392
393 while (resid > 0) {
394 int flags = 0;
b0d623f7
A
395 u_int32_t offset_block_number;
396 u_int32_t remainder;
397 u_int32_t resid_block_count;
398 u_int32_t shadow_block_count;
399 u_int32_t shadow_block_number;
91447636
A
400 user_ssize_t this_resid;
401
402 /* figure out which blocks to write */
403 offset_block_number = block_truncate(offset, blocksize);
404 remainder = block_remainder(offset, blocksize);
405 resid_block_count = block_round(resid + remainder, blocksize);
406 /* figure out if the first or last blocks are partial writes */
407 if (remainder > 0
408 && !shadow_map_is_written(vn->sc_shadow_map,
409 offset_block_number)) {
410 /* the first block is a partial write */
411 flags |= FLAGS_FIRST_BLOCK_PARTIAL;
412 }
413 if (resid_block_count > 1
414 && !shadow_map_is_written(vn->sc_shadow_map,
415 offset_block_number
416 + resid_block_count - 1)
417 && block_remainder(offset + resid, blocksize) > 0) {
418 /* the last block is a partial write */
419 flags |= FLAGS_LAST_BLOCK_PARTIAL;
420 }
421 if (shadow_map_write(vn->sc_shadow_map,
422 offset_block_number, resid_block_count,
423 &shadow_block_number,
424 &shadow_block_count)) {
425 /* shadow file is growing */
426#if 0
427 /* truncate the file to its new length before write */
428 off_t size;
429 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
430 * vn->sc_secsize;
2d21ac55 431 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
b0d623f7 432#endif
91447636
A
433 }
434 /* write the blocks (or parts thereof) */
435 uio_setoffset(uio, (off_t)
436 shadow_block_number * blocksize + remainder);
437 this_resid = (off_t)shadow_block_count * blocksize - remainder;
438 if (this_resid >= resid) {
439 this_resid = resid;
440 if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) {
441 /* copy the last block to the shadow */
b0d623f7
A
442 u_int32_t d;
443 u_int32_t s;
91447636
A
444
445 s = offset_block_number
446 + resid_block_count - 1;
447 d = shadow_block_number
448 + shadow_block_count - 1;
2d21ac55 449 error = vncopy_block_to_shadow(vn, ctx, s, d);
91447636
A
450 if (error) {
451 printf("vnwrite_shadow: failed to copy"
b0d623f7 452 " block %u to shadow block %u\n",
91447636
A
453 s, d);
454 break;
455 }
456 }
457 }
458 uio_setresid(uio, this_resid);
459 if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) {
460 /* copy the first block to the shadow */
2d21ac55 461 error = vncopy_block_to_shadow(vn, ctx,
91447636
A
462 offset_block_number,
463 shadow_block_number);
464 if (error) {
465 printf("vnwrite_shadow: failed to"
b0d623f7 466 " copy block %u to shadow block %u\n",
91447636
A
467 offset_block_number,
468 shadow_block_number);
469 break;
470 }
471 }
2d21ac55 472 error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, ctx);
91447636
A
473 if (error) {
474 break;
475 }
476 /* figure out how much we actually wrote */
477 this_resid -= uio_resid(uio);
478 if (this_resid == 0) {
479 printf("vn device: vnwrite_shadow zero length write\n");
480 break;
481 }
482 resid -= this_resid;
483 offset += this_resid;
484 }
485 uio_setresid(uio, resid);
486 uio_setoffset(uio, offset);
487 return (error);
488}
489
9bccf70c
A
490static int
491vnread(dev_t dev, struct uio *uio, int ioflag)
492{
91447636
A
493 struct vfs_context context;
494 int error = 0;
495 boolean_t funnel_state;
496 off_t offset;
2d21ac55 497 proc_t p;
91447636 498 user_ssize_t resid;
9bccf70c
A
499 struct vn_softc * vn;
500 int unit;
501
502 unit = vnunit(dev);
503 if (vnunit(dev) >= NVNDEVICE) {
504 return (ENXIO);
505 }
91447636
A
506 p = current_proc();
507 funnel_state = thread_funnel_set(kernel_flock, TRUE);
9bccf70c
A
508 vn = vn_table + unit;
509 if ((vn->sc_flags & VNF_INITED) == 0) {
91447636
A
510 error = ENXIO;
511 goto done;
9bccf70c 512 }
2d21ac55
A
513
514 context.vc_thread = current_thread();
515 context.vc_ucred = vn->sc_cred;
516
91447636
A
517 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
518 if (error != 0) {
519 /* the vnode is no longer available, abort */
520 error = ENXIO;
2d21ac55 521 vnclear(vn, &context);
91447636
A
522 goto done;
523 }
524
525 resid = uio_resid(uio);
526 offset = uio_offset(uio);
527
528 /*
529 * If out of bounds return an error. If at the EOF point,
530 * simply read less.
531 */
532 if (offset >= (off_t)vn->sc_fsize) {
533 if (offset > (off_t)vn->sc_fsize) {
534 error = EINVAL;
535 }
536 goto done;
537 }
538 /*
539 * If the request crosses EOF, truncate the request.
540 */
541 if ((offset + resid) > (off_t)vn->sc_fsize) {
542 resid = vn->sc_fsize - offset;
543 uio_setresid(uio, resid);
9bccf70c 544 }
9bccf70c 545
91447636
A
546 if (vn->sc_shadow_vp != NULL) {
547 error = vnode_getwithvid(vn->sc_shadow_vp,
548 vn->sc_shadow_vid);
549 if (error != 0) {
550 /* the vnode is no longer available, abort */
551 error = ENXIO;
552 vnode_put(vn->sc_vp);
2d21ac55 553 vnclear(vn, &context);
91447636
A
554 goto done;
555 }
556 error = vnread_shadow(vn, uio, ioflag, &context);
557 vnode_put(vn->sc_shadow_vp);
558 } else {
559 error = VNOP_READ(vn->sc_vp, uio, ioflag, &context);
560 }
561 vnode_put(vn->sc_vp);
562 done:
563 (void) thread_funnel_set(kernel_flock, funnel_state);
564 return (error);
9bccf70c
A
565}
566
567static int
568vnwrite(dev_t dev, struct uio *uio, int ioflag)
569{
91447636
A
570 struct vfs_context context;
571 int error;
572 boolean_t funnel_state;
573 off_t offset;
2d21ac55 574 proc_t p;
91447636 575 user_ssize_t resid;
9bccf70c
A
576 struct vn_softc * vn;
577 int unit;
578
579 unit = vnunit(dev);
580 if (vnunit(dev) >= NVNDEVICE) {
581 return (ENXIO);
582 }
91447636
A
583 p = current_proc();
584 funnel_state = thread_funnel_set(kernel_flock, TRUE);
9bccf70c
A
585 vn = vn_table + unit;
586 if ((vn->sc_flags & VNF_INITED) == 0) {
91447636
A
587 error = ENXIO;
588 goto done;
9bccf70c
A
589 }
590 if (vn->sc_flags & VNF_READONLY) {
91447636
A
591 error = EROFS;
592 goto done;
9bccf70c 593 }
2d21ac55
A
594
595 context.vc_thread = current_thread();
596 context.vc_ucred = vn->sc_cred;
597
91447636
A
598 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
599 if (error != 0) {
600 /* the vnode is no longer available, abort */
601 error = ENXIO;
2d21ac55 602 vnclear(vn, &context);
91447636
A
603 goto done;
604 }
605 resid = uio_resid(uio);
606 offset = uio_offset(uio);
9bccf70c 607
91447636
A
608 /*
609 * If out of bounds return an error. If at the EOF point,
610 * simply write less.
611 */
612 if (offset >= (off_t)vn->sc_fsize) {
613 if (offset > (off_t)vn->sc_fsize) {
614 error = EINVAL;
615 }
616 goto done;
617 }
618 /*
619 * If the request crosses EOF, truncate the request.
620 */
621 if ((offset + resid) > (off_t)vn->sc_fsize) {
622 resid = (off_t)vn->sc_fsize - offset;
623 uio_setresid(uio, resid);
624 }
9bccf70c 625
91447636
A
626 if (vn->sc_shadow_vp != NULL) {
627 error = vnode_getwithvid(vn->sc_shadow_vp,
628 vn->sc_shadow_vid);
629 if (error != 0) {
630 /* the vnode is no longer available, abort */
631 error = ENXIO;
632 vnode_put(vn->sc_vp);
2d21ac55 633 vnclear(vn, &context);
91447636 634 goto done;
9bccf70c 635 }
91447636
A
636 error = vnwrite_shadow(vn, uio, ioflag, &context);
637 vnode_put(vn->sc_shadow_vp);
638 } else {
639 error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context);
9bccf70c 640 }
91447636
A
641 vnode_put(vn->sc_vp);
642 done:
643 (void) thread_funnel_set(kernel_flock, funnel_state);
9bccf70c
A
644 return (error);
645}
646
647static int
2d21ac55
A
648shadow_read(struct vn_softc * vn, struct buf * bp, char * base,
649 vfs_context_t ctx)
9bccf70c 650{
b0d623f7 651 u_int32_t blocksize = vn->sc_secsize;
9bccf70c 652 int error = 0;
b0d623f7 653 u_int32_t offset;
9bccf70c 654 boolean_t read_shadow;
b0d623f7
A
655 u_int32_t resid;
656 u_int32_t start = 0;
9bccf70c 657
91447636
A
658 offset = buf_blkno(bp);
659 resid = buf_resid(bp) / blocksize;
9bccf70c 660 while (resid > 0) {
91447636 661 user_ssize_t temp_resid;
b0d623f7
A
662 u_int32_t this_offset;
663 u_int32_t this_resid;
9bccf70c
A
664 struct vnode * vp;
665
666 read_shadow = shadow_map_read(vn->sc_shadow_map,
667 offset, resid,
668 &this_offset, &this_resid);
669 if (read_shadow) {
670 vp = vn->sc_shadow_vp;
671 }
672 else {
673 vp = vn->sc_vp;
674 }
2d21ac55 675 error = file_io(vp, ctx, UIO_READ, base + start,
91447636
A
676 (off_t)this_offset * blocksize,
677 (user_ssize_t)this_resid * blocksize,
678 &temp_resid);
679 if (error) {
9bccf70c 680 break;
91447636
A
681 }
682 this_resid -= (temp_resid / blocksize);
683 if (this_resid == 0) {
684 printf("vn device: shadow_read zero length read\n");
9bccf70c
A
685 break;
686 }
91447636
A
687 resid -= this_resid;
688 offset += this_resid;
689 start += this_resid * blocksize;
9bccf70c 690 }
91447636 691 buf_setresid(bp, resid * blocksize);
9bccf70c
A
692 return (error);
693}
694
695static int
696shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
2d21ac55 697 vfs_context_t ctx)
9bccf70c 698{
b0d623f7 699 u_int32_t blocksize = vn->sc_secsize;
9bccf70c 700 int error = 0;
b0d623f7 701 u_int32_t offset;
9bccf70c 702 boolean_t shadow_grew;
b0d623f7
A
703 u_int32_t resid;
704 u_int32_t start = 0;
9bccf70c 705
91447636
A
706 offset = buf_blkno(bp);
707 resid = buf_resid(bp) / blocksize;
9bccf70c 708 while (resid > 0) {
91447636 709 user_ssize_t temp_resid;
b0d623f7
A
710 u_int32_t this_offset;
711 u_int32_t this_resid;
9bccf70c
A
712
713 shadow_grew = shadow_map_write(vn->sc_shadow_map,
714 offset, resid,
715 &this_offset, &this_resid);
716 if (shadow_grew) {
717#if 0
718 off_t size;
719 /* truncate the file to its new length before write */
720 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
91447636 721 * blocksize;
2d21ac55 722 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
55e303ae 723#endif
9bccf70c 724 }
2d21ac55 725 error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
9bccf70c 726 base + start,
91447636
A
727 (off_t)this_offset * blocksize,
728 (user_ssize_t)this_resid * blocksize,
729 &temp_resid);
9bccf70c
A
730 if (error) {
731 break;
732 }
91447636
A
733 this_resid -= (temp_resid / blocksize);
734 if (this_resid == 0) {
735 printf("vn device: shadow_write zero length write\n");
9bccf70c
A
736 break;
737 }
91447636
A
738 resid -= this_resid;
739 offset += this_resid;
740 start += this_resid * blocksize;
9bccf70c 741 }
91447636 742 buf_setresid(bp, resid * blocksize);
9bccf70c
A
743 return (error);
744}
745
746static int
2d21ac55 747vn_readwrite_io(struct vn_softc * vn, struct buf * bp, vfs_context_t ctx)
9bccf70c
A
748{
749 int error = 0;
750 char * iov_base;
91447636 751 caddr_t vaddr;
9bccf70c 752
91447636
A
753 if (buf_map(bp, &vaddr))
754 panic("vn device: buf_map failed");
755 iov_base = (char *)vaddr;
756
9bccf70c 757 if (vn->sc_shadow_vp == NULL) {
91447636
A
758 user_ssize_t temp_resid;
759
2d21ac55 760 error = file_io(vn->sc_vp, ctx,
91447636
A
761 buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
762 iov_base,
763 (off_t)buf_blkno(bp) * vn->sc_secsize,
764 buf_resid(bp), &temp_resid);
765 buf_setresid(bp, temp_resid);
9bccf70c
A
766 }
767 else {
91447636 768 if (buf_flags(bp) & B_READ)
2d21ac55 769 error = shadow_read(vn, bp, iov_base, ctx);
9bccf70c 770 else
2d21ac55 771 error = shadow_write(vn, bp, iov_base, ctx);
9bccf70c 772 }
91447636
A
773 buf_unmap(bp);
774
9bccf70c
A
775 return (error);
776}
777
778static void
779vnstrategy(struct buf *bp)
780{
781 struct vn_softc *vn;
782 int error = 0;
783 long sz; /* in sc_secsize chunks */
91447636
A
784 daddr64_t blk_num;
785 boolean_t funnel_state;
91447636
A
786 struct vnode * shadow_vp = NULL;
787 struct vnode * vp = NULL;
2d21ac55 788 struct vfs_context context;
9bccf70c 789
91447636
A
790 funnel_state = thread_funnel_set(kernel_flock, TRUE);
791 vn = vn_table + vnunit(buf_device(bp));
9bccf70c 792 if ((vn->sc_flags & VNF_INITED) == 0) {
91447636
A
793 error = ENXIO;
794 goto done;
9bccf70c
A
795 }
796
2d21ac55
A
797 context.vc_thread = current_thread();
798 context.vc_ucred = vn->sc_cred;
799
91447636 800 buf_setresid(bp, buf_count(bp));
9bccf70c
A
801 /*
802 * Check for required alignment. Transfers must be a valid
803 * multiple of the sector size.
804 */
91447636
A
805 blk_num = buf_blkno(bp);
806 if (buf_count(bp) % vn->sc_secsize != 0) {
807 error = EINVAL;
808 goto done;
9bccf70c 809 }
91447636 810 sz = howmany(buf_count(bp), vn->sc_secsize);
9bccf70c
A
811
812 /*
813 * If out of bounds return an error. If at the EOF point,
814 * simply read or write less.
815 */
91447636
A
816 if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
817 if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
818 error = EINVAL;
55e303ae 819 }
91447636 820 goto done;
9bccf70c
A
821 }
822 /*
823 * If the request crosses EOF, truncate the request.
824 */
91447636
A
825 if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
826 buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
827 buf_setresid(bp, buf_count(bp));
9bccf70c 828 }
91447636
A
829 vp = vn->sc_vp;
830 if (vp == NULL) {
831 error = ENXIO;
832 goto done;
833 }
2d21ac55 834
91447636
A
835 error = vnode_getwithvid(vp, vn->sc_vid);
836 if (error != 0) {
837 /* the vnode is no longer available, abort */
838 error = ENXIO;
2d21ac55 839 vnclear(vn, &context);
91447636
A
840 goto done;
841 }
842 shadow_vp = vn->sc_shadow_vp;
843 if (shadow_vp != NULL) {
844 error = vnode_getwithvid(shadow_vp,
845 vn->sc_shadow_vid);
846 if (error != 0) {
847 /* the vnode is no longer available, abort */
848 error = ENXIO;
849 vnode_put(vn->sc_vp);
2d21ac55 850 vnclear(vn, &context);
91447636 851 goto done;
9bccf70c 852 }
9bccf70c 853 }
2d21ac55
A
854
855 error = vn_readwrite_io(vn, bp, &context);
91447636
A
856 vnode_put(vp);
857 if (shadow_vp != NULL) {
858 vnode_put(shadow_vp);
9bccf70c 859 }
91447636
A
860
861 done:
862 (void) thread_funnel_set(kernel_flock, funnel_state);
863 if (error) {
864 buf_seterror(bp, error);
865 }
866 buf_biodone(bp);
867 return;
9bccf70c
A
868}
869
870/* ARGSUSED */
871static int
91447636 872vnioctl(dev_t dev, u_long cmd, caddr_t data,
2d21ac55 873 __unused int flag, proc_t p,
9bccf70c
A
874 int is_char)
875{
876 struct vn_softc *vn;
b0d623f7 877 struct vn_ioctl_64 *viop;
9bccf70c 878 int error;
91447636 879 u_int32_t *f;
9bccf70c
A
880 u_int64_t * o;
881 int unit;
91447636 882 struct vfsioattr ioattr;
b0d623f7 883 struct vn_ioctl_64 user_vnio;
91447636 884 boolean_t funnel_state;
2d21ac55 885 struct vfs_context context;
9bccf70c
A
886
887 unit = vnunit(dev);
888 if (vnunit(dev) >= NVNDEVICE) {
889 return (ENXIO);
890 }
91447636
A
891
892 funnel_state = thread_funnel_set(kernel_flock, TRUE);
9bccf70c 893 vn = vn_table + unit;
91447636
A
894 error = proc_suser(p);
895 if (error) {
896 goto done;
897 }
9bccf70c 898
2d21ac55
A
899 context.vc_thread = current_thread();
900 context.vc_ucred = vn->sc_cred;
901
b0d623f7 902 viop = (struct vn_ioctl_64 *)data;
91447636 903 f = (u_int32_t *)data;
9bccf70c
A
904 o = (u_int64_t *)data;
905 switch (cmd) {
b0d623f7
A
906#ifdef __LP64__
907 case VNIOCDETACH32:
908 case VNIOCDETACH:
909#else
9bccf70c 910 case VNIOCDETACH:
91447636 911 case VNIOCDETACH64:
b0d623f7 912#endif
55e303ae 913 case DKIOCGETBLOCKSIZE:
91447636 914 case DKIOCSETBLOCKSIZE:
9bccf70c
A
915 case DKIOCGETMAXBLOCKCOUNTREAD:
916 case DKIOCGETMAXBLOCKCOUNTWRITE:
917 case DKIOCGETMAXSEGMENTCOUNTREAD:
918 case DKIOCGETMAXSEGMENTCOUNTWRITE:
55e303ae
A
919 case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
920 case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
921 case DKIOCGETBLOCKCOUNT:
9bccf70c
A
922 case DKIOCGETBLOCKCOUNT32:
923 if ((vn->sc_flags & VNF_INITED) == 0) {
91447636
A
924 error = ENXIO;
925 goto done;
9bccf70c
A
926 }
927 break;
928 default:
929 break;
930 }
91447636
A
931
932 if (vn->sc_vp != NULL)
933 vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr);
934 else
935 bzero(&ioattr, sizeof(ioattr));
936
9bccf70c 937 switch (cmd) {
91447636
A
938 case DKIOCISVIRTUAL:
939 *f = 1;
940 break;
9bccf70c 941 case DKIOCGETMAXBLOCKCOUNTREAD:
91447636 942 *o = ioattr.io_maxreadcnt / vn->sc_secsize;
9bccf70c
A
943 break;
944 case DKIOCGETMAXBLOCKCOUNTWRITE:
91447636 945 *o = ioattr.io_maxwritecnt / vn->sc_secsize;
55e303ae
A
946 break;
947 case DKIOCGETMAXBYTECOUNTREAD:
91447636 948 *o = ioattr.io_maxreadcnt;
55e303ae
A
949 break;
950 case DKIOCGETMAXBYTECOUNTWRITE:
91447636 951 *o = ioattr.io_maxwritecnt;
9bccf70c
A
952 break;
953 case DKIOCGETMAXSEGMENTCOUNTREAD:
91447636 954 *o = ioattr.io_segreadcnt;
9bccf70c
A
955 break;
956 case DKIOCGETMAXSEGMENTCOUNTWRITE:
91447636 957 *o = ioattr.io_segwritecnt;
55e303ae
A
958 break;
959 case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
91447636 960 *o = ioattr.io_maxsegreadsize;
55e303ae
A
961 break;
962 case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
91447636 963 *o = ioattr.io_maxsegwritesize;
9bccf70c 964 break;
91447636
A
965 case DKIOCGETBLOCKSIZE:
966 *f = vn->sc_secsize;
9bccf70c 967 break;
91447636 968 case DKIOCSETBLOCKSIZE:
9bccf70c
A
969 if (is_char) {
970 /* can only set block size on block device */
91447636
A
971 error = ENODEV;
972 break;
9bccf70c
A
973 }
974 if (*f < DEV_BSIZE) {
91447636
A
975 error = EINVAL;
976 break;
977 }
978 if (vn->sc_shadow_vp != NULL) {
979 if (*f == (unsigned)vn->sc_secsize) {
980 break;
981 }
982 /* can't change the block size if already shadowing */
983 error = EBUSY;
984 break;
9bccf70c
A
985 }
986 vn->sc_secsize = *f;
987 /* recompute the size in terms of the new blocksize */
988 vn->sc_size = vn->sc_fsize / vn->sc_secsize;
989 break;
990 case DKIOCISWRITABLE:
991 *f = 1;
992 break;
993 case DKIOCGETBLOCKCOUNT32:
994 *f = vn->sc_size;
995 break;
55e303ae 996 case DKIOCGETBLOCKCOUNT:
9bccf70c
A
997 *o = vn->sc_size;
998 break;
b0d623f7
A
999#ifdef __LP64__
1000 case VNIOCSHADOW32:
1001 case VNIOCSHADOW:
1002#else
9bccf70c 1003 case VNIOCSHADOW:
91447636 1004 case VNIOCSHADOW64:
b0d623f7 1005#endif
9bccf70c 1006 if (vn->sc_shadow_vp != NULL) {
91447636
A
1007 error = EBUSY;
1008 break;
9bccf70c
A
1009 }
1010 if (vn->sc_vp == NULL) {
1011 /* much be attached before we can shadow */
91447636
A
1012 error = EINVAL;
1013 break;
1014 }
1015 if (!proc_is64bit(p)) {
1016 /* downstream code expects LP64 version of vn_ioctl structure */
b0d623f7 1017 vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio);
91447636 1018 viop = &user_vnio;
9bccf70c 1019 }
91447636
A
1020 if (viop->vn_file == USER_ADDR_NULL) {
1021 error = EINVAL;
1022 break;
9bccf70c 1023 }
91447636 1024 error = vniocattach_shadow(vn, viop, dev, 0, p);
9bccf70c
A
1025 break;
1026
b0d623f7
A
1027#ifdef __LP64__
1028 case VNIOCATTACH32:
1029 case VNIOCATTACH:
1030#else
9bccf70c 1031 case VNIOCATTACH:
91447636 1032 case VNIOCATTACH64:
b0d623f7 1033#endif
9bccf70c
A
1034 if (is_char) {
1035 /* attach only on block device */
91447636
A
1036 error = ENODEV;
1037 break;
9bccf70c
A
1038 }
1039 if (vn->sc_flags & VNF_INITED) {
91447636
A
1040 error = EBUSY;
1041 break;
1042 }
1043 if (!proc_is64bit(p)) {
1044 /* downstream code expects LP64 version of vn_ioctl structure */
b0d623f7 1045 vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio);
91447636 1046 viop = &user_vnio;
9bccf70c 1047 }
91447636
A
1048 if (viop->vn_file == USER_ADDR_NULL) {
1049 error = EINVAL;
1050 break;
9bccf70c 1051 }
91447636 1052 error = vniocattach_file(vn, viop, dev, 0, p);
9bccf70c
A
1053 break;
1054
b0d623f7
A
1055#ifdef __LP64__
1056 case VNIOCDETACH32:
1057 case VNIOCDETACH:
1058#else
9bccf70c 1059 case VNIOCDETACH:
91447636 1060 case VNIOCDETACH64:
b0d623f7 1061#endif
9bccf70c
A
1062 if (is_char) {
1063 /* detach only on block device */
91447636
A
1064 error = ENODEV;
1065 break;
9bccf70c
A
1066 }
1067 /* Note: spec_open won't open a mounted block device */
1068
1069 /*
1070 * XXX handle i/o in progress. Return EBUSY, or wait, or
1071 * flush the i/o.
1072 * XXX handle multiple opens of the device. Return EBUSY,
1073 * or revoke the fd's.
1074 * How are these problems handled for removable and failing
1075 * hardware devices? (Hint: They are not)
1076 */
2d21ac55 1077 vnclear(vn, &context);
9bccf70c
A
1078 break;
1079
1080 case VNIOCGSET:
1081 vn_options |= *f;
1082 *f = vn_options;
1083 break;
1084
1085 case VNIOCGCLEAR:
1086 vn_options &= ~(*f);
1087 *f = vn_options;
1088 break;
1089
1090 case VNIOCUSET:
1091 vn->sc_options |= *f;
1092 *f = vn->sc_options;
1093 break;
1094
1095 case VNIOCUCLEAR:
1096 vn->sc_options &= ~(*f);
1097 *f = vn->sc_options;
1098 break;
1099
1100 default:
1101 error = ENOTTY;
1102 break;
1103 }
91447636
A
1104 done:
1105 (void) thread_funnel_set(kernel_flock, funnel_state);
9bccf70c
A
1106 return(error);
1107}
1108
1109static int
2d21ac55 1110vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
9bccf70c
A
1111{
1112 return (vnioctl(dev, cmd, data, flag, p, TRUE));
1113}
1114
1115static int
2d21ac55 1116vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
9bccf70c
A
1117{
1118 return (vnioctl(dev, cmd, data, flag, p, FALSE));
1119}
1120
1121/*
1122 * vniocattach_file:
1123 *
1124 * Attach a file to a VN partition. Return the size in the vn_size
1125 * field.
1126 */
1127
1128static int
1129vniocattach_file(struct vn_softc *vn,
b0d623f7 1130 struct vn_ioctl_64 *vniop,
9bccf70c
A
1131 dev_t dev,
1132 int in_kernel,
2d21ac55 1133 proc_t p)
9bccf70c 1134{
91447636 1135 dev_t cdev;
2d21ac55 1136 vfs_context_t ctx = vfs_context_current();
91447636 1137 kauth_cred_t cred;
9bccf70c 1138 struct nameidata nd;
91447636 1139 off_t file_size;
9bccf70c 1140 int error, flags;
91447636 1141
9bccf70c
A
1142 flags = FREAD|FWRITE;
1143 if (in_kernel) {
b0d623f7 1144 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
9bccf70c
A
1145 }
1146 else {
91447636
A
1147 NDINIT(&nd, LOOKUP, FOLLOW,
1148 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2d21ac55 1149 vniop->vn_file, ctx);
9bccf70c 1150 }
91447636 1151 /* vn_open gives both long- and short-term references */
9bccf70c
A
1152 error = vn_open(&nd, flags, 0);
1153 if (error) {
0c530ab8 1154 if (error != EACCES && error != EPERM && error != EROFS) {
9bccf70c 1155 return (error);
0c530ab8 1156 }
9bccf70c
A
1157 flags &= ~FWRITE;
1158 if (in_kernel) {
b0d623f7 1159 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE,
2d21ac55 1160 vniop->vn_file, ctx);
9bccf70c
A
1161 }
1162 else {
91447636
A
1163 NDINIT(&nd, LOOKUP, FOLLOW,
1164 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2d21ac55 1165 vniop->vn_file, ctx);
9bccf70c
A
1166 }
1167 error = vn_open(&nd, flags, 0);
0c530ab8 1168 if (error) {
9bccf70c 1169 return (error);
0c530ab8 1170 }
9bccf70c
A
1171 }
1172 if (nd.ni_vp->v_type != VREG) {
1173 error = EINVAL;
1174 }
9bccf70c 1175 else {
2d21ac55 1176 error = vnode_size(nd.ni_vp, &file_size, ctx);
9bccf70c
A
1177 }
1178 if (error != 0) {
2d21ac55 1179 (void) vn_close(nd.ni_vp, flags, ctx);
91447636 1180 vnode_put(nd.ni_vp);
9bccf70c
A
1181 return (error);
1182 }
91447636
A
1183 cred = kauth_cred_proc_ref(p);
1184 nd.ni_vp->v_flag |= VNOCACHE_DATA;
2d21ac55 1185 error = setcred(nd.ni_vp, cred);
9bccf70c 1186 if (error) {
2d21ac55 1187 (void)vn_close(nd.ni_vp, flags, ctx);
91447636 1188 vnode_put(nd.ni_vp);
0c530ab8 1189 kauth_cred_unref(&cred);
9bccf70c
A
1190 return(error);
1191 }
91447636
A
1192 vn->sc_secsize = DEV_BSIZE;
1193 vn->sc_fsize = file_size;
1194 vn->sc_size = file_size / vn->sc_secsize;
1195 vn->sc_vp = nd.ni_vp;
1196 vn->sc_vid = vnode_vid(nd.ni_vp);
1197 vn->sc_open_flags = flags;
1198 vn->sc_cred = cred;
1199 cdev = makedev(vndevice_cdev_major, minor(dev));
1200 vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
1201 UID_ROOT, GID_OPERATOR,
1202 0600, "rvn%d",
1203 minor(dev));
9bccf70c
A
1204 vn->sc_flags |= VNF_INITED;
1205 if (flags == FREAD)
1206 vn->sc_flags |= VNF_READONLY;
91447636
A
1207 /* lose the short-term reference */
1208 vnode_put(nd.ni_vp);
9bccf70c
A
1209 return(0);
1210}
1211
1212static int
b0d623f7
A
1213vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop,
1214 __unused dev_t dev, int in_kernel, proc_t p)
9bccf70c 1215{
2d21ac55 1216 vfs_context_t ctx = vfs_context_current();
9bccf70c
A
1217 struct nameidata nd;
1218 int error, flags;
1219 shadow_map_t * map;
91447636
A
1220 off_t file_size;
1221
9bccf70c
A
1222 flags = FREAD|FWRITE;
1223 if (in_kernel) {
b0d623f7 1224 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
9bccf70c
A
1225 }
1226 else {
91447636
A
1227 NDINIT(&nd, LOOKUP, FOLLOW,
1228 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2d21ac55 1229 vniop->vn_file, ctx);
9bccf70c 1230 }
91447636 1231 /* vn_open gives both long- and short-term references */
9bccf70c
A
1232 error = vn_open(&nd, flags, 0);
1233 if (error) {
1234 /* shadow MUST be writable! */
1235 return (error);
1236 }
91447636 1237 if (nd.ni_vp->v_type != VREG
2d21ac55
A
1238 || (error = vnode_size(nd.ni_vp, &file_size, ctx))) {
1239 (void)vn_close(nd.ni_vp, flags, ctx);
91447636 1240 vnode_put(nd.ni_vp);
9bccf70c
A
1241 return (error ? error : EINVAL);
1242 }
91447636 1243 map = shadow_map_create(vn->sc_fsize, file_size,
9bccf70c
A
1244 0, vn->sc_secsize);
1245 if (map == NULL) {
2d21ac55 1246 (void)vn_close(nd.ni_vp, flags, ctx);
91447636 1247 vnode_put(nd.ni_vp);
9bccf70c
A
1248 vn->sc_shadow_vp = NULL;
1249 return (ENOMEM);
1250 }
91447636
A
1251 vn->sc_shadow_vp = nd.ni_vp;
1252 vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
1253 vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;
9bccf70c
A
1254 vn->sc_shadow_map = map;
1255 vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */
91447636
A
1256
1257 /* lose the short-term reference */
1258 vnode_put(nd.ni_vp);
9bccf70c
A
1259 return(0);
1260}
1261
1262int
1263vndevice_root_image(char * path, char devname[], dev_t * dev_p)
1264{
1265 int error = 0;
91447636 1266 struct vn_softc * vn;
b0d623f7 1267 struct vn_ioctl_64 vnio;
9bccf70c 1268
91447636
A
1269 vnio.vn_file = CAST_USER_ADDR_T(path);
1270 vnio.vn_size = 0;
9bccf70c
A
1271
1272 vn = vn_table + ROOT_IMAGE_UNIT;
1273 *dev_p = makedev(vndevice_bdev_major,
1274 ROOT_IMAGE_UNIT);
2d21ac55 1275 snprintf(devname, 16, "vn%d", ROOT_IMAGE_UNIT);
91447636 1276 error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc());
9bccf70c
A
1277 return (error);
1278}
1279
1280/*
1281 * Duplicate the current processes' credentials. Since we are called only
1282 * as the result of a SET ioctl and only root can do that, any future access
1283 * to this "disk" is essentially as root. Note that credentials may change
1284 * if some other uid can write directly to the mapped file (NFS).
1285 */
91447636 1286static int
2d21ac55 1287setcred(struct vnode * vp, kauth_cred_t cred)
9bccf70c
A
1288{
1289 char *tmpbuf;
1290 int error = 0;
91447636 1291 struct vfs_context context;
9bccf70c
A
1292
1293 /*
1294 * Horrible kludge to establish credentials for NFS XXX.
1295 */
2d21ac55 1296 context.vc_thread = current_thread();
91447636
A
1297 context.vc_ucred = cred;
1298 tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK);
1299 error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL);
1300 FREE(tmpbuf, M_TEMP);
9bccf70c
A
1301 return (error);
1302}
1303
1304void
2d21ac55 1305vnclear(struct vn_softc *vn, vfs_context_t ctx)
9bccf70c 1306{
9bccf70c 1307 if (vn->sc_vp != NULL) {
91447636 1308 /* release long-term reference */
2d21ac55 1309 (void)vn_close(vn->sc_vp, vn->sc_open_flags, ctx);
9bccf70c
A
1310 vn->sc_vp = NULL;
1311 }
1312 if (vn->sc_shadow_vp != NULL) {
91447636 1313 /* release long-term reference */
2d21ac55 1314 (void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE, ctx);
9bccf70c
A
1315 vn->sc_shadow_vp = NULL;
1316 }
1317 if (vn->sc_shadow_map != NULL) {
1318 shadow_map_free(vn->sc_shadow_map);
1319 vn->sc_shadow_map = NULL;
1320 }
91447636 1321 vn->sc_flags &= ~(VNF_INITED | VNF_READONLY);
9bccf70c 1322 if (vn->sc_cred) {
0c530ab8 1323 kauth_cred_unref(&vn->sc_cred);
9bccf70c
A
1324 }
1325 vn->sc_size = 0;
1326 vn->sc_fsize = 0;
1327 if (vn->sc_cdev) {
1328 devfs_remove(vn->sc_cdev);
1329 vn->sc_cdev = NULL;
1330 }
1331}
1332
1333static int
1334vnsize(dev_t dev)
1335{
91447636 1336 int secsize;
9bccf70c
A
1337 struct vn_softc *vn;
1338 int unit;
91447636 1339 boolean_t funnel_state;
9bccf70c
A
1340
1341 unit = vnunit(dev);
1342 if (vnunit(dev) >= NVNDEVICE) {
91447636 1343 return (-1);
9bccf70c 1344 }
9bccf70c 1345
91447636
A
1346 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1347 vn = vn_table + unit;
9bccf70c 1348 if ((vn->sc_flags & VNF_INITED) == 0)
91447636
A
1349 secsize = -1;
1350 else
1351 secsize = vn->sc_secsize;
1352 (void) thread_funnel_set(kernel_flock, funnel_state);
1353 return (secsize);
9bccf70c
A
1354}
1355
1356#define CDEV_MAJOR -1
1357#define BDEV_MAJOR -1
1358static int vndevice_inited = 0;
1359
1360void
91447636 1361vndevice_init(void)
9bccf70c 1362{
9bccf70c
A
1363 if (vndevice_inited)
1364 return;
2d21ac55
A
1365
1366 vndevice_do_init();
1367}
1368
1369static void
1370vndevice_do_init( void )
1371{
1372 int i;
1373
9bccf70c
A
1374 vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw);
1375
1376 if (vndevice_bdev_major < 0) {
1377 printf("vndevice_init: bdevsw_add() returned %d\n",
1378 vndevice_bdev_major);
1379 return;
1380 }
1381 vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw,
1382 vndevice_bdev_major);
1383 if (vndevice_cdev_major < 0) {
1384 printf("vndevice_init: cdevsw_add() returned %d\n",
1385 vndevice_cdev_major);
1386 return;
1387 }
1388 for (i = 0; i < NVNDEVICE; i++) {
1389 dev_t dev = makedev(vndevice_bdev_major, i);
1390 vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK,
1391 UID_ROOT, GID_OPERATOR,
1392 0600, "vn%d",
1393 i);
1394 if (vn_table[i].sc_bdev == NULL)
1395 printf("vninit: devfs_make_node failed!\n");
1396 }
1397}
91447636
A
1398
1399static void
b0d623f7 1400vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to)
91447636
A
1401{
1402 to->vn_file = CAST_USER_ADDR_T(from->vn_file);
1403 to->vn_size = from->vn_size;
1404 to->vn_control = from->vn_control;
1405}
1406
55e303ae 1407#endif /* NVNDEVICE */