]> git.saurik.com Git - apple/xnu.git/blob - bsd/miscfs/nullfs/null_vnops.c
176f84e7419e7ab3a83fa88c5d8840d640b7f738
[apple/xnu.git] / bsd / miscfs / nullfs / null_vnops.c
1 /*
2 * Copyright (c) 2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*-
25 * Portions Copyright (c) 1992, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software contributed to Berkeley by
29 * John Heidemann of the UCLA Ficus project.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95
56 *
57 * Ancestors:
58 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
59 * ...and...
60 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
61 *
62 * $FreeBSD$
63 */
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/conf.h>
68 #include <sys/kernel.h>
69 #include <sys/lock.h>
70 #include <sys/malloc.h>
71 #include <sys/mount.h>
72 #include <sys/mount_internal.h>
73 #include <sys/namei.h>
74 #include <sys/sysctl.h>
75 #include <sys/vnode.h>
76 #include <sys/xattr.h>
77 #include <sys/ubc.h>
78 #include <sys/types.h>
79 #include <sys/dirent.h>
80 #include <sys/kauth.h>
81
82 #include "nullfs.h"
83
84 #define NULL_ROOT_INO 2
85 #define NULL_SECOND_INO 3
86 #define NULL_THIRD_INO 4
87
88 vop_t * nullfs_vnodeop_p = NULL;
89
90 /* the mountpoint lock should be held going into this function */
91 static int
92 nullfs_isspecialvp(struct vnode * vp)
93 {
94 struct null_mount * null_mp;
95
96 null_mp = MOUNTTONULLMOUNT(vnode_mount(vp));
97
98 /* only check for root and second here, third is special in a different way,
99 * related only to lookup and readdir */
100 if (vp && (vp == null_mp->nullm_rootvp || vp == null_mp->nullm_secondvp)) {
101 return 1;
102 }
103 return 0;
104 }
105
106 /* helper function to handle locking where possible */
107 static int
108 nullfs_checkspecialvp(struct vnode* vp)
109 {
110 int result = 0;
111 struct null_mount * null_mp;
112
113 null_mp = MOUNTTONULLMOUNT(vnode_mount(vp));
114
115 lck_mtx_lock(&null_mp->nullm_lock);
116 result = (nullfs_isspecialvp(vp));
117 lck_mtx_unlock(&null_mp->nullm_lock);
118
119 return result;
120 }
121
122 vfs_context_t
123 nullfs_get_patched_context(struct null_mount * null_mp, vfs_context_t ctx)
124 {
125 struct vfs_context* ectx = ctx;
126 if ((null_mp->nullm_flags & NULLM_UNVEIL) == NULLM_UNVEIL) {
127 ectx = vfs_context_create(ctx);
128 ectx->vc_ucred = kauth_cred_setuidgid(ectx->vc_ucred, null_mp->uid, null_mp->gid);
129 }
130 return ectx;
131 }
132
133 void
134 nullfs_cleanup_patched_context(struct null_mount * null_mp, vfs_context_t ctx)
135 {
136 if ((null_mp->nullm_flags & NULLM_UNVEIL) == NULLM_UNVEIL) {
137 vfs_context_rele(ctx);
138 }
139 }
140
141 static int
142 nullfs_default(__unused struct vnop_generic_args * args)
143 {
144 NULLFSDEBUG("%s (default)\n", ((struct vnodeop_desc_fake *)args->a_desc)->vdesc_name);
145 return ENOTSUP;
146 }
147
148 static int
149 nullfs_special_getattr(struct vnop_getattr_args * args)
150 {
151 mount_t mp = vnode_mount(args->a_vp);
152 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
153
154 ino_t ino = NULL_ROOT_INO;
155 struct vnode_attr covered_rootattr;
156 vnode_t checkvp = null_mp->nullm_lowerrootvp;
157 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
158
159 VATTR_INIT(&covered_rootattr);
160 VATTR_WANTED(&covered_rootattr, va_uid);
161 VATTR_WANTED(&covered_rootattr, va_gid);
162 VATTR_WANTED(&covered_rootattr, va_create_time);
163 VATTR_WANTED(&covered_rootattr, va_modify_time);
164 VATTR_WANTED(&covered_rootattr, va_access_time);
165
166 /* prefer to get this from the lower root vp, but if not (i.e. forced unmount
167 * of lower fs) try the mount point covered vnode */
168 if (vnode_getwithvid(checkvp, null_mp->nullm_lowerrootvid)) {
169 checkvp = vfs_vnodecovered(mp);
170 if (checkvp == NULL) {
171 nullfs_cleanup_patched_context(null_mp, ectx);
172 return EIO;
173 }
174 }
175
176 int error = vnode_getattr(checkvp, &covered_rootattr, ectx);
177
178 vnode_put(checkvp);
179 if (error) {
180 /* we should have been able to get attributes fore one of the two choices so
181 * fail if we didn't */
182 nullfs_cleanup_patched_context(null_mp, ectx);
183 return error;
184 }
185
186 /* we got the attributes of the vnode we cover so plow ahead */
187 if (args->a_vp == null_mp->nullm_secondvp) {
188 ino = NULL_SECOND_INO;
189 }
190
191 VATTR_RETURN(args->a_vap, va_type, vnode_vtype(args->a_vp));
192 VATTR_RETURN(args->a_vap, va_rdev, 0);
193 VATTR_RETURN(args->a_vap, va_nlink, 3); /* always just ., .., and the child */
194 VATTR_RETURN(args->a_vap, va_total_size, 0); // hoping this is ok
195
196 VATTR_RETURN(args->a_vap, va_data_size, 0); // hoping this is ok
197 VATTR_RETURN(args->a_vap, va_data_alloc, 0);
198 VATTR_RETURN(args->a_vap, va_iosize, vfs_statfs(mp)->f_iosize);
199 VATTR_RETURN(args->a_vap, va_fileid, ino);
200 VATTR_RETURN(args->a_vap, va_linkid, ino);
201 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid)) {
202 VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(mp)->f_fsid.val[0]); // return the fsid of the mount point
203 }
204 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid64)) {
205 VATTR_RETURN(args->a_vap, va_fsid64, vfs_statfs(mp)->f_fsid);
206 }
207 VATTR_RETURN(args->a_vap, va_filerev, 0);
208 VATTR_RETURN(args->a_vap, va_gen, 0);
209 VATTR_RETURN(args->a_vap, va_flags, UF_HIDDEN); /* mark our fake directories as hidden. People
210 * shouldn't be enocouraged to poke around in them */
211
212 if (ino == NULL_SECOND_INO) {
213 VATTR_RETURN(args->a_vap, va_parentid, NULL_ROOT_INO); /* no parent at the root, so
214 * the only other vnode that
215 * goes through this path is
216 * second and its parent is
217 * 1.*/
218 }
219
220 if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) {
221 /* force dr_xr_xr_x */
222 VATTR_RETURN(args->a_vap, va_mode, S_IFDIR | S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
223 }
224 if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) {
225 VATTR_RETURN(args->a_vap, va_uid, covered_rootattr.va_uid);
226 }
227 if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) {
228 VATTR_RETURN(args->a_vap, va_gid, covered_rootattr.va_gid);
229 }
230
231 if (VATTR_IS_ACTIVE(args->a_vap, va_create_time)) {
232 VATTR_SET_SUPPORTED(args->a_vap, va_create_time);
233 args->a_vap->va_create_time.tv_sec = covered_rootattr.va_create_time.tv_sec;
234 args->a_vap->va_create_time.tv_nsec = covered_rootattr.va_create_time.tv_nsec;
235 }
236 if (VATTR_IS_ACTIVE(args->a_vap, va_modify_time)) {
237 VATTR_SET_SUPPORTED(args->a_vap, va_modify_time);
238 args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_modify_time.tv_sec;
239 args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_modify_time.tv_nsec;
240 }
241 if (VATTR_IS_ACTIVE(args->a_vap, va_access_time)) {
242 VATTR_SET_SUPPORTED(args->a_vap, va_access_time);
243 args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_access_time.tv_sec;
244 args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_access_time.tv_nsec;
245 }
246
247 nullfs_cleanup_patched_context(null_mp, ectx);
248 return 0;
249 }
250
251 static int
252 nullfs_getattr(struct vnop_getattr_args * args)
253 {
254 int error;
255 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
256 kauth_cred_t cred = vfs_context_ucred(args->a_context);
257 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
258
259 lck_mtx_lock(&null_mp->nullm_lock);
260 if (nullfs_isspecialvp(args->a_vp)) {
261 error = nullfs_special_getattr(args);
262 lck_mtx_unlock(&null_mp->nullm_lock);
263 return error;
264 }
265 lck_mtx_unlock(&null_mp->nullm_lock);
266
267 /* this will return a different inode for third than read dir will */
268 struct vnode * lowervp = NULLVPTOLOWERVP(args->a_vp);
269 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
270 error = vnode_getwithref(lowervp);
271
272 if (error == 0) {
273 error = VNOP_GETATTR(lowervp, args->a_vap, ectx);
274 vnode_put(lowervp);
275
276 if (error == 0) {
277 /* fix up fsid so it doesn't say the underlying fs*/
278 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid)) {
279 VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(vnode_mount(args->a_vp))->f_fsid.val[0]);
280 }
281 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid64)) {
282 VATTR_RETURN(args->a_vap, va_fsid64, vfs_statfs(vnode_mount(args->a_vp))->f_fsid);
283 }
284
285 /* Conjure up permissions */
286 if ((null_mp->nullm_flags & NULLM_UNVEIL) == NULLM_UNVEIL) {
287 if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) {
288 mode_t mode = args->a_vap->va_mode; // We will take away permisions if we don't have them
289
290 // Check for authorizations
291 // If we can read:
292 if (vnode_authorize(lowervp, NULL, KAUTH_VNODE_GENERIC_READ_BITS, ectx) == 0) {
293 mode |= S_IRUSR;
294 } else {
295 mode &= ~S_IRUSR;
296 }
297
298 // Or execute
299 // Directories need an execute bit...
300 if (vnode_authorize(lowervp, NULL, KAUTH_VNODE_GENERIC_EXECUTE_BITS, ectx) == 0) {
301 mode |= S_IXUSR;
302 } else {
303 mode &= ~S_IXUSR;
304 }
305
306 NULLFSDEBUG("Settings bits to %d\n", mode);
307 VATTR_RETURN(args->a_vap, va_mode, mode);
308 }
309 if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) {
310 VATTR_RETURN(args->a_vap, va_uid, kauth_cred_getuid(cred));
311 }
312 if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) {
313 VATTR_RETURN(args->a_vap, va_gid, kauth_cred_getgid(cred));
314 }
315 }
316 }
317 }
318
319 nullfs_cleanup_patched_context(null_mp, ectx);
320 return error;
321 }
322
323 static int
324 nullfs_open(struct vnop_open_args * args)
325 {
326 int error;
327 struct vnode *vp, *lvp;
328 mount_t mp = vnode_mount(args->a_vp);
329 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
330 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
331
332 if (nullfs_checkspecialvp(args->a_vp)) {
333 return 0; /* nothing extra needed */
334 }
335
336 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
337 vp = args->a_vp;
338 lvp = NULLVPTOLOWERVP(vp);
339 error = vnode_getwithref(lvp);
340 if (error == 0) {
341 error = VNOP_OPEN(lvp, args->a_mode, ectx);
342 vnode_put(lvp);
343 }
344
345 nullfs_cleanup_patched_context(null_mp, ectx);
346 return error;
347 }
348
349 static int
350 nullfs_close(struct vnop_close_args * args)
351 {
352 int error;
353 struct vnode *vp, *lvp;
354 mount_t mp = vnode_mount(args->a_vp);
355 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
356
357 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
358
359 if (nullfs_checkspecialvp(args->a_vp)) {
360 return 0; /* nothing extra needed */
361 }
362
363 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
364 vp = args->a_vp;
365 lvp = NULLVPTOLOWERVP(vp);
366
367 error = vnode_getwithref(lvp);
368 if (error == 0) {
369 error = VNOP_CLOSE(lvp, args->a_fflag, ectx);
370 vnode_put(lvp);
371 }
372
373 nullfs_cleanup_patched_context(null_mp, ectx);
374 return error;
375 }
376
377 /* get lvp's parent, if possible, even if it isn't set.
378 *
379 * lvp is expected to have an iocount before and after this call.
380 *
381 * if a dvpp is populated the returned vnode has an iocount. */
382 static int
383 null_get_lowerparent(vnode_t lvp, vnode_t * dvpp, vfs_context_t ctx)
384 {
385 int error = 0;
386 struct vnode_attr va;
387 mount_t mp = vnode_mount(lvp);
388 vnode_t dvp = vnode_parent(lvp);
389
390 if (dvp) {
391 error = vnode_get(dvp);
392 goto end;
393 }
394
395 error = ENOENT;
396 if (!(mp->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
397 goto end;
398 }
399
400 VATTR_INIT(&va);
401 VATTR_WANTED(&va, va_parentid);
402
403 error = vnode_getattr(lvp, &va, ctx);
404
405 if (error || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
406 if (!error) {
407 error = ENOTSUP;
408 }
409 goto end;
410 }
411
412 error = VFS_VGET(mp, (ino64_t)va.va_parentid, &dvp, ctx);
413
414 end:
415 if (error == 0) {
416 *dvpp = dvp;
417 }
418 return error;
419 }
420
421 /* the mountpoint lock should be held going into this function */
422 static int
423 null_special_lookup(struct vnop_lookup_args * ap)
424 {
425 struct componentname * cnp = ap->a_cnp;
426 struct vnode * dvp = ap->a_dvp;
427 struct vnode * ldvp = NULL;
428 struct vnode * lvp = NULL;
429 struct vnode * vp = NULL;
430 struct mount * mp = vnode_mount(dvp);
431 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
432 int error = ENOENT;
433 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
434
435 if (dvp == null_mp->nullm_rootvp) {
436 /* handle . and .. */
437 if (cnp->cn_nameptr[0] == '.') {
438 if (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')) {
439 /* this is the root so both . and .. give back the root */
440 vp = dvp;
441 error = vnode_get(vp);
442 goto end;
443 }
444 }
445
446 /* our virtual wrapper directory should be d but D is acceptable if the
447 * lower file system is case insensitive */
448 if (cnp->cn_namelen == 1 &&
449 (cnp->cn_nameptr[0] == 'd' || (null_mp->nullm_flags & NULLM_CASEINSENSITIVE ? cnp->cn_nameptr[0] == 'D' : 0))) {
450 error = 0;
451 if (null_mp->nullm_secondvp == NULL) {
452 error = null_getnewvnode(mp, NULL, dvp, &vp, cnp, 0);
453 if (error) {
454 goto end;
455 }
456
457 null_mp->nullm_secondvp = vp;
458 } else {
459 vp = null_mp->nullm_secondvp;
460 error = vnode_get(vp);
461 }
462 }
463 } else if (dvp == null_mp->nullm_secondvp) {
464 /* handle . and .. */
465 if (cnp->cn_nameptr[0] == '.') {
466 if (cnp->cn_namelen == 1) {
467 vp = dvp;
468 error = vnode_get(vp);
469 goto end;
470 } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
471 /* parent here is the root vp */
472 vp = null_mp->nullm_rootvp;
473 error = vnode_get(vp);
474 goto end;
475 }
476 }
477 /* nullmp->nullm_lowerrootvp was set at mount time so don't need to lock to
478 * access it */
479 /* v_name should be null terminated but cn_nameptr is not necessarily.
480 * cn_namelen is the number of characters before the null in either case */
481 error = vnode_getwithvid(null_mp->nullm_lowerrootvp, null_mp->nullm_lowerrootvid);
482 if (error) {
483 goto end;
484 }
485
486 /* We don't want to mess with case insensitivity and unicode, so the plan to
487 * check here is
488 * 1. try to get the lower root's parent
489 * 2. If we get a parent, then perform a lookup on the lower file system
490 * using the parent and the passed in cnp
491 * 3. If that worked and we got a vp, then see if the vp is lowerrootvp. If
492 * so we got a match
493 * 4. Anything else results in ENOENT.
494 */
495 error = null_get_lowerparent(null_mp->nullm_lowerrootvp, &ldvp, ectx);
496
497 if (error == 0) {
498 error = VNOP_LOOKUP(ldvp, &lvp, cnp, ectx);
499 vnode_put(ldvp);
500
501 if (error == 0) {
502 if (lvp == null_mp->nullm_lowerrootvp) {
503 /* always check the hashmap for a vnode for this, the root of the
504 * mirrored system */
505 error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0);
506
507 if (error == 0 && null_mp->nullm_thirdcovervp == NULL) {
508 /* if nodeget succeeded then vp has an iocount*/
509 null_mp->nullm_thirdcovervp = vp;
510 }
511 } else {
512 error = ENOENT;
513 }
514 vnode_put(lvp);
515 }
516 }
517 vnode_put(null_mp->nullm_lowerrootvp);
518 }
519
520 end:
521 nullfs_cleanup_patched_context(null_mp, ectx);
522 if (error == 0) {
523 *ap->a_vpp = vp;
524 }
525 return error;
526 }
527
528 /*
529 * We have to carry on the locking protocol on the null layer vnodes
530 * as we progress through the tree. We also have to enforce read-only
531 * if this layer is mounted read-only.
532 */
533 static int
534 null_lookup(struct vnop_lookup_args * ap)
535 {
536 struct componentname * cnp = ap->a_cnp;
537 struct vnode * dvp = ap->a_dvp;
538 struct vnode *vp, *ldvp, *lvp;
539 struct mount * mp;
540 struct null_mount * null_mp;
541 int error;
542 vfs_context_t ectx;
543
544 NULLFSDEBUG("%s parent: %p component: %.*s\n", __FUNCTION__, ap->a_dvp, cnp->cn_namelen, cnp->cn_nameptr);
545
546 mp = vnode_mount(dvp);
547 /* rename and delete are not allowed. this is a read only file system */
548 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME || cnp->cn_nameiop == CREATE) {
549 return EROFS;
550 }
551 null_mp = MOUNTTONULLMOUNT(mp);
552
553
554 lck_mtx_lock(&null_mp->nullm_lock);
555 if (nullfs_isspecialvp(dvp)) {
556 error = null_special_lookup(ap);
557 lck_mtx_unlock(&null_mp->nullm_lock);
558 return error;
559 }
560 lck_mtx_unlock(&null_mp->nullm_lock);
561
562 // . and .. handling
563 if (cnp->cn_nameptr[0] == '.') {
564 if (cnp->cn_namelen == 1) {
565 vp = dvp;
566 } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
567 /* mount point crossing is handled in null_special_lookup */
568 vp = vnode_parent(dvp);
569 } else {
570 goto notdot;
571 }
572
573 error = vp ? vnode_get(vp) : ENOENT;
574
575 if (error == 0) {
576 *ap->a_vpp = vp;
577 }
578
579 return error;
580 }
581
582 notdot:
583 ectx = nullfs_get_patched_context(null_mp, ap->a_context);
584 ldvp = NULLVPTOLOWERVP(dvp);
585 vp = lvp = NULL;
586
587 /*
588 * Hold ldvp. The reference on it, owned by dvp, is lost in
589 * case of dvp reclamation.
590 */
591 error = vnode_getwithref(ldvp);
592 if (error) {
593 nullfs_cleanup_patched_context(null_mp, ectx);
594 return error;
595 }
596
597 error = VNOP_LOOKUP(ldvp, &lvp, cnp, ectx);
598
599 vnode_put(ldvp);
600
601 if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
602 if (ldvp == lvp) {
603 vp = dvp;
604 error = vnode_get(vp);
605 } else {
606 error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0);
607 }
608 if (error == 0) {
609 *ap->a_vpp = vp;
610 }
611 /* if we got lvp, drop the iocount from VNOP_LOOKUP */
612 if (lvp != NULL) {
613 vnode_put(lvp);
614 }
615 }
616
617 nullfs_cleanup_patched_context(null_mp, ectx);
618 return error;
619 }
620
621 /*
622 * Don't think this needs to do anything
623 */
624 static int
625 null_inactive(__unused struct vnop_inactive_args * ap)
626 {
627 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
628
629 return 0;
630 }
631
632 static int
633 null_reclaim(struct vnop_reclaim_args * ap)
634 {
635 struct vnode * vp;
636 struct null_node * xp;
637 struct vnode * lowervp;
638 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
639
640 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
641
642 vp = ap->a_vp;
643
644 xp = VTONULL(vp);
645 lowervp = xp->null_lowervp;
646
647 lck_mtx_lock(&null_mp->nullm_lock);
648
649 vnode_removefsref(vp);
650
651 if (lowervp != NULL) {
652 /* root and second don't have a lowervp, so nothing to release and nothing
653 * got hashed */
654 if (xp->null_flags & NULL_FLAG_HASHED) {
655 /* only call this if we actually made it into the hash list. reclaim gets
656 * called also to
657 * clean up a vnode that got created when it didn't need to under race
658 * conditions */
659 null_hashrem(xp);
660 }
661 vnode_getwithref(lowervp);
662 vnode_rele(lowervp);
663 vnode_put(lowervp);
664 }
665
666 if (vp == null_mp->nullm_rootvp) {
667 null_mp->nullm_rootvp = NULL;
668 } else if (vp == null_mp->nullm_secondvp) {
669 null_mp->nullm_secondvp = NULL;
670 } else if (vp == null_mp->nullm_thirdcovervp) {
671 null_mp->nullm_thirdcovervp = NULL;
672 }
673
674 lck_mtx_unlock(&null_mp->nullm_lock);
675
676 cache_purge(vp);
677 vnode_clearfsnode(vp);
678
679 FREE(xp, M_TEMP);
680
681 return 0;
682 }
683
/*
 * Record length for the directory entry dp: sizeof(struct dirent) less
 * NAME_MAX, plus d_namlen + 1 rounded up to a multiple of 4.
 * dp is evaluated once.
 *
 * NOTE(review): this subtracts NAME_MAX rather than the size of d_name --
 * confirm against the kernel's struct dirent that the result is intended.
 */
#define DIRENT_SZ(dp) ((sizeof(struct dirent) - NAME_MAX) + (((dp)->d_namlen + 1 + 3) & ~3))
685
686 static int
687 store_entry_special(ino_t ino, const char * name, struct uio * uio)
688 {
689 struct dirent e;
690 size_t namelen = strlen(name);
691 int error = EINVAL;
692
693 if (namelen + 1 <= NAME_MAX) {
694 memset(&e, 0, sizeof(e));
695
696 e.d_ino = ino;
697 e.d_type = DT_DIR;
698
699 e.d_namlen = namelen; /* don't include NUL */
700 e.d_reclen = DIRENT_SZ(&e);
701 if (uio_resid(uio) >= e.d_reclen) {
702 strlcpy(e.d_name, name, NAME_MAX);
703 error = uiomove((caddr_t)&e, e.d_reclen, uio);
704 } else {
705 error = EMSGSIZE;
706 }
707 }
708 return error;
709 }
710
711 static int
712 nullfs_special_readdir(struct vnop_readdir_args * ap)
713 {
714 struct vnode * vp = ap->a_vp;
715 struct uio * uio = ap->a_uio;
716 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(vp));
717 off_t offset = uio_offset(uio);
718 int error = ERANGE;
719 int items = 0;
720 ino_t ino = 0;
721 const char * name = NULL;
722
723 if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) {
724 return EINVAL;
725 }
726
727 if (offset == 0) {
728 /* . case */
729 if (vp == null_mp->nullm_rootvp) {
730 ino = NULL_ROOT_INO;
731 } else { /* only get here if vp matches nullm_rootvp or nullm_secondvp */
732 ino = NULL_SECOND_INO;
733 }
734 error = store_entry_special(ino, ".", uio);
735 if (error) {
736 goto out;
737 }
738 offset++;
739 items++;
740 }
741 if (offset == 1) {
742 /* .. case */
743 /* only get here if vp matches nullm_rootvp or nullm_secondvp */
744 ino = NULL_ROOT_INO;
745
746 error = store_entry_special(ino, "..", uio);
747 if (error) {
748 goto out;
749 }
750 offset++;
751 items++;
752 }
753 if (offset == 2) {
754 /* the directory case */
755 if (vp == null_mp->nullm_rootvp) {
756 ino = NULL_SECOND_INO;
757 name = "d";
758 } else { /* only get here if vp matches nullm_rootvp or nullm_secondvp */
759 ino = NULL_THIRD_INO;
760 if (vnode_getwithvid(null_mp->nullm_lowerrootvp, null_mp->nullm_lowerrootvid)) {
761 /* In this case the lower file system has been ripped out from under us,
762 * but we don't want to error out
763 * Instead we just want d to look empty. */
764 error = 0;
765 goto out;
766 }
767 name = vnode_getname_printable(null_mp->nullm_lowerrootvp);
768 }
769 error = store_entry_special(ino, name, uio);
770
771 if (ino == NULL_THIRD_INO) {
772 vnode_putname_printable(name);
773 vnode_put(null_mp->nullm_lowerrootvp);
774 }
775
776 if (error) {
777 goto out;
778 }
779 offset++;
780 items++;
781 }
782
783 out:
784 if (error == EMSGSIZE) {
785 error = 0; /* return success if we ran out of space, but we wanted to make
786 * sure that we didn't update offset and items incorrectly */
787 }
788 uio_setoffset(uio, offset);
789 if (ap->a_numdirent) {
790 *ap->a_numdirent = items;
791 }
792 return error;
793 }
794
795 static int
796 nullfs_readdir(struct vnop_readdir_args * ap)
797 {
798 struct vnode *vp, *lvp;
799 int error;
800 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
801
802 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
803 /* assumption is that any vp that comes through here had to go through lookup
804 */
805
806 lck_mtx_lock(&null_mp->nullm_lock);
807 if (nullfs_isspecialvp(ap->a_vp)) {
808 error = nullfs_special_readdir(ap);
809 lck_mtx_unlock(&null_mp->nullm_lock);
810 return error;
811 }
812 lck_mtx_unlock(&null_mp->nullm_lock);
813
814 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
815 vp = ap->a_vp;
816 lvp = NULLVPTOLOWERVP(vp);
817 error = vnode_getwithref(lvp);
818 if (error == 0) {
819 error = VNOP_READDIR(lvp, ap->a_uio, ap->a_flags, ap->a_eofflag, ap->a_numdirent, ectx);
820 vnode_put(lvp);
821 }
822
823 nullfs_cleanup_patched_context(null_mp, ectx);
824 return error;
825 }
826
827 static int
828 nullfs_readlink(struct vnop_readlink_args * ap)
829 {
830 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
831 int error;
832 struct vnode *vp, *lvp;
833 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
834
835 if (nullfs_checkspecialvp(ap->a_vp)) {
836 return ENOTSUP; /* the special vnodes aren't links */
837 }
838
839 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
840 vp = ap->a_vp;
841 lvp = NULLVPTOLOWERVP(vp);
842
843 error = vnode_getwithref(lvp);
844 if (error == 0) {
845 error = VNOP_READLINK(lvp, ap->a_uio, ectx);
846 vnode_put(lvp);
847
848 if (error) {
849 NULLFSDEBUG("readlink failed: %d\n", error);
850 }
851 }
852
853 nullfs_cleanup_patched_context(null_mp, ectx);
854 return error;
855 }
856
857 static int
858 nullfs_pathconf(__unused struct vnop_pathconf_args * args)
859 {
860 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
861 return EINVAL;
862 }
863
864 static int
865 nullfs_fsync(__unused struct vnop_fsync_args * args)
866 {
867 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
868 return 0;
869 }
870
871 static int
872 nullfs_mmap(struct vnop_mmap_args * args)
873 {
874 int error;
875 struct vnode *vp, *lvp;
876 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
877
878 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
879
880 if (nullfs_checkspecialvp(args->a_vp)) {
881 return 0; /* nothing extra needed */
882 }
883
884 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
885 vp = args->a_vp;
886 lvp = NULLVPTOLOWERVP(vp);
887 error = vnode_getwithref(lvp);
888 if (error == 0) {
889 error = VNOP_MMAP(lvp, args->a_fflags, ectx);
890 vnode_put(lvp);
891 }
892
893 nullfs_cleanup_patched_context(null_mp, ectx);
894 return error;
895 }
896
897 static int
898 nullfs_mnomap(struct vnop_mnomap_args * args)
899 {
900 int error;
901 struct vnode *vp, *lvp;
902 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
903
904 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
905
906 if (nullfs_checkspecialvp(args->a_vp)) {
907 return 0; /* nothing extra needed */
908 }
909
910 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
911 vp = args->a_vp;
912 lvp = NULLVPTOLOWERVP(vp);
913 error = vnode_getwithref(lvp);
914 if (error == 0) {
915 error = VNOP_MNOMAP(lvp, ectx);
916 vnode_put(lvp);
917 }
918
919 nullfs_cleanup_patched_context(null_mp, ectx);
920 return error;
921 }
922
923 static int
924 nullfs_getxattr(struct vnop_getxattr_args * args)
925 {
926 int error;
927 struct vnode *vp, *lvp;
928 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
929
930 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
931
932 if (nullfs_checkspecialvp(args->a_vp)) {
933 return ENOATTR; /* no xattrs on the special vnodes */
934 }
935
936 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
937 vp = args->a_vp;
938 lvp = NULLVPTOLOWERVP(vp);
939 error = vnode_getwithref(lvp);
940 if (error == 0) {
941 error = VNOP_GETXATTR(lvp, args->a_name, args->a_uio, args->a_size, args->a_options, ectx);
942 vnode_put(lvp);
943 }
944
945 nullfs_cleanup_patched_context(null_mp, ectx);
946 return error;
947 }
948
949 static int
950 nullfs_listxattr(struct vnop_listxattr_args * args)
951 {
952 int error;
953 struct vnode *vp, *lvp;
954 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
955
956 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
957
958 if (nullfs_checkspecialvp(args->a_vp)) {
959 return 0; /* no xattrs on the special vnodes */
960 }
961
962 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
963 vp = args->a_vp;
964 lvp = NULLVPTOLOWERVP(vp);
965 error = vnode_getwithref(lvp);
966 if (error == 0) {
967 error = VNOP_LISTXATTR(lvp, args->a_uio, args->a_size, args->a_options, ectx);
968 vnode_put(lvp);
969 }
970
971 nullfs_cleanup_patched_context(null_mp, ectx);
972 return error;
973 }
974
975 /* relies on v1 paging */
976 static int
977 nullfs_pagein(struct vnop_pagein_args * ap)
978 {
979 int error = EIO;
980 struct vnode *vp, *lvp;
981 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
982 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
983
984 vp = ap->a_vp;
985 lvp = NULLVPTOLOWERVP(vp);
986
987 if (vnode_vtype(vp) != VREG) {
988 return ENOTSUP;
989 }
990
991 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
992 /*
993 * Ask VM/UBC/VFS to do our bidding
994 */
995 if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
996 vm_offset_t ioaddr;
997 uio_t auio;
998 kern_return_t kret;
999 off_t bytes_to_commit;
1000 off_t lowersize;
1001 upl_t upl = ap->a_pl;
1002 user_ssize_t bytes_remaining = 0;
1003
1004 auio = uio_create(1, ap->a_f_offset, UIO_SYSSPACE, UIO_READ);
1005 if (auio == NULL) {
1006 error = EIO;
1007 goto exit_no_unmap;
1008 }
1009
1010 kret = ubc_upl_map(upl, &ioaddr);
1011 if (KERN_SUCCESS != kret) {
1012 panic("nullfs_pagein: ubc_upl_map() failed with (%d)", kret);
1013 }
1014
1015 ioaddr += ap->a_pl_offset;
1016
1017 error = uio_addiov(auio, (user_addr_t)ioaddr, ap->a_size);
1018 if (error) {
1019 goto exit;
1020 }
1021
1022 lowersize = ubc_getsize(lvp);
1023 if (lowersize != ubc_getsize(vp)) {
1024 (void)ubc_setsize(vp, lowersize); /* ignore failures, nothing can be done */
1025 }
1026
1027 error = VNOP_READ(lvp, auio, ((ap->a_flags & UPL_IOSYNC) ? IO_SYNC : 0), ectx);
1028
1029 bytes_remaining = uio_resid(auio);
1030 if (bytes_remaining > 0 && bytes_remaining <= (user_ssize_t)ap->a_size) {
1031 /* zero bytes that weren't read in to the upl */
1032 bzero((void*)((uintptr_t)(ioaddr + ap->a_size - bytes_remaining)), (size_t) bytes_remaining);
1033 }
1034
1035 exit:
1036 kret = ubc_upl_unmap(upl);
1037 if (KERN_SUCCESS != kret) {
1038 panic("nullfs_pagein: ubc_upl_unmap() failed with (%d)", kret);
1039 }
1040
1041 if (auio != NULL) {
1042 uio_free(auio);
1043 }
1044
1045 exit_no_unmap:
1046 if ((ap->a_flags & UPL_NOCOMMIT) == 0) {
1047 if (!error && (bytes_remaining >= 0) && (bytes_remaining <= (user_ssize_t)ap->a_size)) {
1048 /* only commit what was read in (page aligned)*/
1049 bytes_to_commit = ap->a_size - bytes_remaining;
1050 if (bytes_to_commit) {
1051 /* need to make sure bytes_to_commit and byte_remaining are page aligned before calling ubc_upl_commit_range*/
1052 if (bytes_to_commit & PAGE_MASK) {
1053 bytes_to_commit = (bytes_to_commit & (~PAGE_MASK)) + (PAGE_MASK + 1);
1054 assert(bytes_to_commit <= (off_t)ap->a_size);
1055
1056 bytes_remaining = ap->a_size - bytes_to_commit;
1057 }
1058 ubc_upl_commit_range(upl, ap->a_pl_offset, (upl_size_t)bytes_to_commit, UPL_COMMIT_FREE_ON_EMPTY);
1059 }
1060
1061 /* abort anything thats left */
1062 if (bytes_remaining) {
1063 ubc_upl_abort_range(upl, ap->a_pl_offset + bytes_to_commit, (upl_size_t)bytes_remaining, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
1064 }
1065 } else {
1066 ubc_upl_abort_range(upl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
1067 }
1068 }
1069 vnode_put(lvp);
1070 } else if ((ap->a_flags & UPL_NOCOMMIT) == 0) {
1071 ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
1072 }
1073
1074 nullfs_cleanup_patched_context(null_mp, ectx);
1075 return error;
1076 }
1077
1078 static int
1079 nullfs_read(struct vnop_read_args * ap)
1080 {
1081 int error = EIO;
1082
1083 struct vnode *vp, *lvp;
1084 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
1085 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
1086
1087 if (nullfs_checkspecialvp(ap->a_vp)) {
1088 return ENOTSUP; /* the special vnodes can't be read */
1089 }
1090
1091 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
1092 vp = ap->a_vp;
1093 lvp = NULLVPTOLOWERVP(vp);
1094
1095 /*
1096 * First some house keeping
1097 */
1098 if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
1099 if (!vnode_isreg(lvp) && !vnode_islnk(lvp)) {
1100 error = EPERM;
1101 goto end;
1102 }
1103
1104 if (uio_resid(ap->a_uio) == 0) {
1105 error = 0;
1106 goto end;
1107 }
1108
1109 /*
1110 * Now ask VM/UBC/VFS to do our bidding
1111 */
1112
1113 error = VNOP_READ(lvp, ap->a_uio, ap->a_ioflag, ectx);
1114 if (error) {
1115 NULLFSDEBUG("VNOP_READ failed: %d\n", error);
1116 }
1117 end:
1118 vnode_put(lvp);
1119 }
1120
1121 nullfs_cleanup_patched_context(null_mp, ectx);
1122 return error;
1123 }
1124
1125 /*
1126 * Global vfs data structures
1127 */
1128
1129 static const struct vnodeopv_entry_desc nullfs_vnodeop_entries[] = {
1130 {.opve_op = &vnop_default_desc, .opve_impl = (vop_t)nullfs_default}, {.opve_op = &vnop_getattr_desc, .opve_impl = (vop_t)nullfs_getattr},
1131 {.opve_op = &vnop_open_desc, .opve_impl = (vop_t)nullfs_open}, {.opve_op = &vnop_close_desc, .opve_impl = (vop_t)nullfs_close},
1132 {.opve_op = &vnop_inactive_desc, .opve_impl = (vop_t)null_inactive}, {.opve_op = &vnop_reclaim_desc, .opve_impl = (vop_t)null_reclaim},
1133 {.opve_op = &vnop_lookup_desc, .opve_impl = (vop_t)null_lookup}, {.opve_op = &vnop_readdir_desc, .opve_impl = (vop_t)nullfs_readdir},
1134 {.opve_op = &vnop_readlink_desc, .opve_impl = (vop_t)nullfs_readlink}, {.opve_op = &vnop_pathconf_desc, .opve_impl = (vop_t)nullfs_pathconf},
1135 {.opve_op = &vnop_fsync_desc, .opve_impl = (vop_t)nullfs_fsync}, {.opve_op = &vnop_mmap_desc, .opve_impl = (vop_t)nullfs_mmap},
1136 {.opve_op = &vnop_mnomap_desc, .opve_impl = (vop_t)nullfs_mnomap}, {.opve_op = &vnop_getxattr_desc, .opve_impl = (vop_t)nullfs_getxattr},
1137 {.opve_op = &vnop_pagein_desc, .opve_impl = (vop_t)nullfs_pagein}, {.opve_op = &vnop_read_desc, .opve_impl = (vop_t)nullfs_read},
1138 {.opve_op = &vnop_listxattr_desc, .opve_impl = (vop_t)nullfs_listxattr}, {.opve_op = NULL, .opve_impl = NULL},
1139 };
1140
1141 const struct vnodeopv_desc nullfs_vnodeop_opv_desc = {.opv_desc_vector_p = &nullfs_vnodeop_p, .opv_desc_ops = nullfs_vnodeop_entries};
1142
1143 //NULLFS Specific helper function
1144
1145 int
1146 nullfs_getbackingvnode(vnode_t in_vp, vnode_t* out_vpp)
1147 {
1148 int result = EINVAL;
1149
1150 if (out_vpp == NULL || in_vp == NULL) {
1151 goto end;
1152 }
1153
1154 struct vfsstatfs * sp = NULL;
1155 mount_t mp = vnode_mount(in_vp);
1156
1157 sp = vfs_statfs(mp);
1158 //If this isn't a nullfs vnode or it is but it's a special vnode
1159 if (strcmp(sp->f_fstypename, "nullfs") != 0 || nullfs_checkspecialvp(in_vp)) {
1160 *out_vpp = NULLVP;
1161 result = ENOENT;
1162 goto end;
1163 }
1164
1165 vnode_t lvp = NULLVPTOLOWERVP(in_vp);
1166 if ((result = vnode_getwithvid(lvp, NULLVPTOLOWERVID(in_vp)))) {
1167 goto end;
1168 }
1169
1170 *out_vpp = lvp;
1171
1172 end:
1173 return result;
1174 }