]> git.saurik.com Git - apple/xnu.git/blob - bsd/miscfs/nullfs/null_vnops.c
xnu-7195.50.7.100.1.tar.gz
[apple/xnu.git] / bsd / miscfs / nullfs / null_vnops.c
1 /*
2 * Copyright (c) 2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*-
25 * Portions Copyright (c) 1992, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software contributed to Berkeley by
29 * John Heidemann of the UCLA Ficus project.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95
56 *
57 * Ancestors:
58 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
59 * ...and...
60 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
61 *
62 * $FreeBSD$
63 */
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/conf.h>
68 #include <sys/kernel.h>
69 #include <sys/lock.h>
70 #include <sys/malloc.h>
71 #include <sys/mount.h>
72 #include <sys/mount_internal.h>
73 #include <sys/namei.h>
74 #include <sys/sysctl.h>
75 #include <sys/vnode.h>
76 #include <sys/xattr.h>
77 #include <sys/ubc.h>
78 #include <sys/types.h>
79 #include <sys/dirent.h>
80 #include <sys/kauth.h>
81
82 #include "nullfs.h"
83
/*
 * Synthetic inode numbers reported for the fake nullfs vnodes: the root
 * directory, the "second" wrapper directory, and the directory entry that
 * readdir emits for the lower root inside the second directory.
 */
84 #define NULL_ROOT_INO 2
85 #define NULL_SECOND_INO 3
86 #define NULL_THIRD_INO 4
87
/* vnode operation vector pointer for nullfs; starts out NULL
 * (presumably filled in when the vnop table is registered -- confirm). */
88 vop_t * nullfs_vnodeop_p = NULL;
89
90 /* the mountpoint lock should be held going into this function */
91 static int
92 nullfs_isspecialvp(struct vnode * vp)
93 {
94 struct null_mount * null_mp;
95
96 null_mp = MOUNTTONULLMOUNT(vnode_mount(vp));
97
98 /* only check for root and second here, third is special in a different way,
99 * related only to lookup and readdir */
100 if (vp && (vp == null_mp->nullm_rootvp || vp == null_mp->nullm_secondvp)) {
101 return 1;
102 }
103 return 0;
104 }
105
106 /* helper function to handle locking where possible */
107 static int
108 nullfs_checkspecialvp(struct vnode* vp)
109 {
110 int result = 0;
111 struct null_mount * null_mp;
112
113 null_mp = MOUNTTONULLMOUNT(vnode_mount(vp));
114
115 lck_mtx_lock(&null_mp->nullm_lock);
116 result = (nullfs_isspecialvp(vp));
117 lck_mtx_unlock(&null_mp->nullm_lock);
118
119 return result;
120 }
121
122 vfs_context_t
123 nullfs_get_patched_context(struct null_mount * null_mp, vfs_context_t ctx)
124 {
125 struct vfs_context* ectx = ctx;
126 if ((null_mp->nullm_flags & NULLM_UNVEIL) == NULLM_UNVEIL) {
127 ectx = vfs_context_create(ctx);
128 ectx->vc_ucred = kauth_cred_setuidgid(ectx->vc_ucred, null_mp->uid, null_mp->gid);
129 }
130 return ectx;
131 }
132
133 void
134 nullfs_cleanup_patched_context(struct null_mount * null_mp, vfs_context_t ctx)
135 {
136 if ((null_mp->nullm_flags & NULLM_UNVEIL) == NULLM_UNVEIL) {
137 vfs_context_rele(ctx);
138 }
139 }
140
141 static int
142 nullfs_default(__unused struct vnop_generic_args * args)
143 {
144 NULLFSDEBUG("%s (default)\n", ((struct vnodeop_desc_fake *)args->a_desc)->vdesc_name);
145 return ENOTSUP;
146 }
147
148 static int
149 nullfs_special_getattr(struct vnop_getattr_args * args)
150 {
151 mount_t mp = vnode_mount(args->a_vp);
152 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
153
154 ino_t ino = NULL_ROOT_INO;
155 struct vnode_attr covered_rootattr;
156 vnode_t checkvp = null_mp->nullm_lowerrootvp;
157 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
158
159 VATTR_INIT(&covered_rootattr);
160 VATTR_WANTED(&covered_rootattr, va_uid);
161 VATTR_WANTED(&covered_rootattr, va_gid);
162 VATTR_WANTED(&covered_rootattr, va_create_time);
163 VATTR_WANTED(&covered_rootattr, va_modify_time);
164 VATTR_WANTED(&covered_rootattr, va_access_time);
165
166 /* prefer to get this from the lower root vp, but if not (i.e. forced unmount
167 * of lower fs) try the mount point covered vnode */
168 if (vnode_getwithvid(checkvp, null_mp->nullm_lowerrootvid)) {
169 checkvp = vfs_vnodecovered(mp);
170 if (checkvp == NULL) {
171 nullfs_cleanup_patched_context(null_mp, ectx);
172 return EIO;
173 }
174 }
175
176 int error = vnode_getattr(checkvp, &covered_rootattr, ectx);
177
178 vnode_put(checkvp);
179 if (error) {
180 /* we should have been able to get attributes fore one of the two choices so
181 * fail if we didn't */
182 nullfs_cleanup_patched_context(null_mp, ectx);
183 return error;
184 }
185
186 /* we got the attributes of the vnode we cover so plow ahead */
187 if (args->a_vp == null_mp->nullm_secondvp) {
188 ino = NULL_SECOND_INO;
189 }
190
191 VATTR_RETURN(args->a_vap, va_type, vnode_vtype(args->a_vp));
192 VATTR_RETURN(args->a_vap, va_rdev, 0);
193 VATTR_RETURN(args->a_vap, va_nlink, 3); /* always just ., .., and the child */
194 VATTR_RETURN(args->a_vap, va_total_size, 0); // hoping this is ok
195
196 VATTR_RETURN(args->a_vap, va_data_size, 0); // hoping this is ok
197 VATTR_RETURN(args->a_vap, va_data_alloc, 0);
198 VATTR_RETURN(args->a_vap, va_iosize, vfs_statfs(mp)->f_iosize);
199 VATTR_RETURN(args->a_vap, va_fileid, ino);
200 VATTR_RETURN(args->a_vap, va_linkid, ino);
201 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid)) {
202 VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(mp)->f_fsid.val[0]); // return the fsid of the mount point
203 }
204 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid64)) {
205 VATTR_RETURN(args->a_vap, va_fsid64, vfs_statfs(mp)->f_fsid);
206 }
207 VATTR_RETURN(args->a_vap, va_filerev, 0);
208 VATTR_RETURN(args->a_vap, va_gen, 0);
209 VATTR_RETURN(args->a_vap, va_flags, UF_HIDDEN); /* mark our fake directories as hidden. People
210 * shouldn't be enocouraged to poke around in them */
211
212 if (ino == NULL_SECOND_INO) {
213 VATTR_RETURN(args->a_vap, va_parentid, NULL_ROOT_INO); /* no parent at the root, so
214 * the only other vnode that
215 * goes through this path is
216 * second and its parent is
217 * 1.*/
218 }
219
220 if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) {
221 /* force dr_xr_xr_x */
222 VATTR_RETURN(args->a_vap, va_mode, S_IFDIR | S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
223 }
224 if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) {
225 VATTR_RETURN(args->a_vap, va_uid, covered_rootattr.va_uid);
226 }
227 if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) {
228 VATTR_RETURN(args->a_vap, va_gid, covered_rootattr.va_gid);
229 }
230
231 if (VATTR_IS_ACTIVE(args->a_vap, va_create_time)) {
232 VATTR_SET_SUPPORTED(args->a_vap, va_create_time);
233 args->a_vap->va_create_time.tv_sec = covered_rootattr.va_create_time.tv_sec;
234 args->a_vap->va_create_time.tv_nsec = covered_rootattr.va_create_time.tv_nsec;
235 }
236 if (VATTR_IS_ACTIVE(args->a_vap, va_modify_time)) {
237 VATTR_SET_SUPPORTED(args->a_vap, va_modify_time);
238 args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_modify_time.tv_sec;
239 args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_modify_time.tv_nsec;
240 }
241 if (VATTR_IS_ACTIVE(args->a_vap, va_access_time)) {
242 VATTR_SET_SUPPORTED(args->a_vap, va_access_time);
243 args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_access_time.tv_sec;
244 args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_access_time.tv_nsec;
245 }
246
247 nullfs_cleanup_patched_context(null_mp, ectx);
248 return 0;
249 }
250
251 static int
252 nullfs_getattr(struct vnop_getattr_args * args)
253 {
254 int error;
255 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
256 kauth_cred_t cred = vfs_context_ucred(args->a_context);
257 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
258
259 lck_mtx_lock(&null_mp->nullm_lock);
260 if (nullfs_isspecialvp(args->a_vp)) {
261 error = nullfs_special_getattr(args);
262 lck_mtx_unlock(&null_mp->nullm_lock);
263 return error;
264 }
265 lck_mtx_unlock(&null_mp->nullm_lock);
266
267 /* this will return a different inode for third than read dir will */
268 struct vnode * lowervp = NULLVPTOLOWERVP(args->a_vp);
269 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
270 error = vnode_getwithref(lowervp);
271
272 if (error == 0) {
273 error = VNOP_GETATTR(lowervp, args->a_vap, ectx);
274 vnode_put(lowervp);
275
276 if (error == 0) {
277 /* fix up fsid so it doesn't say the underlying fs*/
278 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid)) {
279 VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(vnode_mount(args->a_vp))->f_fsid.val[0]);
280 }
281 if (VATTR_IS_ACTIVE(args->a_vap, va_fsid64)) {
282 VATTR_RETURN(args->a_vap, va_fsid64, vfs_statfs(vnode_mount(args->a_vp))->f_fsid);
283 }
284
285 /* Conjure up permissions */
286 if ((null_mp->nullm_flags & NULLM_UNVEIL) == NULLM_UNVEIL) {
287 if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) {
288 mode_t mode = args->a_vap->va_mode; // We will take away permisions if we don't have them
289
290 // Check for authorizations
291 // If we can read:
292 if (vnode_authorize(lowervp, NULL, KAUTH_VNODE_GENERIC_READ_BITS, ectx) == 0) {
293 mode |= S_IRUSR;
294 } else {
295 mode &= ~S_IRUSR;
296 }
297
298 // Or execute
299 // Directories need an execute bit...
300 if (vnode_authorize(lowervp, NULL, KAUTH_VNODE_GENERIC_EXECUTE_BITS, ectx) == 0) {
301 mode |= S_IXUSR;
302 } else {
303 mode &= ~S_IXUSR;
304 }
305
306 NULLFSDEBUG("Settings bits to %d\n", mode);
307 VATTR_RETURN(args->a_vap, va_mode, mode);
308 }
309 if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) {
310 VATTR_RETURN(args->a_vap, va_uid, kauth_cred_getuid(cred));
311 }
312 if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) {
313 VATTR_RETURN(args->a_vap, va_gid, kauth_cred_getgid(cred));
314 }
315 }
316 }
317 }
318
319 nullfs_cleanup_patched_context(null_mp, ectx);
320 return error;
321 }
322
323 static int
324 nullfs_open(struct vnop_open_args * args)
325 {
326 int error;
327 struct vnode *vp, *lvp;
328 mount_t mp = vnode_mount(args->a_vp);
329 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
330 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
331
332 if (nullfs_checkspecialvp(args->a_vp)) {
333 return 0; /* nothing extra needed */
334 }
335
336 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
337 vp = args->a_vp;
338 lvp = NULLVPTOLOWERVP(vp);
339 error = vnode_getwithref(lvp);
340 if (error == 0) {
341 error = VNOP_OPEN(lvp, args->a_mode, ectx);
342 vnode_put(lvp);
343 }
344
345 nullfs_cleanup_patched_context(null_mp, ectx);
346 return error;
347 }
348
349 static int
350 nullfs_close(struct vnop_close_args * args)
351 {
352 int error;
353 struct vnode *vp, *lvp;
354 mount_t mp = vnode_mount(args->a_vp);
355 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
356
357 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
358
359 if (nullfs_checkspecialvp(args->a_vp)) {
360 return 0; /* nothing extra needed */
361 }
362
363 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
364 vp = args->a_vp;
365 lvp = NULLVPTOLOWERVP(vp);
366
367 error = vnode_getwithref(lvp);
368 if (error == 0) {
369 error = VNOP_CLOSE(lvp, args->a_fflag, ectx);
370 vnode_put(lvp);
371 }
372
373 nullfs_cleanup_patched_context(null_mp, ectx);
374 return error;
375 }
376
377 /* get lvp's parent, if possible, even if it isn't set.
378 *
379 * lvp is expected to have an iocount before and after this call.
380 *
381 * if a dvpp is populated the returned vnode has an iocount. */
382 static int
383 null_get_lowerparent(vnode_t lvp, vnode_t * dvpp, vfs_context_t ctx)
384 {
385 int error = 0;
386 struct vnode_attr va;
387 mount_t mp = vnode_mount(lvp);
388 vnode_t dvp = vnode_parent(lvp);
389
390 if (dvp) {
391 error = vnode_get(dvp);
392 goto end;
393 }
394
395 error = ENOENT;
396 if (!(mp->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
397 goto end;
398 }
399
400 VATTR_INIT(&va);
401 VATTR_WANTED(&va, va_parentid);
402
403 error = vnode_getattr(lvp, &va, ctx);
404
405 if (error || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
406 goto end;
407 }
408
409 error = VFS_VGET(mp, (ino64_t)va.va_parentid, &dvp, ctx);
410
411 end:
412 if (error == 0) {
413 *dvpp = dvp;
414 }
415 return error;
416 }
417
418 /* the mountpoint lock should be held going into this function */
419 static int
420 null_special_lookup(struct vnop_lookup_args * ap)
421 {
422 struct componentname * cnp = ap->a_cnp;
423 struct vnode * dvp = ap->a_dvp;
424 struct vnode * ldvp = NULL;
425 struct vnode * lvp = NULL;
426 struct vnode * vp = NULL;
427 struct mount * mp = vnode_mount(dvp);
428 struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);
429 int error = ENOENT;
430 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
431
432 if (dvp == null_mp->nullm_rootvp) {
433 /* handle . and .. */
434 if (cnp->cn_nameptr[0] == '.') {
435 if (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')) {
436 /* this is the root so both . and .. give back the root */
437 vp = dvp;
438 error = vnode_get(vp);
439 goto end;
440 }
441 }
442
443 /* our virtual wrapper directory should be d but D is acceptable if the
444 * lower file system is case insensitive */
445 if (cnp->cn_namelen == 1 &&
446 (cnp->cn_nameptr[0] == 'd' || (null_mp->nullm_flags & NULLM_CASEINSENSITIVE ? cnp->cn_nameptr[0] == 'D' : 0))) {
447 error = 0;
448 if (null_mp->nullm_secondvp == NULL) {
449 error = null_getnewvnode(mp, NULL, dvp, &vp, cnp, 0);
450 if (error) {
451 goto end;
452 }
453
454 null_mp->nullm_secondvp = vp;
455 } else {
456 vp = null_mp->nullm_secondvp;
457 error = vnode_get(vp);
458 }
459 }
460 } else if (dvp == null_mp->nullm_secondvp) {
461 /* handle . and .. */
462 if (cnp->cn_nameptr[0] == '.') {
463 if (cnp->cn_namelen == 1) {
464 vp = dvp;
465 error = vnode_get(vp);
466 goto end;
467 } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
468 /* parent here is the root vp */
469 vp = null_mp->nullm_rootvp;
470 error = vnode_get(vp);
471 goto end;
472 }
473 }
474 /* nullmp->nullm_lowerrootvp was set at mount time so don't need to lock to
475 * access it */
476 /* v_name should be null terminated but cn_nameptr is not necessarily.
477 * cn_namelen is the number of characters before the null in either case */
478 error = vnode_getwithvid(null_mp->nullm_lowerrootvp, null_mp->nullm_lowerrootvid);
479 if (error) {
480 goto end;
481 }
482
483 /* We don't want to mess with case insensitivity and unicode, so the plan to
484 * check here is
485 * 1. try to get the lower root's parent
486 * 2. If we get a parent, then perform a lookup on the lower file system
487 * using the parent and the passed in cnp
488 * 3. If that worked and we got a vp, then see if the vp is lowerrootvp. If
489 * so we got a match
490 * 4. Anything else results in ENOENT.
491 */
492 error = null_get_lowerparent(null_mp->nullm_lowerrootvp, &ldvp, ectx);
493
494 if (error == 0) {
495 error = VNOP_LOOKUP(ldvp, &lvp, cnp, ectx);
496 vnode_put(ldvp);
497
498 if (error == 0) {
499 if (lvp == null_mp->nullm_lowerrootvp) {
500 /* always check the hashmap for a vnode for this, the root of the
501 * mirrored system */
502 error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0);
503
504 if (error == 0 && null_mp->nullm_thirdcovervp == NULL) {
505 /* if nodeget succeeded then vp has an iocount*/
506 null_mp->nullm_thirdcovervp = vp;
507 }
508 } else {
509 error = ENOENT;
510 }
511 vnode_put(lvp);
512 }
513 }
514 vnode_put(null_mp->nullm_lowerrootvp);
515 }
516
517 end:
518 nullfs_cleanup_patched_context(null_mp, ectx);
519 if (error == 0) {
520 *ap->a_vpp = vp;
521 }
522 return error;
523 }
524
525 /*
526 * We have to carry on the locking protocol on the null layer vnodes
527 * as we progress through the tree. We also have to enforce read-only
528 * if this layer is mounted read-only.
529 */
530 static int
531 null_lookup(struct vnop_lookup_args * ap)
532 {
533 struct componentname * cnp = ap->a_cnp;
534 struct vnode * dvp = ap->a_dvp;
535 struct vnode *vp, *ldvp, *lvp;
536 struct mount * mp;
537 struct null_mount * null_mp;
538 int error;
539 vfs_context_t ectx;
540
541 NULLFSDEBUG("%s parent: %p component: %.*s\n", __FUNCTION__, ap->a_dvp, cnp->cn_namelen, cnp->cn_nameptr);
542
543 mp = vnode_mount(dvp);
544 /* rename and delete are not allowed. this is a read only file system */
545 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME || cnp->cn_nameiop == CREATE) {
546 return EROFS;
547 }
548 null_mp = MOUNTTONULLMOUNT(mp);
549
550
551 lck_mtx_lock(&null_mp->nullm_lock);
552 if (nullfs_isspecialvp(dvp)) {
553 error = null_special_lookup(ap);
554 lck_mtx_unlock(&null_mp->nullm_lock);
555 return error;
556 }
557 lck_mtx_unlock(&null_mp->nullm_lock);
558
559 // . and .. handling
560 if (cnp->cn_nameptr[0] == '.') {
561 if (cnp->cn_namelen == 1) {
562 vp = dvp;
563 } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
564 /* mount point crossing is handled in null_special_lookup */
565 vp = vnode_parent(dvp);
566 } else {
567 goto notdot;
568 }
569
570 error = vp ? vnode_get(vp) : ENOENT;
571
572 if (error == 0) {
573 *ap->a_vpp = vp;
574 }
575
576 return error;
577 }
578
579 notdot:
580 ectx = nullfs_get_patched_context(null_mp, ap->a_context);
581 ldvp = NULLVPTOLOWERVP(dvp);
582 vp = lvp = NULL;
583
584 /*
585 * Hold ldvp. The reference on it, owned by dvp, is lost in
586 * case of dvp reclamation.
587 */
588 error = vnode_getwithref(ldvp);
589 if (error) {
590 nullfs_cleanup_patched_context(null_mp, ectx);
591 return error;
592 }
593
594 error = VNOP_LOOKUP(ldvp, &lvp, cnp, ectx);
595
596 vnode_put(ldvp);
597
598 if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
599 if (ldvp == lvp) {
600 vp = dvp;
601 error = vnode_get(vp);
602 } else {
603 error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0);
604 }
605 if (error == 0) {
606 *ap->a_vpp = vp;
607 }
608 }
609
610 /* if we got lvp, drop the iocount from VNOP_LOOKUP */
611 if (lvp != NULL) {
612 vnode_put(lvp);
613 }
614
615 nullfs_cleanup_patched_context(null_mp, ectx);
616 return error;
617 }
618
619 /*
620 * Don't think this needs to do anything
621 */
622 static int
623 null_inactive(__unused struct vnop_inactive_args * ap)
624 {
625 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
626
627 return 0;
628 }
629
630 static int
631 null_reclaim(struct vnop_reclaim_args * ap)
632 {
633 struct vnode * vp;
634 struct null_node * xp;
635 struct vnode * lowervp;
636 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
637
638 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
639
640 vp = ap->a_vp;
641
642 xp = VTONULL(vp);
643 lowervp = xp->null_lowervp;
644
645 lck_mtx_lock(&null_mp->nullm_lock);
646
647 vnode_removefsref(vp);
648
649 if (lowervp != NULL) {
650 /* root and second don't have a lowervp, so nothing to release and nothing
651 * got hashed */
652 if (xp->null_flags & NULL_FLAG_HASHED) {
653 /* only call this if we actually made it into the hash list. reclaim gets
654 * called also to
655 * clean up a vnode that got created when it didn't need to under race
656 * conditions */
657 null_hashrem(xp);
658 }
659 vnode_getwithref(lowervp);
660 vnode_rele(lowervp);
661 vnode_put(lowervp);
662 }
663
664 if (vp == null_mp->nullm_rootvp) {
665 null_mp->nullm_rootvp = NULL;
666 } else if (vp == null_mp->nullm_secondvp) {
667 null_mp->nullm_secondvp = NULL;
668 } else if (vp == null_mp->nullm_thirdcovervp) {
669 null_mp->nullm_thirdcovervp = NULL;
670 }
671
672 lck_mtx_unlock(&null_mp->nullm_lock);
673
674 cache_purge(vp);
675 vnode_clearfsnode(vp);
676
677 FREE(xp, M_TEMP);
678
679 return 0;
680 }
681
682 #define DIRENT_SZ(dp) ((sizeof(struct dirent) - NAME_MAX) + (((dp)->d_namlen + 1 + 3) & ~3))
683
684 static int
685 store_entry_special(ino_t ino, const char * name, struct uio * uio)
686 {
687 struct dirent e;
688 size_t namelen = strlen(name);
689 int error = EINVAL;
690
691 if (namelen + 1 <= NAME_MAX) {
692 memset(&e, 0, sizeof(e));
693
694 e.d_ino = ino;
695 e.d_type = DT_DIR;
696
697 e.d_namlen = namelen; /* don't include NUL */
698 e.d_reclen = DIRENT_SZ(&e);
699 if (uio_resid(uio) >= e.d_reclen) {
700 strlcpy(e.d_name, name, NAME_MAX);
701 error = uiomove((caddr_t)&e, e.d_reclen, uio);
702 } else {
703 error = EMSGSIZE;
704 }
705 }
706 return error;
707 }
708
709 static int
710 nullfs_special_readdir(struct vnop_readdir_args * ap)
711 {
712 struct vnode * vp = ap->a_vp;
713 struct uio * uio = ap->a_uio;
714 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(vp));
715 off_t offset = uio_offset(uio);
716 int error = ERANGE;
717 int items = 0;
718 ino_t ino = 0;
719 const char * name = NULL;
720
721 if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) {
722 return EINVAL;
723 }
724
725 if (offset == 0) {
726 /* . case */
727 if (vp == null_mp->nullm_rootvp) {
728 ino = NULL_ROOT_INO;
729 } else { /* only get here if vp matches nullm_rootvp or nullm_secondvp */
730 ino = NULL_SECOND_INO;
731 }
732 error = store_entry_special(ino, ".", uio);
733 if (error) {
734 goto out;
735 }
736 offset++;
737 items++;
738 }
739 if (offset == 1) {
740 /* .. case */
741 /* only get here if vp matches nullm_rootvp or nullm_secondvp */
742 ino = NULL_ROOT_INO;
743
744 error = store_entry_special(ino, "..", uio);
745 if (error) {
746 goto out;
747 }
748 offset++;
749 items++;
750 }
751 if (offset == 2) {
752 /* the directory case */
753 if (vp == null_mp->nullm_rootvp) {
754 ino = NULL_SECOND_INO;
755 name = "d";
756 } else { /* only get here if vp matches nullm_rootvp or nullm_secondvp */
757 ino = NULL_THIRD_INO;
758 if (vnode_getwithvid(null_mp->nullm_lowerrootvp, null_mp->nullm_lowerrootvid)) {
759 /* In this case the lower file system has been ripped out from under us,
760 * but we don't want to error out
761 * Instead we just want d to look empty. */
762 error = 0;
763 goto out;
764 }
765 name = vnode_getname_printable(null_mp->nullm_lowerrootvp);
766 }
767 error = store_entry_special(ino, name, uio);
768
769 if (ino == NULL_THIRD_INO) {
770 vnode_putname_printable(name);
771 vnode_put(null_mp->nullm_lowerrootvp);
772 }
773
774 if (error) {
775 goto out;
776 }
777 offset++;
778 items++;
779 }
780
781 out:
782 if (error == EMSGSIZE) {
783 error = 0; /* return success if we ran out of space, but we wanted to make
784 * sure that we didn't update offset and items incorrectly */
785 }
786 uio_setoffset(uio, offset);
787 if (ap->a_numdirent) {
788 *ap->a_numdirent = items;
789 }
790 return error;
791 }
792
793 static int
794 nullfs_readdir(struct vnop_readdir_args * ap)
795 {
796 struct vnode *vp, *lvp;
797 int error;
798 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
799
800 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
801 /* assumption is that any vp that comes through here had to go through lookup
802 */
803
804 lck_mtx_lock(&null_mp->nullm_lock);
805 if (nullfs_isspecialvp(ap->a_vp)) {
806 error = nullfs_special_readdir(ap);
807 lck_mtx_unlock(&null_mp->nullm_lock);
808 return error;
809 }
810 lck_mtx_unlock(&null_mp->nullm_lock);
811
812 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
813 vp = ap->a_vp;
814 lvp = NULLVPTOLOWERVP(vp);
815 error = vnode_getwithref(lvp);
816 if (error == 0) {
817 error = VNOP_READDIR(lvp, ap->a_uio, ap->a_flags, ap->a_eofflag, ap->a_numdirent, ectx);
818 vnode_put(lvp);
819 }
820
821 nullfs_cleanup_patched_context(null_mp, ectx);
822 return error;
823 }
824
825 static int
826 nullfs_readlink(struct vnop_readlink_args * ap)
827 {
828 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
829 int error;
830 struct vnode *vp, *lvp;
831 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
832
833 if (nullfs_checkspecialvp(ap->a_vp)) {
834 return ENOTSUP; /* the special vnodes aren't links */
835 }
836
837 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
838 vp = ap->a_vp;
839 lvp = NULLVPTOLOWERVP(vp);
840
841 error = vnode_getwithref(lvp);
842 if (error == 0) {
843 error = VNOP_READLINK(lvp, ap->a_uio, ectx);
844 vnode_put(lvp);
845
846 if (error) {
847 NULLFSDEBUG("readlink failed: %d\n", error);
848 }
849 }
850
851 nullfs_cleanup_patched_context(null_mp, ectx);
852 return error;
853 }
854
855 static int
856 nullfs_pathconf(__unused struct vnop_pathconf_args * args)
857 {
858 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
859 return EINVAL;
860 }
861
862 static int
863 nullfs_fsync(__unused struct vnop_fsync_args * args)
864 {
865 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
866 return 0;
867 }
868
869 static int
870 nullfs_mmap(struct vnop_mmap_args * args)
871 {
872 int error;
873 struct vnode *vp, *lvp;
874 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
875
876 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
877
878 if (nullfs_checkspecialvp(args->a_vp)) {
879 return 0; /* nothing extra needed */
880 }
881
882 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
883 vp = args->a_vp;
884 lvp = NULLVPTOLOWERVP(vp);
885 error = vnode_getwithref(lvp);
886 if (error == 0) {
887 error = VNOP_MMAP(lvp, args->a_fflags, ectx);
888 vnode_put(lvp);
889 }
890
891 nullfs_cleanup_patched_context(null_mp, ectx);
892 return error;
893 }
894
895 static int
896 nullfs_mnomap(struct vnop_mnomap_args * args)
897 {
898 int error;
899 struct vnode *vp, *lvp;
900 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
901
902 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
903
904 if (nullfs_checkspecialvp(args->a_vp)) {
905 return 0; /* nothing extra needed */
906 }
907
908 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
909 vp = args->a_vp;
910 lvp = NULLVPTOLOWERVP(vp);
911 error = vnode_getwithref(lvp);
912 if (error == 0) {
913 error = VNOP_MNOMAP(lvp, ectx);
914 vnode_put(lvp);
915 }
916
917 nullfs_cleanup_patched_context(null_mp, ectx);
918 return error;
919 }
920
921 static int
922 nullfs_getxattr(struct vnop_getxattr_args * args)
923 {
924 int error;
925 struct vnode *vp, *lvp;
926 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
927
928 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
929
930 if (nullfs_checkspecialvp(args->a_vp)) {
931 return ENOATTR; /* no xattrs on the special vnodes */
932 }
933
934 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
935 vp = args->a_vp;
936 lvp = NULLVPTOLOWERVP(vp);
937 error = vnode_getwithref(lvp);
938 if (error == 0) {
939 error = VNOP_GETXATTR(lvp, args->a_name, args->a_uio, args->a_size, args->a_options, ectx);
940 vnode_put(lvp);
941 }
942
943 nullfs_cleanup_patched_context(null_mp, ectx);
944 return error;
945 }
946
947 static int
948 nullfs_listxattr(struct vnop_listxattr_args * args)
949 {
950 int error;
951 struct vnode *vp, *lvp;
952 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));
953
954 NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);
955
956 if (nullfs_checkspecialvp(args->a_vp)) {
957 return 0; /* no xattrs on the special vnodes */
958 }
959
960 vfs_context_t ectx = nullfs_get_patched_context(null_mp, args->a_context);
961 vp = args->a_vp;
962 lvp = NULLVPTOLOWERVP(vp);
963 error = vnode_getwithref(lvp);
964 if (error == 0) {
965 error = VNOP_LISTXATTR(lvp, args->a_uio, args->a_size, args->a_options, ectx);
966 vnode_put(lvp);
967 }
968
969 nullfs_cleanup_patched_context(null_mp, ectx);
970 return error;
971 }
972
973 /* relies on v1 paging */
974 static int
975 nullfs_pagein(struct vnop_pagein_args * ap)
976 {
977 int error = EIO;
978 struct vnode *vp, *lvp;
979 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
980 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
981
982 vp = ap->a_vp;
983 lvp = NULLVPTOLOWERVP(vp);
984
985 if (vnode_vtype(vp) != VREG) {
986 return ENOTSUP;
987 }
988
989 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
990 /*
991 * Ask VM/UBC/VFS to do our bidding
992 */
993 if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
994 vm_offset_t ioaddr;
995 uio_t auio;
996 kern_return_t kret;
997 off_t bytes_to_commit;
998 off_t lowersize;
999 upl_t upl = ap->a_pl;
1000 user_ssize_t bytes_remaining = 0;
1001
1002 auio = uio_create(1, ap->a_f_offset, UIO_SYSSPACE, UIO_READ);
1003 if (auio == NULL) {
1004 error = EIO;
1005 goto exit_no_unmap;
1006 }
1007
1008 kret = ubc_upl_map(upl, &ioaddr);
1009 if (KERN_SUCCESS != kret) {
1010 panic("nullfs_pagein: ubc_upl_map() failed with (%d)", kret);
1011 }
1012
1013 ioaddr += ap->a_pl_offset;
1014
1015 error = uio_addiov(auio, (user_addr_t)ioaddr, ap->a_size);
1016 if (error) {
1017 goto exit;
1018 }
1019
1020 lowersize = ubc_getsize(lvp);
1021 if (lowersize != ubc_getsize(vp)) {
1022 (void)ubc_setsize(vp, lowersize); /* ignore failures, nothing can be done */
1023 }
1024
1025 error = VNOP_READ(lvp, auio, ((ap->a_flags & UPL_IOSYNC) ? IO_SYNC : 0), ectx);
1026
1027 bytes_remaining = uio_resid(auio);
1028 if (bytes_remaining > 0 && bytes_remaining <= (user_ssize_t)ap->a_size) {
1029 /* zero bytes that weren't read in to the upl */
1030 bzero((void*)((uintptr_t)(ioaddr + ap->a_size - bytes_remaining)), (size_t) bytes_remaining);
1031 }
1032
1033 exit:
1034 kret = ubc_upl_unmap(upl);
1035 if (KERN_SUCCESS != kret) {
1036 panic("nullfs_pagein: ubc_upl_unmap() failed with (%d)", kret);
1037 }
1038
1039 if (auio != NULL) {
1040 uio_free(auio);
1041 }
1042
1043 exit_no_unmap:
1044 if ((ap->a_flags & UPL_NOCOMMIT) == 0) {
1045 if (!error && (bytes_remaining >= 0) && (bytes_remaining <= (user_ssize_t)ap->a_size)) {
1046 /* only commit what was read in (page aligned)*/
1047 bytes_to_commit = ap->a_size - bytes_remaining;
1048 if (bytes_to_commit) {
1049 /* need to make sure bytes_to_commit and byte_remaining are page aligned before calling ubc_upl_commit_range*/
1050 if (bytes_to_commit & PAGE_MASK) {
1051 bytes_to_commit = (bytes_to_commit & (~PAGE_MASK)) + (PAGE_MASK + 1);
1052 assert(bytes_to_commit <= (off_t)ap->a_size);
1053
1054 bytes_remaining = ap->a_size - bytes_to_commit;
1055 }
1056 ubc_upl_commit_range(upl, ap->a_pl_offset, (upl_size_t)bytes_to_commit, UPL_COMMIT_FREE_ON_EMPTY);
1057 }
1058
1059 /* abort anything that's left */
1060 if (bytes_remaining) {
1061 ubc_upl_abort_range(upl, ap->a_pl_offset + bytes_to_commit, (upl_size_t)bytes_remaining, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
1062 }
1063 } else {
1064 ubc_upl_abort_range(upl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
1065 }
1066 }
1067 vnode_put(lvp);
1068 } else if ((ap->a_flags & UPL_NOCOMMIT) == 0) {
1069 ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
1070 }
1071
1072 nullfs_cleanup_patched_context(null_mp, ectx);
1073 return error;
1074 }
1075
1076 static int
1077 nullfs_read(struct vnop_read_args * ap)
1078 {
1079 int error = EIO;
1080
1081 struct vnode *vp, *lvp;
1082 struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));
1083 NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
1084
1085 if (nullfs_checkspecialvp(ap->a_vp)) {
1086 return ENOTSUP; /* the special vnodes can't be read */
1087 }
1088
1089 vfs_context_t ectx = nullfs_get_patched_context(null_mp, ap->a_context);
1090 vp = ap->a_vp;
1091 lvp = NULLVPTOLOWERVP(vp);
1092
1093 /*
1094 * First some house keeping
1095 */
1096 if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
1097 if (!vnode_isreg(lvp) && !vnode_islnk(lvp)) {
1098 error = EPERM;
1099 goto end;
1100 }
1101
1102 if (uio_resid(ap->a_uio) == 0) {
1103 error = 0;
1104 goto end;
1105 }
1106
1107 /*
1108 * Now ask VM/UBC/VFS to do our bidding
1109 */
1110
1111 error = VNOP_READ(lvp, ap->a_uio, ap->a_ioflag, ectx);
1112 if (error) {
1113 NULLFSDEBUG("VNOP_READ failed: %d\n", error);
1114 }
1115 end:
1116 vnode_put(lvp);
1117 }
1118
1119 nullfs_cleanup_patched_context(null_mp, ectx);
1120 return error;
1121 }
1122
1123 /*
1124 * Global vfs data structures
1125 */
1126
1127 static const struct vnodeopv_entry_desc nullfs_vnodeop_entries[] = {
1128 {.opve_op = &vnop_default_desc, .opve_impl = (vop_t)nullfs_default}, {.opve_op = &vnop_getattr_desc, .opve_impl = (vop_t)nullfs_getattr},
1129 {.opve_op = &vnop_open_desc, .opve_impl = (vop_t)nullfs_open}, {.opve_op = &vnop_close_desc, .opve_impl = (vop_t)nullfs_close},
1130 {.opve_op = &vnop_inactive_desc, .opve_impl = (vop_t)null_inactive}, {.opve_op = &vnop_reclaim_desc, .opve_impl = (vop_t)null_reclaim},
1131 {.opve_op = &vnop_lookup_desc, .opve_impl = (vop_t)null_lookup}, {.opve_op = &vnop_readdir_desc, .opve_impl = (vop_t)nullfs_readdir},
1132 {.opve_op = &vnop_readlink_desc, .opve_impl = (vop_t)nullfs_readlink}, {.opve_op = &vnop_pathconf_desc, .opve_impl = (vop_t)nullfs_pathconf},
1133 {.opve_op = &vnop_fsync_desc, .opve_impl = (vop_t)nullfs_fsync}, {.opve_op = &vnop_mmap_desc, .opve_impl = (vop_t)nullfs_mmap},
1134 {.opve_op = &vnop_mnomap_desc, .opve_impl = (vop_t)nullfs_mnomap}, {.opve_op = &vnop_getxattr_desc, .opve_impl = (vop_t)nullfs_getxattr},
1135 {.opve_op = &vnop_pagein_desc, .opve_impl = (vop_t)nullfs_pagein}, {.opve_op = &vnop_read_desc, .opve_impl = (vop_t)nullfs_read},
1136 {.opve_op = &vnop_listxattr_desc, .opve_impl = (vop_t)nullfs_listxattr}, {.opve_op = NULL, .opve_impl = NULL},
1137 };
1138
1139 const struct vnodeopv_desc nullfs_vnodeop_opv_desc = {.opv_desc_vector_p = &nullfs_vnodeop_p, .opv_desc_ops = nullfs_vnodeop_entries};
1140
1141 // NULLFS-specific helper function
1142
1143 int
1144 nullfs_getbackingvnode(vnode_t in_vp, vnode_t* out_vpp)
1145 {
1146 int result = EINVAL;
1147
1148 if (out_vpp == NULL || in_vp == NULL) {
1149 goto end;
1150 }
1151
1152 struct vfsstatfs * sp = NULL;
1153 mount_t mp = vnode_mount(in_vp);
1154
1155 sp = vfs_statfs(mp);
1156 //If this isn't a nullfs vnode or it is but it's a special vnode
1157 if (strcmp(sp->f_fstypename, "nullfs") != 0 || nullfs_checkspecialvp(in_vp)) {
1158 *out_vpp = NULLVP;
1159 result = ENOENT;
1160 goto end;
1161 }
1162
1163 vnode_t lvp = NULLVPTOLOWERVP(in_vp);
1164 if ((result = vnode_getwithvid(lvp, NULLVPTOLOWERVID(in_vp)))) {
1165 goto end;
1166 }
1167
1168 *out_vpp = lvp;
1169
1170 end:
1171 return result;
1172 }