]> git.saurik.com Git - apple/xnu.git/blame - bsd/miscfs/bindfs/bind_vfsops.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / miscfs / bindfs / bind_vfsops.c
CommitLineData
f427ee49
A
1/*
2 * Copyright (c) 2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*-
25 * Portions Copyright (c) 1992, 1993, 1995
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software donated to Berkeley by
29 * Jan-Simon Pendry.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
56 *
57 * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
58 * $FreeBSD$
59 */
60
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/fcntl.h>
64#include <sys/kernel.h>
65#include <sys/lock.h>
66#include <sys/malloc.h>
67#include <sys/mount.h>
68#include <sys/mount_internal.h>
69#include <sys/namei.h>
70#include <sys/proc.h>
71#include <sys/vnode.h>
72#include <sys/vnode_internal.h>
73#include <security/mac_internal.h>
74
75#include <sys/param.h>
76
77#include <IOKit/IOBSD.h>
78
79#include "bindfs.h"
80
/* Entitlement checked by bindfs_mount() and bindfs_unmount() on the calling task. */
#define BINDFS_ENTITLEMENT "com.apple.private.bindfs-allow"

/* sizeof() a struct member without needing an instance; the operand is never evaluated. */
#define SIZEOF_MEMBER(type, member) (sizeof(((type *)NULL)->member))
/* sizeof() the f_mntfromname member of struct vfsstatfs, i.e. the longest source path we can record. */
#define MAX_MNT_FROM_LENGTH (SIZEOF_MEMBER(struct vfsstatfs, f_mntfromname))
85
86static int
87bindfs_vfs_getlowerattr(mount_t mp, struct vfs_attr * vfap, vfs_context_t ctx)
88{
89 memset(vfap, 0, sizeof(*vfap));
90 VFSATTR_INIT(vfap);
91 VFSATTR_WANTED(vfap, f_bsize);
92 VFSATTR_WANTED(vfap, f_iosize);
93 VFSATTR_WANTED(vfap, f_blocks);
94 VFSATTR_WANTED(vfap, f_bfree);
95 VFSATTR_WANTED(vfap, f_bavail);
96 VFSATTR_WANTED(vfap, f_bused);
97 VFSATTR_WANTED(vfap, f_files);
98 VFSATTR_WANTED(vfap, f_ffree);
99 VFSATTR_WANTED(vfap, f_capabilities);
100
101 return vfs_getattr(mp, vfap, ctx);
102}
103
104/*
105 * Mount bind layer
106 */
107static int
108bindfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
109{
110 int error = 0;
111 struct vnode *lowerrootvp = NULL, *vp = NULL;
112 struct vfsstatfs * sp = NULL;
113 struct bind_mount * xmp = NULL;
114 char data[MAXPATHLEN];
115 size_t count;
116 struct vfs_attr vfa;
117 /* set defaults (arbitrary since this file system is readonly) */
118 uint32_t bsize = BLKDEV_IOSIZE;
119 size_t iosize = BLKDEV_IOSIZE;
120 uint64_t blocks = 4711 * 4711;
121 uint64_t bfree = 0;
122 uint64_t bavail = 0;
123 uint64_t bused = 4711;
124 uint64_t files = 4711;
125 uint64_t ffree = 0;
126
127 kauth_cred_t cred = vfs_context_ucred(ctx);
128
129 BINDFSDEBUG("mp = %p %llx\n", (void *)mp, vfs_flags(mp));
130
131 if (vfs_flags(mp) & MNT_ROOTFS) {
132 return EOPNOTSUPP;
133 }
134
135 /*
136 * Update is a no-op
137 */
138 if (vfs_isupdate(mp)) {
139 return ENOTSUP;
140 }
141
142 /* check entitlement */
143 if (!IOTaskHasEntitlement(current_task(), BINDFS_ENTITLEMENT)) {
144 return EPERM;
145 }
146
147 /*
148 * Get argument
149 */
150 error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
151 if (error) {
152 BINDFSERROR("error copying data from user %d\n", error);
153 goto error;
154 }
155
156 /* This could happen if the system is configured for 32 bit inodes instead of
157 * 64 bit */
158 if (count > MAX_MNT_FROM_LENGTH) {
159 error = EINVAL;
160 BINDFSERROR("path to mount too large for this system %zu vs %lu\n", count, MAX_MNT_FROM_LENGTH);
161 goto error;
162 }
163
164 error = vnode_lookup(data, 0, &lowerrootvp, ctx);
165 if (error) {
166 BINDFSERROR("lookup of %s failed error: %d\n", data, error);
167 goto error;
168 }
169
170 /* lowervrootvp has an iocount after vnode_lookup, drop that for a usecount.
171 * Keep this to signal what we want to keep around the thing we are mirroring.
172 * Drop it in unmount.*/
173 error = vnode_ref(lowerrootvp);
174 vnode_put(lowerrootvp);
175 if (error) {
176 // If vnode_ref failed, then bind it out so it can't be used anymore in cleanup.
177 lowerrootvp = NULL;
178 goto error;
179 }
180
181 BINDFSDEBUG("mount %s\n", data);
182
183 MALLOC(xmp, struct bind_mount *, sizeof(*xmp), M_TEMP, M_WAITOK | M_ZERO);
184 if (xmp == NULL) {
185 error = ENOMEM;
186 goto error;
187 }
188
189 /*
190 * Save reference to underlying FS
191 */
192 xmp->bindm_lowerrootvp = lowerrootvp;
193 xmp->bindm_lowerrootvid = vnode_vid(lowerrootvp);
194
195 error = bind_nodeget(mp, lowerrootvp, NULL, &vp, NULL, 1);
196 if (error) {
197 goto error;
198 }
199 /* After bind_nodeget our root vnode is in the hash table and we have to usecounts on lowerrootvp
200 * One use count will get dropped when we reclaim the root during unmount.
201 * The other will get dropped in unmount */
202
203
204 /* vp has an iocount on it from vnode_create. drop that for a usecount. This
205 * is our root vnode so we drop the ref in unmount
206 *
207 * Assuming for now that because we created this vnode and we aren't finished mounting we can get a ref*/
208 vnode_ref(vp);
209 vnode_put(vp);
210
211 xmp->bindm_rootvp = vp;
212
213 /* read the flags the user set, but then ignore some of them, we will only
214 * allow them if they are set on the lower file system */
215 uint64_t flags = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
216 uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);
217
218 if (lowerflags) {
219 flags |= lowerflags;
220 }
221
222 /* force these flags */
223 flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
224 vfs_setflags(mp, flags);
225
226 vfs_setfsprivate(mp, xmp);
227 vfs_getnewfsid(mp);
228 vfs_setlocklocal(mp);
229
230 /* fill in the stat block */
231 sp = vfs_statfs(mp);
232 strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);
233
234 sp->f_flags = flags;
235
236 xmp->bindm_flags = BINDM_CASEINSENSITIVE; /* default to case insensitive */
237
238 error = bindfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
239 if (error == 0) {
240 if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
241 bsize = vfa.f_bsize;
242 }
243 if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
244 iosize = vfa.f_iosize;
245 }
246 if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
247 blocks = vfa.f_blocks;
248 }
249 if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
250 bfree = vfa.f_bfree;
251 }
252 if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
253 bavail = vfa.f_bavail;
254 }
255 if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
256 bused = vfa.f_bused;
257 }
258 if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
259 files = vfa.f_files;
260 }
261 if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
262 ffree = vfa.f_ffree;
263 }
264 if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
265 if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
266 (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
267 xmp->bindm_flags &= ~BINDM_CASEINSENSITIVE;
268 }
269 }
270 } else {
271 goto error;
272 }
273
274 sp->f_bsize = bsize;
275 sp->f_iosize = iosize;
276 sp->f_blocks = blocks;
277 sp->f_bfree = bfree;
278 sp->f_bavail = bavail;
279 sp->f_bused = bused;
280 sp->f_files = files;
281 sp->f_ffree = ffree;
282
283 /* Associate the mac label information from the mirrored filesystem with the
284 * mirror */
285 MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));
286
287 BINDFSDEBUG("lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
288 return 0;
289
290error:
291 if (xmp) {
292 FREE(xmp, M_TEMP);
293 }
294 if (lowerrootvp) {
295 vnode_getwithref(lowerrootvp);
296 vnode_rele(lowerrootvp);
297 vnode_put(lowerrootvp);
298 }
299 if (vp) {
300 /* we made the root vnode but the mount is failed, so clean it up */
301 vnode_getwithref(vp);
302 vnode_rele(vp);
303 /* give vp back */
304 vnode_recycle(vp);
305 vnode_put(vp);
306 }
307 return error;
308}
309
310/*
311 * Free reference to bind layer
312 */
313static int
314bindfs_unmount(struct mount * mp, int mntflags, __unused vfs_context_t ctx)
315{
316 struct bind_mount * mntdata;
317 struct vnode * vp;
318 int error, flags;
319
320 BINDFSDEBUG("mp = %p\n", (void *)mp);
321
322 /* check entitlement or superuser*/
323 if (!IOTaskHasEntitlement(current_task(), BINDFS_ENTITLEMENT) &&
324 vfs_context_suser(ctx) != 0) {
325 return EPERM;
326 }
327
328 if (mntflags & MNT_FORCE) {
329 flags = FORCECLOSE;
330 } else {
331 flags = 0;
332 }
333
334 mntdata = MOUNTTOBINDMOUNT(mp);
335 vp = mntdata->bindm_rootvp;
336
337 // release our reference on the root before flushing.
338 // it will get pulled out of the mount structure by reclaim
339 vnode_getalways(vp);
340
341 error = vflush(mp, vp, flags);
342 if (error) {
343 vnode_put(vp);
344 return error;
345 }
346
347 if (vnode_isinuse(vp, 1) && flags == 0) {
348 vnode_put(vp);
349 return EBUSY;
350 }
351
352 vnode_rele(vp); // Drop reference taken by bindfs_mount
353 vnode_put(vp); // Drop ref taken above
354
355 //Force close to get rid of the last vnode
356 (void)vflush(mp, NULL, FORCECLOSE);
357
358 /* no more vnodes, so tear down the mountpoint */
359
360 vfs_setfsprivate(mp, NULL);
361
362 vnode_getalways(mntdata->bindm_lowerrootvp);
363 vnode_rele(mntdata->bindm_lowerrootvp);
364 vnode_put(mntdata->bindm_lowerrootvp);
365
366 FREE(mntdata, M_TEMP);
367
368 uint64_t vflags = vfs_flags(mp);
369 vfs_setflags(mp, vflags & ~MNT_LOCAL);
370
371 return 0;
372}
373
374static int
375bindfs_root(struct mount * mp, struct vnode ** vpp, __unused vfs_context_t ctx)
376{
377 struct vnode * vp;
378 int error;
379
380 BINDFSDEBUG("mp = %p, vp = %p\n", (void *)mp, (void *)MOUNTTOBINDMOUNT(mp)->bindm_rootvp);
381
382 /*
383 * Return locked reference to root.
384 */
385 vp = MOUNTTOBINDMOUNT(mp)->bindm_rootvp;
386
387 error = vnode_get(vp);
388 if (error) {
389 return error;
390 }
391
392 *vpp = vp;
393 return 0;
394}
395
396static int
397bindfs_vfs_getattr(struct mount * mp, struct vfs_attr * vfap, vfs_context_t ctx)
398{
399 struct vnode * coveredvp = NULL;
400 struct vfs_attr vfa;
401 struct bind_mount * bind_mp = MOUNTTOBINDMOUNT(mp);
402 vol_capabilities_attr_t capabilities;
403 struct vfsstatfs * sp = vfs_statfs(mp);
404
405 struct timespec tzero = {.tv_sec = 0, .tv_nsec = 0};
406
407 BINDFSDEBUG("\n");
408
409 /* Set default capabilities in case the lower file system is gone */
410 memset(&capabilities, 0, sizeof(capabilities));
411 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
412 capabilities.valid[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
413
414 if (bindfs_vfs_getlowerattr(vnode_mount(bind_mp->bindm_lowerrootvp), &vfa, ctx) == 0) {
415 if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
416 memcpy(&capabilities, &vfa.f_capabilities, sizeof(capabilities));
417 /* don't support vget */
418 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
419
420 capabilities.capabilities[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
421
422 capabilities.valid[VOL_CAPABILITIES_FORMAT] &= ~(VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID);
423
424 capabilities.valid[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES; /* Always support UF_HIDDEN */
425
426 /* dont' support interfaces that only make sense on a writable file system
427 * or one with specific vnops implemented */
428 capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = 0;
429
430 capabilities.valid[VOL_CAPABILITIES_INTERFACES] &=
431 ~(VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_READDIRATTR | VOL_CAP_INT_EXCHANGEDATA |
432 VOL_CAP_INT_COPYFILE | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK);
433 }
434 }
435
436 if (VFSATTR_IS_ACTIVE(vfap, f_create_time)) {
437 VFSATTR_RETURN(vfap, f_create_time, tzero);
438 }
439
440 if (VFSATTR_IS_ACTIVE(vfap, f_modify_time)) {
441 VFSATTR_RETURN(vfap, f_modify_time, tzero);
442 }
443
444 if (VFSATTR_IS_ACTIVE(vfap, f_access_time)) {
445 VFSATTR_RETURN(vfap, f_access_time, tzero);
446 }
447
448 if (VFSATTR_IS_ACTIVE(vfap, f_bsize)) {
449 VFSATTR_RETURN(vfap, f_bsize, sp->f_bsize);
450 }
451
452 if (VFSATTR_IS_ACTIVE(vfap, f_iosize)) {
453 VFSATTR_RETURN(vfap, f_iosize, sp->f_iosize);
454 }
455
456 if (VFSATTR_IS_ACTIVE(vfap, f_owner)) {
457 VFSATTR_RETURN(vfap, f_owner, 0);
458 }
459
460 if (VFSATTR_IS_ACTIVE(vfap, f_blocks)) {
461 VFSATTR_RETURN(vfap, f_blocks, sp->f_blocks);
462 }
463
464 if (VFSATTR_IS_ACTIVE(vfap, f_bfree)) {
465 VFSATTR_RETURN(vfap, f_bfree, sp->f_bfree);
466 }
467
468 if (VFSATTR_IS_ACTIVE(vfap, f_bavail)) {
469 VFSATTR_RETURN(vfap, f_bavail, sp->f_bavail);
470 }
471
472 if (VFSATTR_IS_ACTIVE(vfap, f_bused)) {
473 VFSATTR_RETURN(vfap, f_bused, sp->f_bused);
474 }
475
476 if (VFSATTR_IS_ACTIVE(vfap, f_files)) {
477 VFSATTR_RETURN(vfap, f_files, sp->f_files);
478 }
479
480 if (VFSATTR_IS_ACTIVE(vfap, f_ffree)) {
481 VFSATTR_RETURN(vfap, f_ffree, sp->f_ffree);
482 }
483
484 if (VFSATTR_IS_ACTIVE(vfap, f_fssubtype)) {
485 VFSATTR_RETURN(vfap, f_fssubtype, 0);
486 }
487
488 if (VFSATTR_IS_ACTIVE(vfap, f_capabilities)) {
489 memcpy(&vfap->f_capabilities, &capabilities, sizeof(vol_capabilities_attr_t));
490
491 VFSATTR_SET_SUPPORTED(vfap, f_capabilities);
492 }
493
494 if (VFSATTR_IS_ACTIVE(vfap, f_attributes)) {
495 vol_attributes_attr_t * volattr = &vfap->f_attributes;
496
497 volattr->validattr.commonattr = 0;
498 volattr->validattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
499 volattr->validattr.dirattr = 0;
500 volattr->validattr.fileattr = 0;
501 volattr->validattr.forkattr = 0;
502
503 volattr->nativeattr.commonattr = 0;
504 volattr->nativeattr.volattr = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
505 volattr->nativeattr.dirattr = 0;
506 volattr->nativeattr.fileattr = 0;
507 volattr->nativeattr.forkattr = 0;
508
509 VFSATTR_SET_SUPPORTED(vfap, f_attributes);
510 }
511
512 if (VFSATTR_IS_ACTIVE(vfap, f_vol_name)) {
513 /* The name of the volume is the same as the directory we mounted on */
514 coveredvp = vfs_vnodecovered(mp);
515 if (coveredvp) {
516 const char * name = vnode_getname_printable(coveredvp);
517 strlcpy(vfap->f_vol_name, name, MAXPATHLEN);
518 vnode_putname_printable(name);
519
520 VFSATTR_SET_SUPPORTED(vfap, f_vol_name);
521 vnode_put(coveredvp);
522 }
523 }
524
525 return 0;
526}
527
528static int
529bindfs_sync(__unused struct mount * mp, __unused int waitfor, __unused vfs_context_t ctx)
530{
531 return 0;
532}
533
534
535
536static int
537bindfs_vfs_start(__unused struct mount * mp, __unused int flags, __unused vfs_context_t ctx)
538{
539 BINDFSDEBUG("\n");
540 return 0;
541}
542
/* vnode operation vector description for bindfs; defined in another translation unit */
extern const struct vnodeopv_desc bindfs_vnodeop_opv_desc;

/* The vnode operation vector descriptions belonging to bindfs (currently just one). */
const struct vnodeopv_desc * bindfs_vnodeopv_descs[] = {
	&bindfs_vnodeop_opv_desc,
};
548
549struct vfsops bindfs_vfsops = {
550 .vfs_mount = bindfs_mount,
551 .vfs_unmount = bindfs_unmount,
552 .vfs_start = bindfs_vfs_start,
553 .vfs_root = bindfs_root,
554 .vfs_getattr = bindfs_vfs_getattr,
555 .vfs_sync = bindfs_sync,
556 .vfs_init = bindfs_init,
557 .vfs_sysctl = NULL,
558 .vfs_setattr = NULL,
559};