xnu-7195.50.7.100.1.tar.gz

[apple/xnu.git] / bsd / miscfs / bindfs / bind_vfsops.c
diff --git a/bsd/miscfs/bindfs/bind_vfsops.c b/bsd/miscfs/bindfs/bind_vfsops.c

new file mode 100644 (file)

index 0000000..248469a
--- /dev/null
+++ b/bsd/miscfs/bindfs/bind_vfsops.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*-
+ * Portions Copyright (c) 1992, 1993, 1995
+ *  The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *  @(#)null_vfsops.c   8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c    1.2 (Berkeley) 6/18/92
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mount_internal.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
+#include <security/mac_internal.h>
+
+#include <sys/param.h>
+
+#include <IOKit/IOBSD.h>
+
+#include "bindfs.h"
+
+/* Entitlement string checked with IOTaskHasEntitlement() by the mount and unmount handlers. */
+#define BINDFS_ENTITLEMENT "com.apple.private.bindfs-allow"
+
+/* Size in bytes of one member of a struct type; the null pointer is only used inside sizeof. */
+#define SIZEOF_MEMBER(type, member) (sizeof(((type *)0)->member))
+/* Capacity of vfsstatfs.f_mntfromname; upper bound for the source path accepted at mount time. */
+#define MAX_MNT_FROM_LENGTH (SIZEOF_MEMBER(struct vfsstatfs, f_mntfromname))
+
+/*
+ * Query the statistics and capabilities of a lower (mirrored) mount.
+ *
+ * vfap is zeroed and re-initialised here, so anything the caller stored in
+ * it beforehand is discarded.  The block/file counters, the I/O sizes and the
+ * volume capabilities are requested.
+ *
+ * Returns whatever vfs_getattr() returns for mp.
+ */
+static int
+bindfs_vfs_getlowerattr(mount_t mp, struct vfs_attr * vfap, vfs_context_t ctx)
+{
+       int result;
+
+       /* start from a clean request */
+       memset(vfap, 0, sizeof(*vfap));
+       VFSATTR_INIT(vfap);
+
+       /* volume capabilities */
+       VFSATTR_WANTED(vfap, f_capabilities);
+
+       /* transfer sizes */
+       VFSATTR_WANTED(vfap, f_iosize);
+       VFSATTR_WANTED(vfap, f_bsize);
+
+       /* block accounting */
+       VFSATTR_WANTED(vfap, f_blocks);
+       VFSATTR_WANTED(vfap, f_bused);
+       VFSATTR_WANTED(vfap, f_bfree);
+       VFSATTR_WANTED(vfap, f_bavail);
+
+       /* file accounting */
+       VFSATTR_WANTED(vfap, f_files);
+       VFSATTR_WANTED(vfap, f_ffree);
+
+       result = vfs_getattr(mp, vfap, ctx);
+       return result;
+}
+
+/*
+ * Mount bind layer
+ *
+ * mp        - the mount being created
+ * devvp     - unused
+ * user_data - user-space address of a NUL-terminated path naming the
+ *             directory to mirror
+ * ctx       - context of the caller; used for the lookup, the lower
+ *             getattr and the MAC label association
+ *
+ * Returns 0 on success.  Fails with EOPNOTSUPP for a root file system mount,
+ * ENOTSUP for an update, EPERM when the calling task lacks
+ * BINDFS_ENTITLEMENT, EINVAL when the path does not fit in f_mntfromname,
+ * ENOMEM when the mount structure cannot be allocated, or with the error
+ * reported by copyinstr(), vnode_lookup(), vnode_ref(), bind_nodeget() or the
+ * attribute query on the lower mount.
+ */
+static int
+bindfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
+{
+       int error                 = 0;
+       struct vnode *lowerrootvp = NULL, *vp = NULL;
+       struct vfsstatfs * sp   = NULL;
+       struct bind_mount * xmp = NULL;
+       char data[MAXPATHLEN];
+       size_t count;
+       struct vfs_attr vfa;
+       /* set defaults (arbitrary since this file system is readonly) */
+       uint32_t bsize  = BLKDEV_IOSIZE;
+       size_t iosize   = BLKDEV_IOSIZE;
+       uint64_t blocks = 4711 * 4711;
+       uint64_t bfree  = 0;
+       uint64_t bavail = 0;
+       uint64_t bused  = 4711;
+       uint64_t files  = 4711;
+       uint64_t ffree  = 0;
+
+       kauth_cred_t cred = vfs_context_ucred(ctx);
+
+       BINDFSDEBUG("mp = %p %llx\n", (void *)mp, vfs_flags(mp));
+
+       if (vfs_flags(mp) & MNT_ROOTFS) {
+               return EOPNOTSUPP;
+       }
+
+       /*
+        * Updating an existing bind mount is not supported
+        */
+       if (vfs_isupdate(mp)) {
+               return ENOTSUP;
+       }
+
+       /* check entitlement */
+       if (!IOTaskHasEntitlement(current_task(), BINDFS_ENTITLEMENT)) {
+               return EPERM;
+       }
+
+       /*
+        * Get argument
+        */
+       error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
+       if (error) {
+               BINDFSERROR("error copying data from user %d\n", error);
+               goto error;
+       }
+
+       /* This could happen if the system is configured for 32 bit inodes instead of
+        * 64 bit */
+       if (count > MAX_MNT_FROM_LENGTH) {
+               error = EINVAL;
+               /* both values are size_t, so both use %zu */
+               BINDFSERROR("path to mount too large for this system %zu vs %zu\n", count, MAX_MNT_FROM_LENGTH);
+               goto error;
+       }
+
+       error = vnode_lookup(data, 0, &lowerrootvp, ctx);
+       if (error) {
+               BINDFSERROR("lookup of %s failed error: %d\n", data, error);
+               goto error;
+       }
+
+       /* lowerrootvp has an iocount after vnode_lookup, drop that for a usecount.
+        *  Keep this to signal what we want to keep around the thing we are mirroring.
+        *  Drop it in unmount.*/
+       error = vnode_ref(lowerrootvp);
+       vnode_put(lowerrootvp);
+       if (error) {
+               // If vnode_ref failed, then NULL it out so it can't be used anymore in cleanup.
+               lowerrootvp = NULL;
+               goto error;
+       }
+
+       BINDFSDEBUG("mount %s\n", data);
+
+       MALLOC(xmp, struct bind_mount *, sizeof(*xmp), M_TEMP, M_WAITOK | M_ZERO);
+       if (xmp == NULL) {
+               error = ENOMEM;
+               goto error;
+       }
+
+       /*
+        * Save reference to underlying FS
+        */
+       xmp->bindm_lowerrootvp  = lowerrootvp;
+       xmp->bindm_lowerrootvid = vnode_vid(lowerrootvp);
+
+       error = bind_nodeget(mp, lowerrootvp, NULL, &vp, NULL, 1);
+       if (error) {
+               goto error;
+       }
+       /* After bind_nodeget our root vnode is in the hash table and we have two usecounts on lowerrootvp
+        * One use count will get dropped when we reclaim the root during unmount.
+        * The other will get dropped in unmount */
+
+
+       /* vp has an iocount on it from vnode_create. drop that for a usecount. This
+        * is our root vnode so we drop the ref in unmount
+        *
+        * Assuming for now that because we created this vnode and we aren't finished mounting we can get a ref,
+        * so the result of vnode_ref is deliberately not checked. */
+       (void)vnode_ref(vp);
+       vnode_put(vp);
+
+       xmp->bindm_rootvp = vp;
+
+       /* read the flags the user set, but then ignore some of them, we will only
+        * allow them if they are set on the lower file system */
+       uint64_t flags      = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
+       uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) & (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);
+
+       if (lowerflags) {
+               flags |= lowerflags;
+       }
+
+       /* force these flags */
+       flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
+       vfs_setflags(mp, flags);
+
+       /* from here on mp refers to xmp; the error path below must undo this */
+       vfs_setfsprivate(mp, xmp);
+       vfs_getnewfsid(mp);
+       vfs_setlocklocal(mp);
+
+       /* fill in the stat block */
+       sp = vfs_statfs(mp);
+       strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);
+
+       sp->f_flags = flags;
+
+       xmp->bindm_flags = BINDM_CASEINSENSITIVE; /* default to case insensitive */
+
+       error = bindfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
+       if (error == 0) {
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
+                       bsize = vfa.f_bsize;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
+                       iosize = vfa.f_iosize;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
+                       blocks = vfa.f_blocks;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
+                       bfree = vfa.f_bfree;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
+                       bavail = vfa.f_bavail;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
+                       bused = vfa.f_bused;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
+                       files = vfa.f_files;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
+                       ffree = vfa.f_ffree;
+               }
+               if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
+                       /* only trust the case-sensitivity bit when the lower fs marks it valid */
+                       if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE)) &&
+                           (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & (VOL_CAP_FMT_CASE_SENSITIVE))) {
+                               xmp->bindm_flags &= ~BINDM_CASEINSENSITIVE;
+                       }
+               }
+       } else {
+               goto error;
+       }
+
+       sp->f_bsize  = bsize;
+       sp->f_iosize = iosize;
+       sp->f_blocks = blocks;
+       sp->f_bfree  = bfree;
+       sp->f_bavail = bavail;
+       sp->f_bused  = bused;
+       sp->f_files  = files;
+       sp->f_ffree  = ffree;
+
+       /* Associate the mac label information from the mirrored filesystem with the
+        * mirror */
+       MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));
+
+       BINDFSDEBUG("lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
+       return 0;
+
+error:
+       if (vp) {
+               /* we made the root vnode but the mount is failed, so clean it up.
+                * Do this before xmp is released so the mount's private data is
+                * still intact in case tearing the vnode down consults it. */
+               vnode_getwithref(vp);
+               vnode_rele(vp);
+               /* give vp back */
+               vnode_recycle(vp);
+               vnode_put(vp);
+       }
+       if (lowerrootvp) {
+               /* drop the usecount taken right after the lookup */
+               vnode_getwithref(lowerrootvp);
+               vnode_rele(lowerrootvp);
+               vnode_put(lowerrootvp);
+       }
+       if (xmp) {
+               /* xmp may already have been published with vfs_setfsprivate();
+                * never leave mp pointing at freed memory */
+               vfs_setfsprivate(mp, NULL);
+               FREE(xmp, M_TEMP);
+       }
+       return error;
+}
+
+/*
+ * Free reference to bind layer
+ *
+ * mp       - the bind mount being torn down
+ * mntflags - MNT_FORCE requests a forced flush (FORCECLOSE)
+ * ctx      - context of the caller, used for the superuser check
+ *
+ * Returns 0 on success, EPERM when the caller is neither entitled nor
+ * superuser, EBUSY when the root vnode is still in use and the unmount is not
+ * forced, or the error reported by the first vflush().
+ */
+static int
+bindfs_unmount(struct mount * mp, int mntflags, vfs_context_t ctx)
+{
+       struct bind_mount * mntdata;
+       struct vnode * vp;
+       int error, flags;
+
+       BINDFSDEBUG("mp = %p\n", (void *)mp);
+
+       /* check entitlement or superuser*/
+       if (!IOTaskHasEntitlement(current_task(), BINDFS_ENTITLEMENT) &&
+           vfs_context_suser(ctx) != 0) {
+               return EPERM;
+       }
+
+       if (mntflags & MNT_FORCE) {
+               flags = FORCECLOSE;
+       } else {
+               flags = 0;
+       }
+
+       mntdata = MOUNTTOBINDMOUNT(mp);
+       vp      = mntdata->bindm_rootvp;
+
+       // hold the root vnode across the flush; it is handed to vflush
+       // separately from the rest of the mount's vnodes and is only
+       // released further down once we know the unmount will go ahead
+       vnode_getalways(vp);
+
+       error = vflush(mp, vp, flags);
+       if (error) {
+               vnode_put(vp);
+               return error;
+       }
+
+       if (vnode_isinuse(vp, 1) && flags == 0) {
+               vnode_put(vp);
+               return EBUSY;
+       }
+
+       vnode_rele(vp); // Drop reference taken by bindfs_mount
+       vnode_put(vp); // Drop ref taken above
+
+       //Force close to get rid of the last vnode
+       (void)vflush(mp, NULL, FORCECLOSE);
+
+       /* no more vnodes, so tear down the mountpoint */
+
+       vfs_setfsprivate(mp, NULL);
+
+       /* drop the usecount bindfs_mount kept on the mirrored root */
+       vnode_getalways(mntdata->bindm_lowerrootvp);
+       vnode_rele(mntdata->bindm_lowerrootvp);
+       vnode_put(mntdata->bindm_lowerrootvp);
+
+       FREE(mntdata, M_TEMP);
+
+       uint64_t vflags = vfs_flags(mp);
+       vfs_setflags(mp, vflags & ~MNT_LOCAL);
+
+       return 0;
+}
+
+/*
+ * Hand the root vnode of a bind mount back to the caller.
+ *
+ * On success *vpp is the mount's root vnode, returned with the reference
+ * obtained from vnode_get(), and 0 is returned.  If vnode_get() fails its
+ * error is returned and *vpp is left untouched.
+ */
+static int
+bindfs_root(struct mount * mp, struct vnode ** vpp, __unused vfs_context_t ctx)
+{
+       struct vnode * rootvp;
+       int status;
+
+       BINDFSDEBUG("mp = %p, vp = %p\n", (void *)mp, (void *)MOUNTTOBINDMOUNT(mp)->bindm_rootvp);
+
+       rootvp = MOUNTTOBINDMOUNT(mp)->bindm_rootvp;
+
+       status = vnode_get(rootvp);
+       if (status == 0) {
+               *vpp = rootvp;
+       }
+
+       return status;
+}
+
+/*
+ * Work out the volume capabilities a bind mount advertises.
+ *
+ * The result defaults to "fast statfs + hidden files".  When the lower mount
+ * answers a capability query, its answer is used instead, minus the
+ * features this layer does not offer.
+ */
+static void
+bindfs_fill_capabilities(struct bind_mount * bind_mp, vol_capabilities_attr_t * caps, vfs_context_t ctx)
+{
+       /* vget-style lookups are not supported */
+       const uint32_t fmt_dropped = VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_PATH_FROM_ID;
+       /* interfaces that only make sense on a writable file system
+        * or one with specific vnops implemented */
+       const uint32_t int_dropped =
+           VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_READDIRATTR | VOL_CAP_INT_EXCHANGEDATA |
+           VOL_CAP_INT_COPYFILE | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
+       struct vfs_attr lower;
+
+       /* fallback used when the lower file system cannot be queried */
+       memset(caps, 0, sizeof(*caps));
+       caps->capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
+       caps->valid[VOL_CAPABILITIES_FORMAT]        = VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_HIDDEN_FILES;
+
+       if (bindfs_vfs_getlowerattr(vnode_mount(bind_mp->bindm_lowerrootvp), &lower, ctx) != 0) {
+               return;
+       }
+       if (!VFSATTR_IS_SUPPORTED(&lower, f_capabilities)) {
+               return;
+       }
+
+       /* start from what the lower file system reports ... */
+       memcpy(caps, &lower.f_capabilities, sizeof(*caps));
+
+       /* ... strip the format bits we cannot honour, and always claim UF_HIDDEN */
+       caps->capabilities[VOL_CAPABILITIES_FORMAT] &= ~fmt_dropped;
+       caps->capabilities[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES;
+       caps->valid[VOL_CAPABILITIES_FORMAT] &= ~fmt_dropped;
+       caps->valid[VOL_CAPABILITIES_FORMAT] |= VOL_CAP_FMT_HIDDEN_FILES;
+
+       /* ... and advertise none of the optional interfaces */
+       caps->capabilities[VOL_CAPABILITIES_INTERFACES] = 0;
+       caps->valid[VOL_CAPABILITIES_INTERFACES] &= ~int_dropped;
+}
+
+/*
+ * Report file system attributes for a bind mount.
+ *
+ * Only attributes marked active in vfap are filled in.  Sizes and counters
+ * come from the mount's cached statfs block, timestamps are reported as
+ * zero, and the volume name is the name of the covered directory.
+ *
+ * Always returns 0.
+ */
+static int
+bindfs_vfs_getattr(struct mount * mp, struct vfs_attr * vfap, vfs_context_t ctx)
+{
+       struct bind_mount * bmp  = MOUNTTOBINDMOUNT(mp);
+       struct vfsstatfs * stats = vfs_statfs(mp);
+       struct timespec epoch    = {.tv_sec = 0, .tv_nsec = 0};
+       vol_capabilities_attr_t caps;
+
+       BINDFSDEBUG("\n");
+
+       bindfs_fill_capabilities(bmp, &caps, ctx);
+
+       /* timestamps: nothing meaningful to report */
+       if (VFSATTR_IS_ACTIVE(vfap, f_create_time)) {
+               VFSATTR_RETURN(vfap, f_create_time, epoch);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_modify_time)) {
+               VFSATTR_RETURN(vfap, f_modify_time, epoch);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_access_time)) {
+               VFSATTR_RETURN(vfap, f_access_time, epoch);
+       }
+
+       /* transfer sizes, as cached in the statfs block */
+       if (VFSATTR_IS_ACTIVE(vfap, f_bsize)) {
+               VFSATTR_RETURN(vfap, f_bsize, stats->f_bsize);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_iosize)) {
+               VFSATTR_RETURN(vfap, f_iosize, stats->f_iosize);
+       }
+
+       if (VFSATTR_IS_ACTIVE(vfap, f_owner)) {
+               VFSATTR_RETURN(vfap, f_owner, 0);
+       }
+
+       /* block accounting */
+       if (VFSATTR_IS_ACTIVE(vfap, f_blocks)) {
+               VFSATTR_RETURN(vfap, f_blocks, stats->f_blocks);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_bfree)) {
+               VFSATTR_RETURN(vfap, f_bfree, stats->f_bfree);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_bavail)) {
+               VFSATTR_RETURN(vfap, f_bavail, stats->f_bavail);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_bused)) {
+               VFSATTR_RETURN(vfap, f_bused, stats->f_bused);
+       }
+
+       /* file accounting */
+       if (VFSATTR_IS_ACTIVE(vfap, f_files)) {
+               VFSATTR_RETURN(vfap, f_files, stats->f_files);
+       }
+       if (VFSATTR_IS_ACTIVE(vfap, f_ffree)) {
+               VFSATTR_RETURN(vfap, f_ffree, stats->f_ffree);
+       }
+
+       if (VFSATTR_IS_ACTIVE(vfap, f_fssubtype)) {
+               VFSATTR_RETURN(vfap, f_fssubtype, 0);
+       }
+
+       if (VFSATTR_IS_ACTIVE(vfap, f_capabilities)) {
+               memcpy(&vfap->f_capabilities, &caps, sizeof(vol_capabilities_attr_t));
+
+               VFSATTR_SET_SUPPORTED(vfap, f_capabilities);
+       }
+
+       if (VFSATTR_IS_ACTIVE(vfap, f_attributes)) {
+               vol_attributes_attr_t * attrs = &vfap->f_attributes;
+
+               /* only volume-level attributes are offered, all of them natively */
+               attrs->validattr.commonattr = 0;
+               attrs->validattr.volattr    = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
+               attrs->validattr.dirattr    = 0;
+               attrs->validattr.fileattr   = 0;
+               attrs->validattr.forkattr   = 0;
+
+               attrs->nativeattr.commonattr = 0;
+               attrs->nativeattr.volattr    = ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
+               attrs->nativeattr.dirattr    = 0;
+               attrs->nativeattr.fileattr   = 0;
+               attrs->nativeattr.forkattr   = 0;
+
+               VFSATTR_SET_SUPPORTED(vfap, f_attributes);
+       }
+
+       if (VFSATTR_IS_ACTIVE(vfap, f_vol_name)) {
+               /* The volume is named after the directory it is mounted on */
+               struct vnode * covered = vfs_vnodecovered(mp);
+
+               if (covered != NULL) {
+                       const char * label = vnode_getname_printable(covered);
+
+                       strlcpy(vfap->f_vol_name, label, MAXPATHLEN);
+                       vnode_putname_printable(label);
+
+                       VFSATTR_SET_SUPPORTED(vfap, f_vol_name);
+                       vnode_put(covered);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Sync handler: does no work and always reports success (0).
+ * All three arguments are ignored.
+ * NOTE(review): presumably there is never anything to flush because
+ * bindfs_mount forces MNT_RDONLY - confirm.
+ */
+static int
+bindfs_sync(__unused struct mount * mp, __unused int waitfor, __unused vfs_context_t ctx)
+{
+       return 0;
+}
+
+
+
+/*
+ * Start handler: emits a debug trace and always returns 0.
+ * All three arguments are ignored.
+ */
+static int
+bindfs_vfs_start(__unused struct mount * mp, __unused int flags, __unused vfs_context_t ctx)
+{
+       BINDFSDEBUG("\n");
+       return 0;
+}
+
+/* Vnode operation vector description; defined outside this file. */
+extern const struct vnodeopv_desc bindfs_vnodeop_opv_desc;
+
+/* List of vnode operation vector descriptions belonging to bindfs. */
+const struct vnodeopv_desc * bindfs_vnodeopv_descs[] = {
+       &bindfs_vnodeop_opv_desc,
+};
+
+/* VFS operation table for bindfs. */
+struct vfsops bindfs_vfsops = {
+       /* set-up and tear-down */
+       .vfs_init               = bindfs_init,
+       .vfs_mount              = bindfs_mount,
+       .vfs_start              = bindfs_vfs_start,
+       .vfs_unmount            = bindfs_unmount,
+
+       /* queries */
+       .vfs_root               = bindfs_root,
+       .vfs_getattr            = bindfs_vfs_getattr,
+       .vfs_sync               = bindfs_sync,
+
+       /* not provided */
+       .vfs_setattr            = NULL,
+       .vfs_sysctl             = NULL,
+};