/*
- * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/uio_internal.h>
-#include <sys/malloc.h>
+#include <kern/kalloc.h>
#include <sys/mman.h>
#include <sys/dirent.h>
#include <sys/attr.h>
#include <sys/fsevents.h>
#include <sys/imgsrc.h>
#include <sys/sysproto.h>
+#include <sys/sysctl.h>
#include <sys/xattr.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/clonefile.h>
#include <sys/snapshot.h>
#include <sys/priv.h>
+#include <sys/fsgetpath.h>
#include <machine/cons.h>
#include <machine/limits.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm_protos.h>
#include <libkern/OSAtomic.h>
+#include <os/atomic_private.h>
#include <pexpert/pexpert.h>
#include <IOKit/IOBSD.h>
+// deps for MIG call
+#include <kern/host.h>
+#include <kern/ipc_misc.h>
+#include <mach/host_priv.h>
+#include <mach/vfs_nspace.h>
+#include <os/log.h>
+
+#include <nfs/nfs_conf.h>
+
#if ROUTEFS
#include <miscfs/routefs/routefs.h>
#endif /* ROUTEFS */
#if CONFIG_FSE
#define GET_PATH(x) \
- (x) = get_pathbuff();
+ ((x) = get_pathbuff())
#define RELEASE_PATH(x) \
- release_pathbuff(x);
+ release_pathbuff(x)
#else
#define GET_PATH(x) \
- MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ ((x) = zalloc(ZV_NAMEI))
#define RELEASE_PATH(x) \
- FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
+ zfree(ZV_NAMEI, x)
#endif /* CONFIG_FSE */
#ifndef HFS_GET_BOOT_INFO
#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
#endif
+/*
+ * If you need accounting for KM_FD_VN_DATA consider using
+ * ZONE_VIEW_DEFINE to define a zone view.
+ */
+#define KM_FD_VN_DATA KHEAP_DEFAULT
+
extern void disk_conditioner_unmount(mount_t mp);
/* struct for checkdirs iteration */
static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
user_addr_t bufp, int *sizep, boolean_t is_64_bit,
boolean_t partial_copy);
-static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
- user_addr_t bufp);
static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
struct componentname *cnp, user_addr_t fsmountargs,
*/
#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
-static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
+/* Max retry limit for rename due to vnode recycling. */
+#define MAX_RENAME_ERECYCLE_RETRIES 1024
+
+static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg,
+ int unlink_flags);
-static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
+static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, uint32_t options, int *);
#ifdef CONFIG_IMGSRC_ACCESS
static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
#endif /* CONFIG_IMGSRC_ACCESS */
+#if CONFIG_LOCKERBOOT
+int mount_locker_protoboot(const char *fsname, const char *mntpoint,
+ const char *pbdevpath);
+#endif
+
//snapshot functions
#if CONFIG_MNT_ROOTSNAP
-static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
+static int __attribute__ ((noinline)) snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
#else
-static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
+static int __attribute__ ((noinline)) snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
#endif
-int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
-
__private_extern__
int sync_internal(void);
extern lck_grp_attr_t *fd_vn_lck_grp_attr;
extern lck_attr_t *fd_vn_lck_attr;
+extern lck_rw_t * rootvnode_rw_lock;
+
/*
* incremented each time a mount or unmount operation occurs
* used to invalidate the cached value of the rootvp in the
/* counts number of mount and unmount operations */
unsigned int vfs_nummntops = 0;
+/* system-wide, per-boot unique mount ID */
+static _Atomic uint64_t mount_unique_id = 1;
+
extern const struct fileops vnops;
#if CONFIG_APPLEDOUBLE
extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
* Virtual File System System Calls
*/
-#if NFSCLIENT || DEVFS || ROUTEFS
+#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
/*
* Private in-kernel mounting spi (NFS only, not exported)
*/
__private_extern__
int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
- void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
+ void *data, __unused size_t datalen, int syscall_flags, uint32_t kern_flags, vfs_context_t ctx)
{
struct nameidata nd;
boolean_t did_namei;
if (vp == NULLVP) {
error = namei(&nd);
if (error) {
+ if (kern_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VOLBYROLE_MASK)) {
+ printf("failed to locate mount-on path: %s ", path);
+ }
return error;
}
vp = nd.ni_vp;
char *pnbuf = CAST_DOWN(char *, path);
nd.ni_cnd.cn_pnbuf = pnbuf;
- nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
+ nd.ni_cnd.cn_pnlen = (int)(strlen(pnbuf) + 1);
did_namei = FALSE;
}
return error;
}
-#endif /* NFSCLIENT || DEVFS */
+#endif /* CONFIG_NFS_CLIENT || DEVFS */
/*
* Mount a file system.
}
memset(&cn, 0, sizeof(struct componentname));
- MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
+ cn.cn_pnbuf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
cn.cn_pnlen = MAXPATHLEN;
if ((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
- FREE(cn.cn_pnbuf, M_TEMP);
+ zfree(ZV_NAMEI, cn.cn_pnbuf);
vnode_put(pvp);
vnode_put(vp);
file_drop(uap->fd);
error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
- FREE(cn.cn_pnbuf, M_TEMP);
+ zfree(ZV_NAMEI, cn.cn_pnbuf);
vnode_put(pvp);
vnode_put(vp);
file_drop(uap->fd);
struct nameidata nd;
size_t dummy = 0;
char *labelstr = NULL;
+ size_t labelsz = 0;
int flags = uap->flags;
int error;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
if (is_64bit) {
struct user64_mac mac64;
error = copyin(uap->mac_p, &mac64, sizeof(mac64));
- mac.m_buflen = mac64.m_buflen;
- mac.m_string = mac64.m_string;
+ mac.m_buflen = (user_size_t)mac64.m_buflen;
+ mac.m_string = (user_addr_t)mac64.m_string;
} else {
struct user32_mac mac32;
error = copyin(uap->mac_p, &mac32, sizeof(mac32));
error = EINVAL;
goto out;
}
- MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
+ labelsz = mac.m_buflen;
+ labelstr = kheap_alloc(KHEAP_TEMP, labelsz, Z_WAITOK);
error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
if (error) {
goto out;
out:
#if CONFIG_MACF
-	if (labelstr) {
-		FREE(labelstr, M_MACTEMP);
-	}
+	/*
+	 * Free from the same kalloc heap the label was allocated from
+	 * (KHEAP_TEMP, see the kheap_alloc above); freeing to a different
+	 * heap corrupts per-heap accounting. kheap_free() is a no-op on
+	 * a NULL pointer, so no guard is needed.
+	 */
+	kheap_free(KHEAP_TEMP, labelstr, labelsz);
#endif /* CONFIG_MACF */
if (vp) {
struct vfstable *vfsp = (struct vfstable *)0;
struct proc *p = vfs_context_proc(ctx);
int error, flag = 0;
+ bool flag_set = false;
user_addr_t devpath = USER_ADDR_NULL;
int ronly = 0;
int mntalloc = 0;
boolean_t is_rwlock_locked = FALSE;
boolean_t did_rele = FALSE;
boolean_t have_usecount = FALSE;
+ boolean_t did_set_lmount = FALSE;
+
+#if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM || CONFIG_BASESYSTEMROOT
+ /* Check for mutually-exclusive flag bits */
+ uint32_t checkflags = (internal_flags & (KERNEL_MOUNT_VOLBYROLE_MASK | KERNEL_MOUNT_BASESYSTEMROOT));
+ int bitcount = 0;
+ while (checkflags != 0) {
+ checkflags &= (checkflags - 1);
+ bitcount++;
+ }
+
+ if (bitcount > 1) {
+ //not allowed to request multiple mount-by-role flags
+ error = EINVAL;
+ goto out1;
+ }
+#endif
/*
* Process an update for an existing mount
}
mp = vp->v_mount;
- /* unmount in progress return error */
+ /* if unmount or mount in progress, return error */
mount_lock_spin(mp);
- if (mp->mnt_lflag & MNT_LUNMOUNT) {
+ if (mp->mnt_lflag & (MNT_LUNMOUNT | MNT_LMOUNT)) {
mount_unlock(mp);
error = EBUSY;
goto out1;
}
+ mp->mnt_lflag |= MNT_LMOUNT;
+ did_set_lmount = TRUE;
mount_unlock(mp);
lck_rw_lock_exclusive(&mp->mnt_rwlock);
is_rwlock_locked = TRUE;
goto out1;
}
-#ifdef CONFIG_IMGSRC_ACCESS
+ /*
+ * can't turn off MNT_REMOVABLE either but it may be an unexpected
+ * failure to return an error for this so we'll just silently
+ * add it if it is not passed in.
+ */
+ if ((mp->mnt_flag & MNT_REMOVABLE) &&
+ ((flags & MNT_REMOVABLE) == 0)) {
+ flags |= MNT_REMOVABLE;
+ }
+
/* Can't downgrade the backer of the root FS */
if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
(!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
error = ENOTSUP;
goto out1;
}
-#endif /* CONFIG_IMGSRC_ACCESS */
/*
* Only root, or the user that did the original mount is
}
}
flag = mp->mnt_flag;
+ flag_set = true;
vfsp = mp->mnt_vtable;
goto update;
- }
+ } // MNT_UPDATE
/*
* For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
}
/*
- * VFC_VFSLOCALARGS is not currently supported for kernel mounts
+ * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
+ * except in ROSV configs and for the initial BaseSystem root.
*/
- if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
+ if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) &&
+ ((internal_flags & KERNEL_MOUNT_VOLBYROLE_MASK) == 0) &&
+ ((internal_flags & KERNEL_MOUNT_BASESYSTEMROOT) == 0)) {
error = EINVAL; /* unsupported request */
goto out1;
}
/*
* Allocate and initialize the filesystem (mount_t)
*/
- MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
- M_MOUNT, M_WAITOK);
- bzero((char *)mp, (u_int32_t)sizeof(struct mount));
+ mp = zalloc_flags(mount_zone, Z_WAITOK | Z_ZERO);
mntalloc = 1;
/* Initialize the default IO constraints */
mp->mnt_realrootvp = NULLVP;
mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
+ mp->mnt_lflag |= MNT_LMOUNT;
+ did_set_lmount = TRUE;
+
TAILQ_INIT(&mp->mnt_vnodelist);
TAILQ_INIT(&mp->mnt_workerqueue);
TAILQ_INIT(&mp->mnt_newvnodes);
//mp->mnt_stat.f_type = vfsp->vfc_typenum;
mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
- strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+ do {
+ int pathlen = MAXPATHLEN;
+
+ if (vn_getpath_ext(vp, pvp, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER)) {
+ strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+ }
+ } while (0);
mp->mnt_vnodecovered = vp;
mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
mp->mnt_devbsdunit = 0;
+ mp->mnt_mount_id = os_atomic_inc_orig(&mount_unique_id, relaxed);
/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
-#if NFSCLIENT || DEVFS || ROUTEFS
+#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
if (kernelmount) {
mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
}
if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
}
-#endif /* NFSCLIENT || DEVFS */
+#endif /* CONFIG_NFS_CLIENT || DEVFS */
update:
mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
- MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
+ MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
MNT_QUARANTINE | MNT_CPROTECT);
#if SECURE_KERNEL
mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
- MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
+ MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
MNT_QUARANTINE | MNT_CPROTECT);
#if CONFIG_MACF
}
#endif
/*
- * Process device path for local file systems if requested
+ * Process device path for local file systems if requested.
+ *
+ * Snapshot and mount-by-role mounts do not use this path; they are
+ * passing other opaque data in the device path field.
+ *
+ * Basesystemroot mounts pass a device path to be resolved here,
+ * but it's just a char * already inside the kernel, which
+ * kernel_mount() shoved into a user_addr_t to call us. So for such
+ * mounts we must skip copyin (both of the address and of the string
+ * (in NDINIT).
*/
if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
- !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
- if (vfs_context_is64bit(ctx)) {
- if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
- goto out1;
- }
- fsmountargs += sizeof(devpath);
- } else {
- user32_addr_t tmp;
- if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
- goto out1;
+ !(internal_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VOLBYROLE_MASK))) {
+ boolean_t do_copyin_devpath = true;
+#if CONFIG_BASESYSTEMROOT
+ if (internal_flags & KERNEL_MOUNT_BASESYSTEMROOT) {
+		// KERNEL_MOUNT_BASESYSTEMROOT implies subtle behavior worth noting:
+ // We have been passed fsmountargs, which is typed as a user_addr_t,
+ // but is actually a char ** pointing to a (kernelspace) string.
+ // We manually unpack it with a series of casts and dereferences
+ // that reverses what was done just above us on the stack in
+ // imageboot_pivot_image().
+ // After retrieving the path to the dev node (which we will NDINIT
+ // in a moment), we pass NULL fsmountargs on to the filesystem.
+ _Static_assert(sizeof(char **) == sizeof(fsmountargs), "fsmountargs should fit a (kernel) address");
+ char **devnamepp = (char **)fsmountargs;
+ char *devnamep = *devnamepp;
+ devpath = CAST_USER_ADDR_T(devnamep);
+ do_copyin_devpath = false;
+ fsmountargs = USER_ADDR_NULL;
+
+ //Now that we have a mp, denote that this mount is for the basesystem.
+ mp->mnt_supl_kern_flag |= MNTK_SUPL_BASESYSTEM;
+ }
+#endif // CONFIG_BASESYSTEMROOT
+
+ if (do_copyin_devpath) {
+ if (vfs_context_is64bit(ctx)) {
+ if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
+ goto out1;
+ }
+ fsmountargs += sizeof(devpath);
+ } else {
+ user32_addr_t tmp;
+ if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
+ goto out1;
+ }
+ /* munge into LP64 addr */
+ devpath = CAST_USER_ADDR_T(tmp);
+ fsmountargs += sizeof(tmp);
}
- /* munge into LP64 addr */
- devpath = CAST_USER_ADDR_T(tmp);
- fsmountargs += sizeof(tmp);
}
/* Lookup device and authorize access to it */
if ((devpath)) {
struct nameidata nd;
- NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
+ enum uio_seg seg = UIO_USERSPACE;
+#if CONFIG_BASESYSTEMROOT
+ if (internal_flags & KERNEL_MOUNT_BASESYSTEMROOT) {
+ seg = UIO_SYSSPACE;
+ }
+#endif // CONFIG_BASESYSTEMROOT
+
+ NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, seg, devpath, ctx);
if ((error = namei(&nd))) {
goto out1;
}
goto out2;
}
}
- }
+ } // localargs && !(snapshot | data | vm)
+
#if CONFIG_MACF
if ((flags & MNT_UPDATE) == 0) {
mac_mount_label_init(mp);
}
#endif
/*
- * Mount the filesystem.
+ * Mount the filesystem. We already asserted that internal_flags
+ * cannot have more than one mount-by-role bit set.
*/
if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
(caddr_t)fsmountargs, 0, ctx);
+ } else if (internal_flags & KERNEL_MOUNT_DATAVOL) {
+#if CONFIG_ROSV_STARTUP
+ struct mount *origin_mp = (struct mount*)fsmountargs;
+ fs_role_mount_args_t frma = {origin_mp, VFS_DATA_ROLE};
+ error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
+ if (error) {
+ printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE, error);
+ } else {
+ /* Mark volume associated with system volume */
+ mp->mnt_kern_flag |= MNTK_SYSTEM;
+
+ /* Attempt to acquire the mnt_devvp and set it up */
+ struct vnode *mp_devvp = NULL;
+ if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
+ errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
+ 0, &mp_devvp, vfs_context_kernel());
+ if (!lerr) {
+ mp->mnt_devvp = mp_devvp;
+ //vnode_lookup took an iocount, need to drop it.
+ vnode_put(mp_devvp);
+ // now set `device_vnode` to the devvp that was acquired.
+ // this is needed in order to ensure vfs_init_io_attributes is invoked.
+ // note that though the iocount above was dropped, the mount acquires
+ // an implicit reference against the device.
+ device_vnode = mp_devvp;
+ }
+ }
+ }
+#else
+ error = EINVAL;
+#endif
+ } else if (internal_flags & KERNEL_MOUNT_VMVOL) {
+#if CONFIG_MOUNT_VM
+ struct mount *origin_mp = (struct mount*)fsmountargs;
+ fs_role_mount_args_t frma = {origin_mp, VFS_VM_ROLE};
+ error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
+ if (error) {
+ printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE, error);
+ } else {
+ /* Mark volume associated with system volume and a swap mount */
+ mp->mnt_kern_flag |= (MNTK_SYSTEM | MNTK_SWAP_MOUNT);
+ /* Attempt to acquire the mnt_devvp and set it up */
+ struct vnode *mp_devvp = NULL;
+ if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
+ errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
+ 0, &mp_devvp, vfs_context_kernel());
+ if (!lerr) {
+ mp->mnt_devvp = mp_devvp;
+ //vnode_lookup took an iocount, need to drop it.
+ vnode_put(mp_devvp);
+
+ // now set `device_vnode` to the devvp that was acquired.
+ // note that though the iocount above was dropped, the mount acquires
+ // an implicit reference against the device.
+ device_vnode = mp_devvp;
+ }
+ }
+ }
+#else
+ error = EINVAL;
+#endif
+ } else if ((internal_flags & KERNEL_MOUNT_PREBOOTVOL) || (internal_flags & KERNEL_MOUNT_RECOVERYVOL)) {
+#if CONFIG_MOUNT_PREBOOTRECOVERY
+ struct mount *origin_mp = (struct mount*)fsmountargs;
+ uint32_t mount_role = 0;
+ if (internal_flags & KERNEL_MOUNT_PREBOOTVOL) {
+ mount_role = VFS_PREBOOT_ROLE;
+ } else if (internal_flags & KERNEL_MOUNT_RECOVERYVOL) {
+ mount_role = VFS_RECOVERY_ROLE;
+ }
+
+ if (mount_role != 0) {
+ fs_role_mount_args_t frma = {origin_mp, mount_role};
+ error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
+ if (error) {
+ printf("MOUNT-BY-ROLE (%d) failed! (%d)", mount_role, error);
+ } else {
+ // NOT YET - need to qualify how this interacts with shutdown, ERP/ERB, etc
+ /* Mark volume associated with system volume */
+ //mp->mnt_kern_flag |= MNTK_SYSTEM;
+ /* Attempt to acquire the mnt_devvp and set it up */
+ struct vnode *mp_devvp = NULL;
+ if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
+ errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
+ 0, &mp_devvp, vfs_context_kernel());
+ if (!lerr) {
+ mp->mnt_devvp = mp_devvp;
+ //vnode_lookup took an iocount, need to drop it.
+ vnode_put(mp_devvp);
+
+ // now set `device_vnode` to the devvp that was acquired.
+ // note that though the iocount above was dropped, the mount acquires
+ // an implicit reference against the device.
+ device_vnode = mp_devvp;
+ }
+ }
+ }
+ } else {
+ printf("MOUNT-BY-ROLE (%d) failed - ROLE UNRECOGNIZED! (%d)", mount_role, error);
+ error = EINVAL;
+ }
+#else
+ error = EINVAL;
+#endif
} else {
error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
}
if (error == 0) {
struct vfs_attr vfsattr;
#if CONFIG_MACF
+ error = mac_mount_check_mount_late(ctx, mp);
+ if (error != 0) {
+ goto out4;
+ }
+
if (vfs_flags(mp) & MNT_MULTILABEL) {
error = VFS_ROOT(mp, &rvp, ctx);
if (error) {
printf("%s() VFS_ROOT returned %d\n", __func__, error);
- goto out3;
+ goto out4;
}
error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
/*
vnode_put(rvp);
if (error) {
- goto out3;
+ goto out4;
}
}
#endif /* MAC */
#if CONFIG_MACF
mac_mount_label_destroy(mp);
#endif
- FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
+ zfree(mount_zone, mp);
+ did_set_lmount = false;
}
exit:
/*
vnode_put(devvp);
}
+ if (did_set_lmount) {
+ mount_lock_spin(mp);
+ mp->mnt_lflag &= ~MNT_LMOUNT;
+ mount_unlock(mp);
+ }
+
return error;
/* Error condition exits */
out1:
/* Release mnt_rwlock only when it was taken */
if (is_rwlock_locked == TRUE) {
+ if (flag_set) {
+ mp->mnt_flag = flag; /* restore mnt_flag value */
+ }
lck_rw_done(&mp->mnt_rwlock);
}
+ if (did_set_lmount) {
+ mount_lock_spin(mp);
+ mp->mnt_lflag &= ~MNT_LMOUNT;
+ mount_unlock(mp);
+ }
+
if (mntalloc) {
if (mp->mnt_crossref) {
mount_dropcrossref(mp, vp, 0);
#if CONFIG_MACF
mac_mount_label_destroy(mp);
#endif
- FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
+ zfree(mount_zone, mp);
}
}
if (vfsp_ref) {
#if CONFIG_IMGSRC_ACCESS
-#if DEBUG
-#define IMGSRC_DEBUG(args...) printf(args)
+#define DEBUG_IMGSRC 0
+
+#if DEBUG_IMGSRC
+#define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
#else
#define IMGSRC_DEBUG(args...) do { } while(0)
#endif
vnode_t vp, realdevvp;
mode_t accessmode;
int error;
+ enum uio_seg uio = UIO_USERSPACE;
+
+ if (ctx == vfs_context_kernel()) {
+ uio = UIO_SYSSPACE;
+ }
- NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
+ NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, uio, devpath, ctx);
if ((error = namei(&nd))) {
IMGSRC_DEBUG("namei() failed with %d\n", error);
return error;
out1:
vnode_put(realdevvp);
+
out:
nameidone(&nd);
+
if (error) {
vnode_put(vp);
}
mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
+ IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
+ mp->mnt_vtable->vfc_name, vnode_getname(vp));
+
vnode_lock_spin(vp);
CLR(vp->v_flag, VMOUNT);
vp->v_mountedhere = mp;
/* unmount in progress return error */
mount_lock_spin(mp);
- if (mp->mnt_lflag & MNT_LUNMOUNT) {
+ if (mp->mnt_lflag & (MNT_LUNMOUNT | MNT_LMOUNT)) {
mount_unlock(mp);
return EBUSY;
}
}
static int
-relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
- const char *fsname, vfs_context_t ctx,
+relocate_imageboot_source(vnode_t pvp, vnode_t vp,
+ struct componentname *cnp, const char *fsname, vfs_context_t ctx,
boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
{
int error;
mount_t mp;
boolean_t placed = FALSE;
- vnode_t devvp = NULLVP;
struct vfstable *vfsp;
user_addr_t devpath;
char *old_mntonname;
vnode_t rvp;
+ vnode_t devvp;
uint32_t height;
uint32_t flags;
height = mia64.mi_height;
flags = mia64.mi_flags;
- devpath = mia64.mi_devpath;
+ devpath = (user_addr_t)mia64.mi_devpath;
} else {
struct user32_mnt_imgsrc_args mia32;
error = copyin(fsmountargs, &mia32, sizeof(mia32));
error = get_imgsrc_rootvnode(height, &rvp);
if (error != 0) {
- IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
+ IMGSRC_DEBUG("getting old root vnode failed with %d\n", error);
return error;
}
- IMGSRC_DEBUG("got root vnode.\n");
+ IMGSRC_DEBUG("got old root vnode\n");
- MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
+ old_mntonname = zalloc_flags(ZV_NAMEI, Z_WAITOK);
/* Can only move once */
mp = vnode_mount(rvp);
goto out0;
}
+ IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp->mnt_vtable->vfc_name);
IMGSRC_DEBUG("Starting updated.\n");
/* Get exclusive rwlock on mount, authorize update on mp */
goto out1;
}
-
IMGSRC_DEBUG("Preparing coveredvp.\n");
/* Mark covered vnode as mount in progress, authorize placing mount on top */
/* Sanity check the name caller has provided */
vfsp = mp->mnt_vtable;
if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
- IMGSRC_DEBUG("Wrong fs name.\n");
+ IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
+ vfsp->vfc_name, fsname);
error = EINVAL;
goto out2;
}
mount_end_update(mp);
vnode_put(rvp);
- FREE(old_mntonname, M_TEMP);
+ zfree(ZV_NAMEI, old_mntonname);
vfs_notify_mount(pvp);
out0:
vnode_put(rvp);
- FREE(old_mntonname, M_TEMP);
+ zfree(ZV_NAMEI, old_mntonname);
return error;
}
+#if CONFIG_LOCKERBOOT
+/*
+ * Mount the locker "protoboot" volume at mntpoint by relocating the
+ * imageboot source. Looks up mntpoint in kernel address space
+ * (WANTPARENT so both the parent and target vnodes are available),
+ * then calls relocate_imageboot_source() with a synthesized 64-bit
+ * mnt_imgsrc_args whose mi_devpath carries the protoboot device path.
+ *
+ * fsname    - expected filesystem type name of the mount being moved
+ * mntpoint  - kernel-space path string for the new mount point
+ * pbdevpath - kernel-space path string for the protoboot device node
+ *
+ * Returns 0 on success, an errno on failure.
+ */
+__private_extern__
+int
+mount_locker_protoboot(const char *fsname, const char *mntpoint,
+    const char *pbdevpath)
+{
+	int error = -1;
+	struct nameidata nd;
+	boolean_t cleanup_nd = FALSE;
+	vfs_context_t ctx = vfs_context_kernel();
+	boolean_t is64 = TRUE;
+	boolean_t by_index = TRUE;
+	struct user64_mnt_imgsrc_args mia64 = {
+		.mi_height = 0,
+		.mi_flags = 0,
+		.mi_devpath = CAST_USER_ADDR_T(pbdevpath),
+	};
+	user_addr_t mia64addr = CAST_USER_ADDR_T(&mia64);
+
+	/* kernel strings, hence UIO_SYSSPACE rather than UIO_USERSPACE */
+	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
+	    UIO_SYSSPACE, CAST_USER_ADDR_T(mntpoint), ctx);
+	error = namei(&nd);
+	if (error) {
+		IMGSRC_DEBUG("namei: %d\n", error);
+		goto out;
+	}
+
+	cleanup_nd = TRUE;
+	error = relocate_imageboot_source(nd.ni_dvp, nd.ni_vp,
+	    &nd.ni_cnd, fsname, ctx, is64, mia64addr, by_index);
+
+out:
+	if (cleanup_nd) {
+		/* preserve the relocate/namei result across the puts below */
+		int stashed = error;
+
+		/*
+		 * Drop the iocounts namei took; a failing vnode_put here
+		 * indicates a refcount bug, so panic rather than continue.
+		 */
+		error = vnode_put(nd.ni_vp);
+		if (error) {
+			panic("vnode_put() returned non-zero: %d", error);
+		}
+
+		if (nd.ni_dvp) {
+			error = vnode_put(nd.ni_dvp);
+			if (error) {
+				panic("vnode_put() returned non-zero: %d", error);
+			}
+		}
+		nameidone(&nd);
+
+		/* report the operation's result, not the vnode_put result */
+		error = stashed;
+	}
+	return error;
+}
+#endif /* CONFIG_LOCKERBOOT */
#endif /* CONFIG_IMGSRC_ACCESS */
void
vnode_t olddp = cdrp->olddp;
vnode_t newdp = cdrp->newdp;
struct filedesc *fdp;
- vnode_t tvp;
- vnode_t fdp_cvp;
- vnode_t fdp_rvp;
- int cdir_changed = 0;
- int rdir_changed = 0;
+ vnode_t new_cvp = newdp;
+ vnode_t new_rvp = newdp;
+ vnode_t old_cvp = NULL;
+ vnode_t old_rvp = NULL;
/*
* XXX Also needs to iterate each thread in the process to see if it
* XXX update that as well.
*/
+ /*
+ * First, with the proc_fdlock held, check to see if we will need
+ * to do any work. If not, we will get out fast.
+ */
proc_fdlock(p);
fdp = p->p_fd;
- if (fdp == (struct filedesc *)0) {
+ if (fdp == NULL ||
+ (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) {
proc_fdunlock(p);
return PROC_RETURNED;
}
- fdp_cvp = fdp->fd_cdir;
- fdp_rvp = fdp->fd_rdir;
proc_fdunlock(p);
- if (fdp_cvp == olddp) {
- vnode_ref(newdp);
- tvp = fdp->fd_cdir;
- fdp_cvp = newdp;
- cdir_changed = 1;
- vnode_rele(tvp);
+ /*
+ * Ok, we will have to do some work. Always take two refs
+ * because we might need that many. We'll dispose of whatever
+ * we ended up not using.
+ */
+ if (vnode_ref(newdp) != 0) {
+ return PROC_RETURNED;
}
- if (fdp_rvp == olddp) {
- vnode_ref(newdp);
- tvp = fdp->fd_rdir;
- fdp_rvp = newdp;
- rdir_changed = 1;
- vnode_rele(tvp);
+ if (vnode_ref(newdp) != 0) {
+ vnode_rele(newdp);
+ return PROC_RETURNED;
}
- if (cdir_changed || rdir_changed) {
- proc_fdlock(p);
- fdp->fd_cdir = fdp_cvp;
- fdp->fd_rdir = fdp_rvp;
- proc_fdunlock(p);
+
+ proc_dirs_lock_exclusive(p);
+ /*
+ * Now do the work. Note: we dropped the proc_fdlock, so we
+ * have to do all of the checks again.
+ */
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ if (fdp != NULL) {
+ if (fdp->fd_cdir == olddp) {
+ old_cvp = olddp;
+ fdp->fd_cdir = newdp;
+ new_cvp = NULL;
+ }
+ if (fdp->fd_rdir == olddp) {
+ old_rvp = olddp;
+ fdp->fd_rdir = newdp;
+ new_rvp = NULL;
+ }
+ }
+ proc_fdunlock(p);
+ proc_dirs_unlock_exclusive(p);
+
+ /*
+ * Dispose of any references that are no longer needed.
+ */
+ if (old_cvp != NULL) {
+ vnode_rele(old_cvp);
}
+ if (old_rvp != NULL) {
+ vnode_rele(old_rvp);
+ }
+ if (new_cvp != NULL) {
+ vnode_rele(new_cvp);
+ }
+ if (new_rvp != NULL) {
+ vnode_rele(new_rvp);
+ }
+
return PROC_RETURNED;
}
if (rootvnode == olddp) {
vnode_ref(newdp);
+ lck_rw_lock_exclusive(rootvnode_rw_lock);
tvp = rootvnode;
rootvnode = newdp;
+ lck_rw_unlock_exclusive(rootvnode_rw_lock);
vnode_rele(tvp);
}
return safedounmount(mp, flags, ctx);
}
+#define ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT \
+ "com.apple.private.vfs.role-account-unmount"
/*
* The mount struct comes with a mount ref which will be consumed.
}
/*
- * Skip authorization if the mount is tagged as permissive and
- * this is not a forced-unmount attempt.
+ * Skip authorization in two cases:
+ * - If the process running the unmount has ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT.
+ * This entitlement allows non-root processes unmount volumes mounted by
+ * other processes.
+ * - If the mount is tagged as permissive and this is not a forced-unmount
+ * attempt.
*/
- if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
+ if (!IOTaskHasEntitlement(current_task(), ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT) &&
+ (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0)))) {
/*
* Only root, or the user that did the original mount is
* permitted to unmount this filesystem.
}
}
/*
- * Don't allow unmounting the root file system.
+ * Don't allow unmounting the root file system, or other volumes
+ * associated with it (for example, the associated VM or DATA mounts).
*/
- if (mp->mnt_flag & MNT_ROOTFS) {
- error = EBUSY; /* the root is always busy */
+ if ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM)) {
+ error = EBUSY; /* the root (or associated volumes) is always busy */
goto out;
}
-#ifdef CONFIG_IMGSRC_ACCESS
+ /*
+ * If the mount is providing the root filesystem's disk image
+ * (i.e. imageboot), don't allow unmounting
+ */
if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
error = EBUSY;
goto out;
}
-#endif /* CONFIG_IMGSRC_ACCESS */
return dounmount(mp, flags, 1, ctx);
* If already an unmount in progress just return EBUSY.
* Even a forced unmount cannot override.
*/
- if (mp->mnt_lflag & MNT_LUNMOUNT) {
+ if (mp->mnt_lflag & (MNT_LUNMOUNT | MNT_LMOUNT)) {
if (withref != 0) {
mount_drop(mp, 1);
}
}
}
- /* free disk_conditioner_info structure for this mount */
- disk_conditioner_unmount(mp);
-
IOBSDMountChange(mp, kIOMountChangeUnmount);
#if CONFIG_TRIGGERS
wakeup((caddr_t)mp);
}
mount_refdrain(mp);
+
+ /* free disk_conditioner_info structure for this mount */
+ disk_conditioner_unmount(mp);
+
out:
if (mp->mnt_lflag & MNT_LWAIT) {
mp->mnt_lflag &= ~MNT_LWAIT;
#if CONFIG_MACF
mac_mount_label_destroy(mp);
#endif
- FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
+ zfree(mount_zone, mp);
} else {
panic("dounmount: no coveredvp");
}
TAILQ_FOREACH(smp, &mountlist, mnt_list)
count++;
fsids_sz = count * sizeof(fsid_t);
- MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
+ fsids = kheap_alloc(KHEAP_TEMP, fsids_sz, Z_NOWAIT);
if (fsids == NULL) {
mount_list_unlock();
goto out;
}
}
out:
- if (fsids) {
- FREE(fsids, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, fsids, fsids_sz);
}
void
#if CONFIG_MACF
mac_mount_label_destroy(mp);
#endif
- FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
+ zfree(mount_zone, mp);
return;
}
if (need_put) {
int print_vmpage_stat = 0;
+/*
+ * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
+ * mounted read-write with the passed waitfor value.
+ *
+ * Parameters: mp mount-point descriptor per mounted file-system instance.
+ * arg user argument (please see below)
+ *
+ * User argument is a pointer to 32 bit unsigned integer which describes the
+ * type of waitfor value to set for calling VFS_SYNC(). If user argument is
+ * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
+ * waitfor value.
+ *
+ * Returns: VFS_RETURNED
+ */
static int
-sync_callback(mount_t mp, __unused void *arg)
+sync_callback(mount_t mp, void *arg)
{
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
int asyncflag = mp->mnt_flag & MNT_ASYNC;
+ unsigned waitfor = MNT_NOWAIT;
+
+ if (arg) {
+ waitfor = *(uint32_t*)arg;
+ }
+
+		/* Sanity check for flags - these are the only valid combinations for the flag bits */
+ if (waitfor != MNT_WAIT &&
+ waitfor != (MNT_WAIT | MNT_VOLUME) &&
+ waitfor != MNT_NOWAIT &&
+ waitfor != (MNT_NOWAIT | MNT_VOLUME) &&
+ waitfor != MNT_DWAIT &&
+ waitfor != (MNT_DWAIT | MNT_VOLUME)) {
+ panic("Passed inappropriate waitfor %u to "
+ "sync_callback()", waitfor);
+ }
mp->mnt_flag &= ~MNT_ASYNC;
- VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
+ (void)VFS_SYNC(mp, waitfor, vfs_context_kernel());
if (asyncflag) {
mp->mnt_flag |= MNT_ASYNC;
}
if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable) {
return VFS_RETURNED;
- } else if ((sync_type = SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
+ } else if ((sync_type == SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
return VFS_RETURNED;
}
}
#define SYNC_THREAD_RUN 0x0001
#define SYNC_THREAD_RUNNING 0x0002
+#if CONFIG_PHYS_WRITE_ACCT
+thread_t pm_sync_thread;
+#endif /* CONFIG_PHYS_WRITE_ACCT */
+
static void
sync_thread(__unused void *arg, __unused wait_result_t wr)
{
sync_type_t sync_type;
+#if CONFIG_PHYS_WRITE_ACCT
+ pm_sync_thread = current_thread();
+#endif /* CONFIG_PHYS_WRITE_ACCT */
lck_mtx_lock(sync_mtx_lck);
while (sync_thread_state & SYNC_THREAD_RUN) {
*/
wakeup(&sync_thread_state);
sync_thread_state &= ~SYNC_THREAD_RUNNING;
+#if CONFIG_PHYS_WRITE_ACCT
+ pm_sync_thread = NULL;
+#endif /* CONFIG_PHYS_WRITE_ACCT */
lck_mtx_unlock(sync_mtx_lck);
if (print_vmpage_stat) {
#endif /* DIAGNOSTIC */
}
-struct timeval sync_timeout_last_print = {0, 0};
+struct timeval sync_timeout_last_print = {.tv_sec = 0, .tv_usec = 0};
/*
* An in-kernel sync for power management to call.
thread_t thd;
int error;
int thread_created = FALSE;
- struct timespec ts = {sync_timeout_seconds, 0};
+ struct timespec ts = {.tv_sec = sync_timeout_seconds, .tv_nsec = 0};
lck_mtx_lock(sync_mtx_lck);
sync_thread_state |= SYNC_THREAD_RUN;
return error;
}
mp = nd.ni_vp->v_mount;
+ mount_ref(mp, 0);
vnode_put(nd.ni_vp);
nameidone(&nd);
case Q_QUOTAON:
/* uap->arg specifies a file from which to take the quotas */
fnamelen = MAXPATHLEN;
- datap = kalloc(MAXPATHLEN);
+ datap = zalloc(ZV_NAMEI);
error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
break;
case Q_GETQUOTA:
switch (quota_cmd) {
case Q_QUOTAON:
if (datap != NULL) {
- kfree(datap, MAXPATHLEN);
+ zfree(ZV_NAMEI, datap);
}
break;
case Q_GETQUOTA:
break;
} /* switch */
+ mount_drop(mp, 0);
return error;
}
#else
#if CONFIG_MACF
error = mac_mount_check_stat(ctx, mp);
if (error != 0) {
+ vnode_put(vp);
return error;
}
#endif
return error;
}
-/*
- * Common routine to handle copying of statfs64 data to user space
- */
-static int
-statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
-{
- int error;
- struct statfs64 sfs;
-
- bzero(&sfs, sizeof(sfs));
-
- sfs.f_bsize = sfsp->f_bsize;
- sfs.f_iosize = (int32_t)sfsp->f_iosize;
- sfs.f_blocks = sfsp->f_blocks;
- sfs.f_bfree = sfsp->f_bfree;
- sfs.f_bavail = sfsp->f_bavail;
- sfs.f_files = sfsp->f_files;
- sfs.f_ffree = sfsp->f_ffree;
- sfs.f_fsid = sfsp->f_fsid;
- sfs.f_owner = sfsp->f_owner;
- sfs.f_type = mp->mnt_vtable->vfc_typenum;
- sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
- sfs.f_fssubtype = sfsp->f_fssubtype;
+/*
+ * vfs_get_statfs64: fill a caller-supplied struct statfs64 from the
+ * mount's cached statistics (mp->mnt_vfsstat).  The struct is zeroed
+ * first; no copyout is performed — callers copy the result to user
+ * space themselves.  f_flags_ext advertises MNT_EXT_ROOT_DATA_VOL when
+ * this mount is the system data volume (MNTK_SYSTEMDATA), and
+ * MNTK_TYPENAME_OVERRIDE substitutes the mount's override fstype name.
+ */
+void
+vfs_get_statfs64(struct mount *mp, struct statfs64 *sfs)
+{
+ struct vfsstatfs *vsfs = &mp->mnt_vfsstat;
+
+ bzero(sfs, sizeof(*sfs));
+
+ sfs->f_bsize = vsfs->f_bsize;
+ sfs->f_iosize = (int32_t)vsfs->f_iosize;
+ sfs->f_blocks = vsfs->f_blocks;
+ sfs->f_bfree = vsfs->f_bfree;
+ sfs->f_bavail = vsfs->f_bavail;
+ sfs->f_files = vsfs->f_files;
+ sfs->f_ffree = vsfs->f_ffree;
+ sfs->f_fsid = vsfs->f_fsid;
+ sfs->f_owner = vsfs->f_owner;
+ sfs->f_type = mp->mnt_vtable->vfc_typenum;
+ sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ sfs->f_fssubtype = vsfs->f_fssubtype;
+ sfs->f_flags_ext = (mp->mnt_kern_flag & MNTK_SYSTEMDATA) ? MNT_EXT_ROOT_DATA_VOL : 0;
 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
- strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+ strlcpy(&sfs->f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
 } else {
- strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
+ strlcpy(&sfs->f_fstypename[0], &vsfs->f_fstypename[0], MFSTYPENAMELEN);
 }
- strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
- strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
-
- error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
-
- return error;
+ strlcpy(&sfs->f_mntonname[0], &vsfs->f_mntonname[0], MAXPATHLEN);
+ strlcpy(&sfs->f_mntfromname[0], &vsfs->f_mntfromname[0], MAXPATHLEN);
 }
/*
statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
{
struct mount *mp;
- struct vfsstatfs *sp;
int error;
- struct nameidata nd;
+ struct nameidata *ndp;
+ struct statfs64 *sfsp;
vfs_context_t ctxp = vfs_context_current();
vnode_t vp;
+ union {
+ struct nameidata nd;
+ struct statfs64 sfs;
+ } *__nameidata_statfs64;
- NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
+ __nameidata_statfs64 = kheap_alloc(KHEAP_TEMP, sizeof(*__nameidata_statfs64),
+ Z_WAITOK);
+ ndp = &__nameidata_statfs64->nd;
+
+ NDINIT(ndp, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctxp);
- error = namei(&nd);
+ error = namei(ndp);
if (error != 0) {
- return error;
+ goto out;
}
- vp = nd.ni_vp;
+ vp = ndp->ni_vp;
mp = vp->v_mount;
- sp = &mp->mnt_vfsstat;
- nameidone(&nd);
+ nameidone(ndp);
#if CONFIG_MACF
error = mac_mount_check_stat(ctxp, mp);
if (error != 0) {
- return error;
+ vnode_put(vp);
+ goto out;
}
#endif
error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
if (error != 0) {
vnode_put(vp);
- return error;
+ goto out;
}
- error = statfs64_common(mp, sp, uap->buf);
+ sfsp = &__nameidata_statfs64->sfs;
+ vfs_get_statfs64(mp, sfsp);
+ if ((mp->mnt_kern_flag & MNTK_SYSTEMDATA) &&
+ (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
+ /* This process does not want to see a separate data volume mountpoint */
+ strlcpy(&sfsp->f_mntonname[0], "/", sizeof("/"));
+ }
+ error = copyout(sfsp, uap->buf, sizeof(*sfsp));
vnode_put(vp);
+out:
+ kheap_free(KHEAP_TEMP, __nameidata_statfs64, sizeof(*__nameidata_statfs64));
+
return error;
}
{
struct vnode *vp;
struct mount *mp;
- struct vfsstatfs *sp;
+ struct statfs64 sfs;
int error;
AUDIT_ARG(fd, uap->fd);
}
#endif
- sp = &mp->mnt_vfsstat;
if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
goto out;
}
- error = statfs64_common(mp, sp, uap->buf);
+ vfs_get_statfs64(mp, &sfs);
+ if ((mp->mnt_kern_flag & MNTK_SYSTEMDATA) &&
+ (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
+ /* This process does not want to see a separate data volume mountpoint */
+ strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
+ }
+ error = copyout(&sfs, uap->buf, sizeof(sfs));
out:
file_drop(uap->fd);
* If MNT_NOWAIT is specified, do not refresh the
* fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
*/
- if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
- (error = vfs_update_vfsstat(mp, ctx,
- VFS_USER_EVENT))) {
+ if ((mp->mnt_lflag & MNT_LDEAD) ||
+ (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+ (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
+ (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT)))) {
KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
return VFS_RETURNED;
}
size_t count, maxcount, bufsize, macsize;
struct getfsstat_struct fst;
+ if ((unsigned)uap->bufsize > INT_MAX || (unsigned)uap->macsize > INT_MAX) {
+ return EINVAL;
+ }
+
bufsize = (size_t) uap->bufsize;
macsize = (size_t) uap->macsize;
}
/* Copy in the array */
- MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
+ mp0 = kheap_alloc(KHEAP_TEMP, macsize, Z_WAITOK);
if (mp0 == NULL) {
return ENOMEM;
}
error = copyin(uap->mac, mp0, macsize);
if (error) {
- FREE(mp0, M_MACTEMP);
+ kheap_free(KHEAP_TEMP, mp0, macsize);
return error;
}
/* Normalize to an array of user_addr_t */
- MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
+ mp = kheap_alloc(KHEAP_TEMP, count * sizeof(user_addr_t), Z_WAITOK);
if (mp == NULL) {
- FREE(mp0, M_MACTEMP);
+ kheap_free(KHEAP_TEMP, mp0, macsize);
return ENOMEM;
}
mp[i] = (user_addr_t)mp0[i];
}
}
- FREE(mp0, M_MACTEMP);
+ kheap_free(KHEAP_TEMP, mp0, macsize);
}
#endif
fst.flags = uap->flags;
fst.count = 0;
fst.error = 0;
- fst.maxcount = maxcount;
+ fst.maxcount = (int)maxcount;
- vfs_iterate(0, getfsstat_callback, &fst);
+ vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat_callback, &fst);
if (mp) {
- FREE(mp, M_MACTEMP);
+ kheap_free(KHEAP_TEMP, mp, count * sizeof(user_addr_t));
}
if (fst.error) {
{
struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
struct vfsstatfs *sp;
+ struct statfs64 sfs;
int error;
if (fstp->sfsp && fstp->count < fstp->maxcount) {
* getfsstat, since the constants are out of the same
* namespace.
*/
- if (((fstp->flags & MNT_NOWAIT) == 0 ||
- (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
- (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
+ if ((mp->mnt_lflag & MNT_LDEAD) ||
+ ((((fstp->flags & MNT_NOWAIT) == 0) || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+ (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
+ (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)))) {
KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
return VFS_RETURNED;
}
- error = statfs64_common(mp, sp, fstp->sfsp);
+ vfs_get_statfs64(mp, &sfs);
+ error = copyout(&sfs, fstp->sfsp, sizeof(sfs));
if (error) {
fstp->error = error;
return VFS_RETURNED_DONE;
}
- fstp->sfsp += sizeof(struct statfs64);
+ fstp->sfsp += sizeof(sfs);
}
fstp->count++;
return VFS_RETURNED;
fst.error = 0;
fst.maxcount = maxcount;
- vfs_iterate(0, getfsstat64_callback, &fst);
+ vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat64_callback, &fst);
if (fst.error) {
KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
vnode_t tdp;
vnode_t tvp;
struct mount *mp;
- int error;
+ int error, should_put = 1;
vfs_context_t ctx = vfs_context_current();
AUDIT_ARG(fd, uap->fd);
goto out;
}
vnode_put(vp);
+ should_put = 0;
if (per_thread) {
thread_t th = vfs_context_thread(ctx);
OSBitOrAtomic(P_THCWD, &p->p_flag);
} else {
vnode_rele(vp);
- return ENOENT;
+ error = ENOENT;
+ goto out;
}
} else {
+ proc_dirs_lock_exclusive(p);
proc_fdlock(p);
tvp = fdp->fd_cdir;
fdp->fd_cdir = vp;
proc_fdunlock(p);
+ proc_dirs_unlock_exclusive(p);
}
if (tvp) {
vnode_rele(tvp);
}
- file_drop(uap->fd);
- return 0;
out:
- vnode_put(vp);
+ if (should_put) {
+ vnode_put(vp);
+ }
file_drop(uap->fd);
return error;
return common_fchdir(p, (void *)uap, 1);
}
+
/*
* Change current working directory (".").
*
* vnode_ref:ENOENT No such file or directory
*/
/* ARGSUSED */
-static int
-common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
+int
+chdir_internal(proc_t p, vfs_context_t ctx, struct nameidata *ndp, int per_thread)
{
struct filedesc *fdp = p->p_fd;
int error;
- struct nameidata nd;
vnode_t tvp;
- vfs_context_t ctx = vfs_context_current();
- NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
- UIO_USERSPACE, uap->path, ctx);
- error = change_dir(&nd, ctx);
+ error = change_dir(ndp, ctx);
if (error) {
return error;
}
- if ((error = vnode_ref(nd.ni_vp))) {
- vnode_put(nd.ni_vp);
+ if ((error = vnode_ref(ndp->ni_vp))) {
+ vnode_put(ndp->ni_vp);
return error;
}
/*
* drop the iocount we picked up in change_dir
*/
- vnode_put(nd.ni_vp);
+ vnode_put(ndp->ni_vp);
if (per_thread) {
thread_t th = vfs_context_thread(ctx);
if (th) {
uthread_t uth = get_bsdthread_info(th);
tvp = uth->uu_cdir;
- uth->uu_cdir = nd.ni_vp;
+ uth->uu_cdir = ndp->ni_vp;
OSBitOrAtomic(P_THCWD, &p->p_flag);
} else {
- vnode_rele(nd.ni_vp);
+ vnode_rele(ndp->ni_vp);
return ENOENT;
}
} else {
+ proc_dirs_lock_exclusive(p);
proc_fdlock(p);
tvp = fdp->fd_cdir;
- fdp->fd_cdir = nd.ni_vp;
+ fdp->fd_cdir = ndp->ni_vp;
proc_fdunlock(p);
+ proc_dirs_unlock_exclusive(p);
}
if (tvp) {
/*
- * chdir
- *
- * Change current working directory (".") for the entire process
- *
- * Parameters: p Process requesting the call
- * uap User argument descriptor (see below)
- * retval (ignored)
+ * Change current working directory (".").
+ *
+ * Returns: 0 Success
+ * chdir_internal:ENOTDIR
+ * chdir_internal:ENOENT No such file or directory
+ * chdir_internal:???
+ */
+/* ARGSUSED */
+static int
+common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
+{
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ /* Look up uap->path (following symlinks) and hand off to the shared
+  * implementation; per_thread selects thread-local vs process-wide cwd. */
+ NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+
+ return chdir_internal(p, ctx, &nd, per_thread);
+}
+
+
+/*
+ * chdir
+ *
+ * Change current working directory (".") for the entire process
+ *
+ * Parameters: p Process requesting the call
+ * uap User argument descriptor (see below)
+ * retval (ignored)
*
* Indirect parameters: uap->path Directory path
*
}
vnode_put(nd.ni_vp);
+ /*
+ * This lock provides the guarantee that as long as you hold the lock
+ * fdp->fd_rdir has a usecount on it. This is used to take an iocount
+ * on a referenced vnode in namei when determining the rootvnode for
+ * a process.
+ */
+ /* needed for synchronization with lookup */
+ proc_dirs_lock_exclusive(p);
+ /* needed for setting the flag and other activities on the fd itself */
proc_fdlock(p);
tvp = fdp->fd_rdir;
fdp->fd_rdir = nd.ni_vp;
fdp->fd_flags |= FD_CHROOT;
proc_fdunlock(p);
+ proc_dirs_unlock_exclusive(p);
if (tvp != NULL) {
vnode_rele(tvp);
return 0;
}
+#define PATHSTATICBUFLEN 256
+#define PIVOT_ROOT_ENTITLEMENT \
+ "com.apple.private.vfs.pivot-root"
+
+#if defined(XNU_TARGET_OS_OSX)
+/*
+ * pivot_root: make the file system mounted at `new_rootfs_path_before`
+ * become the root ("/"), relocating the outgoing root to
+ * `old_rootfs_path_after` (the actual switch is done by
+ * vfs_switch_root()).
+ *
+ * Restrictions enforced here:
+ *  - caller must be superuser,
+ *  - caller must be launchd (pid 1) holding the private
+ *    com.apple.private.vfs.pivot-root entitlement,
+ *  - the incoming root must pass FSIOC_KERNEL_ROOTAUTH (i.e. be an
+ *    authenticated volume, not e.g. a chunklist DMG image),
+ *  - virtual devices are refused (VFSSR_VIRTUALDEV_PROHIBITED).
+ *
+ * Both paths are first copied into 256-byte stack buffers; only paths
+ * longer than that fall back to a MAXPATHLEN ZV_NAMEI heap allocation.
+ */
+int
+pivot_root(proc_t p, struct pivot_root_args *uap, __unused int *retval)
+{
+ int error;
+ char new_rootfs_path_before[PATHSTATICBUFLEN] = {0};
+ char old_rootfs_path_after[PATHSTATICBUFLEN] = {0};
+ char *new_rootfs_path_before_buf = NULL;
+ char *old_rootfs_path_after_buf = NULL;
+ char *incoming = NULL;
+ char *outgoing = NULL;
+ vnode_t incoming_rootvp = NULLVP;
+ size_t bytes_copied;
+
+ /*
+ * XXX : Additional restrictions needed
+ * - perhaps callable only once.
+ */
+ if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
+ return error;
+ }
+
+ /*
+ * pivot_root can be executed by launchd only.
+ * Enforce entitlement.
+ */
+ if ((p->p_pid != 1) || !IOTaskHasEntitlement(current_task(), PIVOT_ROOT_ENTITLEMENT)) {
+ return EPERM;
+ }
+
+ /* ENAMETOOLONG from the stack-buffer copy triggers a retry into a
+  * full MAXPATHLEN heap buffer. */
+ error = copyinstr(uap->new_rootfs_path_before, &new_rootfs_path_before[0], PATHSTATICBUFLEN, &bytes_copied);
+ if (error == ENAMETOOLONG) {
+ new_rootfs_path_before_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
+ error = copyinstr(uap->new_rootfs_path_before, new_rootfs_path_before_buf, MAXPATHLEN, &bytes_copied);
+ }
+
+ if (error) {
+ goto out;
+ }
+
+ error = copyinstr(uap->old_rootfs_path_after, &old_rootfs_path_after[0], PATHSTATICBUFLEN, &bytes_copied);
+ if (error == ENAMETOOLONG) {
+ old_rootfs_path_after_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
+ error = copyinstr(uap->old_rootfs_path_after, old_rootfs_path_after_buf, MAXPATHLEN, &bytes_copied);
+ }
+ if (error) {
+ goto out;
+ }
+
+ if (new_rootfs_path_before_buf) {
+ incoming = new_rootfs_path_before_buf;
+ } else {
+ incoming = &new_rootfs_path_before[0];
+ }
+
+ if (old_rootfs_path_after_buf) {
+ outgoing = old_rootfs_path_after_buf;
+ } else {
+ outgoing = &old_rootfs_path_after[0];
+ }
+
+ /*
+ * The proposed incoming FS MUST be authenticated (i.e. not a chunklist DMG).
+ * Userland is not allowed to pivot to an image.
+ */
+ error = vnode_lookup(incoming, 0, &incoming_rootvp, vfs_context_kernel());
+ if (error) {
+ goto out;
+ }
+ error = VNOP_IOCTL(incoming_rootvp, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
+ if (error) {
+ goto out;
+ }
+
+ error = vfs_switch_root(incoming, outgoing, VFSSR_VIRTUALDEV_PROHIBITED);
+
+out:
+ /* Single exit: drop the iocount from vnode_lookup() and free any
+  * heap path buffers; the zfree calls are conditional because the
+  * common (short-path) case never allocates. */
+ if (incoming_rootvp != NULLVP) {
+ vnode_put(incoming_rootvp);
+ incoming_rootvp = NULLVP;
+ }
+
+ if (old_rootfs_path_after_buf) {
+ zfree(ZV_NAMEI, old_rootfs_path_after_buf);
+ }
+
+ if (new_rootfs_path_before_buf) {
+ zfree(ZV_NAMEI, new_rootfs_path_before_buf);
+ }
+
+ return error;
+}
+#else
+/* pivot_root is macOS-only; other targets reject the syscall. */
+int
+pivot_root(proc_t p, __unused struct pivot_root_args *uap, int *retval)
+{
+ return nosys(p, NULL, retval);
+}
+#endif /* XNU_TARGET_OS_OSX */
+
/*
* Common routine for chroot and chdir.
*
struct fd_vn_data *fvdata;
/* Allocate per fd vnode data */
- MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
- M_FD_VN_DATA, M_WAITOK | M_ZERO);
+ fvdata = kheap_alloc(KM_FD_VN_DATA, sizeof(struct fd_vn_data),
+ Z_WAITOK | Z_ZERO);
lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
return fvdata;
}
{
struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
- if (fvdata->fv_buf) {
- FREE(fvdata->fv_buf, M_FD_DIRBUF);
- }
+ kheap_free(KHEAP_DATA_BUFFERS, fvdata->fv_buf, fvdata->fv_bufallocsiz);
lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
- FREE(fvdata, M_FD_VN_DATA);
+ kheap_free(KM_FD_VN_DATA, fvdata, sizeof(struct fd_vn_data));
}
/*
vnode_t vp;
int flags, oflags;
int type, indx, error;
- struct flock lf;
struct vfs_context context;
oflags = uflags;
uu->uu_dupfd = 0;
vp = ndp->ni_vp;
- fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
- fp->f_fglob->fg_ops = &vnops;
- fp->f_fglob->fg_data = (caddr_t)vp;
+ fp->fp_glob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
+ fp->fp_glob->fg_ops = &vnops;
+ fp->fp_glob->fg_data = (caddr_t)vp;
if (flags & (O_EXLOCK | O_SHLOCK)) {
- lf.l_whence = SEEK_SET;
- lf.l_start = 0;
- lf.l_len = 0;
+ struct flock lf = {
+ .l_whence = SEEK_SET,
+ };
+
if (flags & O_EXLOCK) {
lf.l_type = F_WRLCK;
} else {
type |= F_WAIT;
}
#if CONFIG_MACF
- error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
+ error = mac_file_check_lock(vfs_context_ucred(ctx), fp->fp_glob,
F_SETLK, &lf);
if (error) {
goto bad;
}
#endif
- if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
+ if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
goto bad;
}
- fp->f_fglob->fg_flag |= FHASLOCK;
- }
-
-#if DEVELOPMENT || DEBUG
- /*
- * XXX VSWAP: Check for entitlements or special flag here
- * so we can restrict access appropriately.
- */
-#else /* DEVELOPMENT || DEBUG */
-
- if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
- /* block attempt to write/truncate swapfile */
- error = EPERM;
- goto bad;
+ fp->fp_glob->fg_flag |= FWASLOCKED;
}
-#endif /* DEVELOPMENT || DEBUG */
/* try to truncate by setting the size attribute */
if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) {
* For directories we hold some additional information in the fd.
*/
if (vnode_vtype(vp) == VDIR) {
- fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
+ fp->fp_glob->fg_vn_data = fg_vn_data_alloc();
} else {
- fp->f_fglob->fg_vn_data = NULL;
+ fp->fp_glob->fg_vn_data = NULL;
}
vnode_put(vp);
!(flags & O_NOCTTY)) {
int tmp = 0;
- (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
+ (void)(*fp->fp_glob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
(caddr_t)&tmp, ctx);
}
#if CONFIG_SECLUDED_MEMORY
if (secluded_for_filecache &&
- FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
+ FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_VNODE &&
vnode_vtype(vp) == VREG) {
memory_object_control_t moc;
if (moc == MEMORY_OBJECT_CONTROL_NULL) {
/* nothing to do... */
- } else if (fp->f_fglob->fg_flag & FWRITE) {
+ } else if (fp->fp_glob->fg_flag & FWRITE) {
/* writable -> no longer eligible for secluded pages */
memory_object_mark_eligible_for_secluded(moc,
FALSE);
size_t copied;
/* XXX FBDP: better way to detect /Applications/ ? */
if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
- copyinstr(ndp->ni_dirp,
+ (void)copyinstr(ndp->ni_dirp,
pathname,
sizeof(pathname),
&copied);
#else
/* not implemented... */
#endif
- if (!strncmp(vp->v_name,
- DYLD_SHARED_CACHE_NAME,
- strlen(DYLD_SHARED_CACHE_NAME)) ||
- !strncmp(vp->v_name,
- "dyld",
- strlen(vp->v_name)) ||
- !strncmp(vp->v_name,
- "launchd",
- strlen(vp->v_name)) ||
- !strncmp(vp->v_name,
- "Camera",
- strlen(vp->v_name)) ||
- !strncmp(vp->v_name,
- "mediaserverd",
- strlen(vp->v_name)) ||
- !strncmp(vp->v_name,
- "SpringBoard",
- strlen(vp->v_name)) ||
- !strncmp(vp->v_name,
- "backboardd",
- strlen(vp->v_name))) {
+ size_t len = strlen(vp->v_name);
+ if (!strncmp(vp->v_name, DYLD_SHARED_CACHE_NAME, len) ||
+ !strncmp(vp->v_name, "dyld", len) ||
+ !strncmp(vp->v_name, "launchd", len) ||
+ !strncmp(vp->v_name, "Camera", len) ||
+ !strncmp(vp->v_name, "mediaserverd", len) ||
+ !strncmp(vp->v_name, "SpringBoard", len) ||
+ !strncmp(vp->v_name, "backboardd", len)) {
/*
* This file matters when launching Camera:
* do not store its contents in the secluded
return 0;
bad:
context = *vfs_context_current();
- context.vc_ucred = fp->f_fglob->fg_cred;
+ context.vc_ucred = fp->fp_glob->fg_cred;
- if ((fp->f_fglob->fg_flag & FHASLOCK) &&
- (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
- lf.l_whence = SEEK_SET;
- lf.l_start = 0;
- lf.l_len = 0;
- lf.l_type = F_UNLCK;
+ if ((fp->fp_glob->fg_flag & FWASLOCKED) &&
+ (FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_VNODE)) {
+ struct flock lf = {
+ .l_whence = SEEK_SET,
+ .l_type = F_UNLCK,
+ };
(void)VNOP_ADVLOCK(
- vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
+ vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
}
- vn_close(vp, fp->f_fglob->fg_flag, &context);
+ vn_close(vp, fp->fp_glob->fg_flag, &context);
vnode_put(vp);
fp_free(p, indx, fp);
VATTR_INIT(&va);
cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
- VATTR_SET(&va, va_mode, cmode);
+ VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
if (uap->uid != KAUTH_UID_NONE) {
VATTR_SET(&va, va_uid, uap->uid);
}
int fd, enum uio_seg segflg, int *retval)
{
struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
- struct vnode_attr va;
- struct nameidata nd;
+ struct {
+ struct vnode_attr va;
+ struct nameidata nd;
+ } *__open_data;
+ struct vnode_attr *vap;
+ struct nameidata *ndp;
int cmode;
+ int error;
- VATTR_INIT(&va);
+ __open_data = kheap_alloc(KHEAP_TEMP, sizeof(*__open_data), Z_WAITOK);
+ vap = &__open_data->va;
+ ndp = &__open_data->nd;
+
+ VATTR_INIT(vap);
/* Mask off all but regular access permissions */
cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
- VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
+ VATTR_SET(vap, va_mode, cmode & ACCESSPERMS);
- NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
+ NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
segflg, path, ctx);
- return open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
- retval, fd);
+ error = open1at(ctx, ndp, flags, vap, fileproc_alloc_init, NULL,
+ retval, fd);
+
+ kheap_free(KHEAP_TEMP, __open_data, sizeof(*__open_data));
+
+ return error;
}
int
/*resolve path from fsis, objid*/
do {
- MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
+ buf = kheap_alloc(KHEAP_TEMP, buflen + 1, Z_WAITOK);
if (buf == NULL) {
return ENOMEM;
}
- error = fsgetpath_internal(
- ctx, fsid.val[0], objid,
- buflen, buf, &pathlen);
+ error = fsgetpath_internal( ctx, fsid.val[0], objid, buflen,
+ buf, FSOPT_ISREALFSID, &pathlen);
if (error) {
- FREE(buf, M_TEMP);
+ kheap_free(KHEAP_TEMP, buf, buflen + 1);
buf = NULL;
}
} while (error == ENOSPC && (buflen += MAXPATHLEN));
error = openat_internal(
ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
- FREE(buf, M_TEMP);
+ kheap_free(KHEAP_TEMP, buf, buflen + 1);
return error;
}
return mkfifo1(ctx, uap->path, &va);
}
- AUDIT_ARG(mode, uap->mode);
+ AUDIT_ARG(mode, (mode_t)uap->mode);
AUDIT_ARG(value32, uap->dev);
if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
return mkfifo1(vfs_context_current(), uap->path, &va);
}
-
-static char *
-my_strrchr(char *p, int ch)
-{
- char *save;
-
- for (save = NULL;; ++p) {
- if (*p == ch) {
- save = p;
- }
- if (!*p) {
- return save;
- }
- }
- /* NOTREACHED */
-}
-
+extern int safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink);
extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
+extern int safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
int
-safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
+safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink)
{
int ret, len = _len;
*truncated_path = 0;
- ret = vn_getpath(dvp, path, &len);
+
+ if (firmlink) {
+ ret = vn_getpath(dvp, path, &len);
+ } else {
+ ret = vn_getpath_no_firmlink(dvp, path, &len);
+ }
if (ret == 0 && len < (MAXPATHLEN - 1)) {
if (leafname) {
path[len - 1] = '/';
// the string got truncated!
*truncated_path = 1;
- ptr = my_strrchr(path, '/');
+ ptr = strrchr(path, '/');
if (ptr) {
*ptr = '\0'; // chop off the string at the last directory component
}
- len = strlen(path) + 1;
+ len = (int)strlen(path) + 1;
}
}
} else if (ret == 0) {
}
len = _len;
- ret = vn_getpath(mydvp, path, &len);
+ if (firmlink) {
+ ret = vn_getpath(mydvp, path, &len);
+ } else {
+ ret = vn_getpath_no_firmlink(mydvp, path, &len);
+ }
} while (ret == ENOSPC);
}
return len;
}
+/*
+ * safe_getpath: build the path to dvp (with leafname appended when
+ * non-NULL), resolving through firmlinks; truncation is reported via
+ * *truncated_path.  Thin wrapper over safe_getpath_new(..., firmlink=1).
+ */
+int
+safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
+{
+ return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 1);
+}
+
+/* Same as safe_getpath(), but resolves the path without following
+ * firmlinks (vn_getpath_no_firmlink). */
+int
+safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
+{
+ return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 0);
+}
/*
* Make a hard file link.
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
user_addr_t link, int flag, enum uio_seg segflg)
{
- vnode_t vp, dvp, lvp;
+ vnode_t vp, pvp, dvp, lvp;
struct nameidata nd;
int follow;
int error;
/* build the path to the new link file */
GET_PATH(target_path);
- if (target_path == NULL) {
- error = ENOMEM;
- goto out2;
- }
len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
if (has_listeners) {
/* build the path to file we are linking to */
GET_PATH(link_to_path);
- if (link_to_path == NULL) {
- error = ENOMEM;
- goto out2;
- }
link_name_len = MAXPATHLEN;
if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
FSE_ARG_FINFO, &finfo,
FSE_ARG_DONE);
}
- if (vp->v_parent) {
+
+ pvp = vp->v_parent;
+ // need an iocount on pvp in this case
+ if (pvp && pvp != dvp) {
+ error = vnode_get(pvp);
+ if (error) {
+ pvp = NULLVP;
+ error = 0;
+ }
+ }
+ if (pvp) {
add_fsevent(FSE_STAT_CHANGED, ctx,
- FSE_ARG_VNODE, vp->v_parent,
- FSE_ARG_DONE);
+ FSE_ARG_VNODE, pvp, FSE_ARG_DONE);
+ }
+ if (pvp && pvp != dvp) {
+ vnode_put(pvp);
}
}
#endif
error = 0;
if (UIO_SEG_IS_USER_SPACE(segflg)) {
- MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ path = zalloc(ZV_NAMEI);
error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
} else {
path = (char *)path_data;
error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
}
-#if CONFIG_MACF
+ /* do fallback attribute handling */
if (error == 0 && vp) {
- error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
+ error = vnode_setattr_fallback(vp, &va, ctx);
}
-#endif
- /* do fallback attribute handling */
+#if CONFIG_MACF
if (error == 0 && vp) {
- error = vnode_setattr_fallback(vp, &va, ctx);
+ error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
}
+#endif
if (error == 0) {
int update_flags = 0;
vnode_put(dvp);
out:
if (path && (path != (char *)path_data)) {
- FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+ zfree(ZV_NAMEI, path);
}
return error;
int error;
struct componentname *cnp;
char *path = NULL;
- int len = 0;
+ char *no_firmlink_path = NULL;
+ int len_path = 0;
+ int len_no_firmlink_path = 0;
#if CONFIG_FSE
fse_info finfo;
struct vnode_attr va;
int need_event;
int has_listeners;
int truncated_path;
+ int truncated_no_firmlink_path;
int batched;
struct vnode_attr *vap;
int do_retry;
need_event = 0;
has_listeners = 0;
truncated_path = 0;
+ truncated_no_firmlink_path = 0;
vap = NULL;
NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
/*
* The root of a mounted filesystem cannot be deleted.
*/
- if (vp->v_flag & VROOT) {
+ if ((vp->v_flag & VROOT) || (dvp->v_mount != vp->v_mount)) {
error = EBUSY;
+ goto out;
}
#if DEVELOPMENT || DEBUG
error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
if (error) {
if (error == ENOENT) {
- assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
do_retry = 1;
retry_count++;
if (need_event || has_listeners) {
if (path == NULL) {
GET_PATH(path);
- if (path == NULL) {
- error = ENOMEM;
- goto out;
- }
}
- len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+ len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+ if (no_firmlink_path == NULL) {
+ GET_PATH(no_firmlink_path);
+ }
+ len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
}
#if NAMEDRSRCFORK
}
goto continue_lookup;
} else if (error == ENOENT && batched) {
- assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
/*
* For compound VNOPs, the authorization callback may
finfo.mode |= FSE_TRUNCATED_PATH;
}
add_fsevent(FSE_DELETE, ctx,
- FSE_ARG_STRING, len, path,
+ FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
FSE_ARG_FINFO, &finfo,
FSE_ARG_DONE);
}
out:
if (path != NULL) {
RELEASE_PATH(path);
+ path = NULL;
}
+ if (no_firmlink_path != NULL) {
+ RELEASE_PATH(no_firmlink_path);
+ no_firmlink_path = NULL;
+ }
#if NAMEDRSRCFORK
/* recycle the deleted rsrc fork vnode to force a reclaim, which
* will cause its shadow file to go away if necessary.
int
unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
{
- if (uap->flag & ~AT_REMOVEDIR) {
+ if (uap->flag & ~(AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
return EINVAL;
}
- if (uap->flag & AT_REMOVEDIR) {
+ if (uap->flag & (AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
+ int unlink_flags = 0;
+
+ if (uap->flag & AT_REMOVEDIR_DATALESS) {
+ unlink_flags |= VNODE_REMOVE_DATALESS_DIR;
+ }
return rmdirat_internal(vfs_context_current(), uap->fd,
- uap->path, UIO_USERSPACE);
+ uap->path, UIO_USERSPACE, unlink_flags);
} else {
return unlinkat_internal(vfs_context_current(), uap->fd,
NULLVP, uap->path, UIO_USERSPACE, 0);
#if CONFIG_MACF
if (uap->whence == L_INCR && uap->offset == 0) {
error = mac_file_check_get_offset(vfs_context_ucred(ctx),
- fp->f_fglob);
+ fp->fp_glob);
} else {
error = mac_file_check_change_offset(vfs_context_ucred(ctx),
- fp->f_fglob);
+ fp->fp_glob);
}
if (error) {
file_drop(uap->fd);
switch (uap->whence) {
case L_INCR:
- offset += fp->f_fglob->fg_offset;
+ offset += fp->fp_glob->fg_offset;
break;
case L_XTND:
if ((error = vnode_size(vp, &file_size, ctx)) != 0) {
error = EINVAL;
} else {
/* Success */
- fp->f_fglob->fg_offset = offset;
- *retval = fp->f_fglob->fg_offset;
+ fp->fp_glob->fg_offset = offset;
+ *retval = fp->fp_glob->fg_offset;
}
}
}
errno_t *result = NULL;
errno_t error = 0;
int wantdelete = 0;
- unsigned int desc_max, desc_actual, i, j;
+ size_t desc_max, desc_actual;
+ unsigned int i, j;
struct vfs_context context;
struct nameidata nd;
int niopts;
if (uap->size <= sizeof(stack_input)) {
input = stack_input;
} else {
- MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
+ input = kheap_alloc(KHEAP_DATA_BUFFERS, uap->size, Z_WAITOK);
if (input == NULL) {
error = ENOMEM;
goto out;
error = ENOMEM;
goto out;
}
- MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
+ result = kheap_alloc(KHEAP_DATA_BUFFERS, desc_actual * sizeof(errno_t),
+ Z_WAITOK | Z_ZERO);
if (result == NULL) {
error = ENOMEM;
goto out;
out:
if (input && input != stack_input) {
- FREE(input, M_TEMP);
+ kheap_free(KHEAP_DATA_BUFFERS, input, uap->size);
}
if (result) {
- FREE(result, M_TEMP);
+ kheap_free(KHEAP_DATA_BUFFERS, result, desc_actual * sizeof(errno_t));
}
if (vp) {
vnode_put(vp);
context.vc_thread = ctx->vc_thread;
- niopts = FOLLOW | AUDITVNPATH1;
+ niopts = (flag & AT_SYMLINK_NOFOLLOW ? NOFOLLOW : FOLLOW) | AUDITVNPATH1;
/* need parent for vnode_authorize for deletion test */
if (amode & _DELETE_OK) {
niopts |= WANTPARENT;
faccessat(__unused proc_t p, struct faccessat_args *uap,
__unused int32_t *retval)
{
- if (uap->flag & ~AT_EACCESS) {
+ if (uap->flag & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW)) {
return EINVAL;
}
kauth_filesec_t fsec;
size_t xsecurity_bufsize;
void * statptr;
+ struct fileproc *fp = NULL;
+ int needsrealdev = 0;
follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
/* stat calls are allowed for resource forks. */
nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
- error = nameiat(&nd, fd);
- if (error) {
- return error;
+
+ if (flag & AT_FDONLY) {
+ vnode_t fvp;
+
+ error = fp_getfvp(vfs_context_proc(ctx), fd, &fp, &fvp);
+ if (error) {
+ return error;
+ }
+ if ((error = vnode_getwithref(fvp))) {
+ file_drop(fd);
+ return error;
+ }
+ nd.ni_vp = fvp;
+ } else {
+ error = nameiat(&nd, fd);
+ if (error) {
+ return error;
+ }
}
fsec = KAUTH_FILESEC_NONE;
}
#endif
- error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
+ needsrealdev = flag & AT_REALDEV ? 1 : 0;
+ if (fp && (xsecurity == USER_ADDR_NULL)) {
+ /*
+ * If the caller has the file open, and is not
+ * requesting extended security information, we are
+ * going to let them get the basic stat information.
+ */
+ error = vn_stat_noauth(nd.ni_vp, statptr, NULL, isstat64, needsrealdev, ctx,
+ fp->fp_glob->fg_cred);
+ } else {
+ error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL),
+ isstat64, needsrealdev, ctx);
+ }
#if NAMEDRSRCFORK
if (is_namedstream) {
#endif
vnode_put(nd.ni_vp);
nameidone(&nd);
+ if (fp) {
+ file_drop(fd);
+ fp = NULL;
+ }
if (error) {
return error;
int
fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
{
- if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
+ if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
return EINVAL;
}
fstatat64(__unused proc_t p, struct fstatat64_args *uap,
__unused int32_t *retval)
{
- if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
+ if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
return EINVAL;
}
struct nameidata nd;
char uio_buf[UIO_SIZEOF(1)];
+ if (bufsize > INT32_MAX) {
+ return EINVAL;
+ }
+
NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
seg, path, ctx);
}
vnode_put(vp);
- *retval = bufsize - (int)uio_resid(auio);
+ *retval = (int)(bufsize - uio_resid(auio));
return error;
}
}
/*
- * Change file flags.
- *
- * NOTE: this will vnode_put() `vp'
+ * Change file flags, the deep inner layer.
*/
/*
 * Change-file-flags core, shared by the chflags()/fchflags() paths.
 *
 * Performs the MAC setflags check on va->va_flags, requests authorization
 * via vnode_authattr() — adding KAUTH_VNODE_NOIMMUTABLE so an immutable
 * flag on the object does not itself block changing the flags — and then
 * applies the change through the caller-supplied routine as
 * (*setattr)(vp, arg, ctx).  On success, MAC is notified of the new flags.
 *
 * 'va' must have va_flags set by the caller.  Returns 0 or an errno.
 * Does not take or drop any vnode reference; that is the caller's job.
 */
static int
-chflags1(vnode_t vp, int flags, vfs_context_t ctx)
+chflags0(vnode_t vp, struct vnode_attr *va,
+ int (*setattr)(vnode_t, void *, vfs_context_t),
+ void *arg, vfs_context_t ctx)
{
- struct vnode_attr va;
- kauth_action_t action;
+ kauth_action_t action = 0;
 int error;
- VATTR_INIT(&va);
- VATTR_SET(&va, va_flags, flags);
-
#if CONFIG_MACF
- error = mac_vnode_check_setflags(ctx, vp, flags);
+ error = mac_vnode_check_setflags(ctx, vp, va->va_flags);
 if (error) {
 goto out;
 }
#endif
 /* request authorisation, disregard immutability */
- if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
+ if ((error = vnode_authattr(vp, va, &action, ctx)) != 0) {
 goto out;
 }
 /*
 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) {
 goto out;
 }
- error = vnode_setattr(vp, &va, ctx);
+ error = (*setattr)(vp, arg, ctx);
#if CONFIG_MACF
 if (error == 0) {
- mac_vnode_notify_setflags(ctx, vp, flags);
+ mac_vnode_notify_setflags(ctx, vp, va->va_flags);
 }
#endif
+out:
+ return error;
+}
+
+/*
+ * Change file flags.
+ *
+ * NOTE: this will vnode_put() `vp'
+ */
+static int
+chflags1(vnode_t vp, int flags, vfs_context_t ctx)
+{
+ struct vnode_attr va;
+ int error;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_flags, flags);
+
 /*
  * chflags0() does the MAC check, authorization and the actual change;
  * the (void *) cast adapts vnode_setattr()'s 'struct vnode_attr *'
  * second argument to chflags0()'s generic 'arg' parameter (the same
  * &va is passed both as the attribute set and as that argument).
  */
+ error = chflags0(vp, &va, (void *)vnode_setattr, &va, ctx);
 /* Drop the vnode reference the caller handed us, success or not. */
+ vnode_put(vp);
+
 /* If the filesystem silently ignored va_flags, report ENOTSUP. */
 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
 error = ENOTSUP;
 }
-out:
- vnode_put(vp);
+
 return error;
}
if (error) {
return error;
}
- TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
- TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
+ TIMEVAL64_TO_TIMESPEC(&tv[0], &tsp[0]);
+ TIMEVAL64_TO_TIMESPEC(&tv[1], &tsp[1]);
} else {
struct user32_timeval tv[2];
error = copyin(usrtvp, (void *)tv, sizeof(tv));
*/
/* ARGSUSED */
int
-truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
+truncate(proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
vnode_t vp;
struct vnode_attr va;
int error;
struct nameidata nd;
kauth_action_t action;
+ rlim_t fsize_limit;
if (uap->length < 0) {
return EINVAL;
}
+
+ fsize_limit = proc_limitgetcur(p, RLIMIT_FSIZE, TRUE);
+ if ((rlim_t)uap->length > fsize_limit) {
+ psignal(p, SIGXFSZ);
+ return EFBIG;
+ }
+
NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
if ((error = namei(&nd))) {
struct fileproc *fp;
int error;
int fd = uap->fd;
+ rlim_t fsize_limit;
AUDIT_ARG(fd, uap->fd);
if (uap->length < 0) {
return EINVAL;
}
+ fsize_limit = proc_limitgetcur(p, RLIMIT_FSIZE, TRUE);
+ if ((rlim_t)uap->length > fsize_limit) {
+ psignal(p, SIGXFSZ);
+ return EFBIG;
+ }
+
if ((error = fp_lookup(p, fd, &fp, 0))) {
return error;
}
- switch (FILEGLOB_DTYPE(fp->f_fglob)) {
+ switch (FILEGLOB_DTYPE(fp->fp_glob)) {
case DTYPE_PSXSHM:
error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
goto out;
goto out;
}
- vp = (vnode_t)fp->f_fglob->fg_data;
+ vp = (vnode_t)fp->fp_glob->fg_data;
- if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
+ if ((fp->fp_glob->fg_flag & FWRITE) == 0) {
AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
error = EINVAL;
goto out;
#if CONFIG_MACF
error = mac_vnode_check_truncate(ctx,
- fp->f_fglob->fg_cred, vp);
+ fp->fp_glob->fg_cred, vp);
if (error) {
(void)vnode_put(vp);
goto out;
#if CONFIG_MACF
if (error == 0) {
- mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
+ mac_vnode_notify_truncate(ctx, fp->fp_glob->fg_cred, vp);
}
#endif
(vp->v_flag & VISNAMEDSTREAM) &&
(vp->v_parent != NULLVP) &&
vnode_isshadow(vp) &&
- (fp->f_flags & FP_WRITTEN)) {
+ (fp->fp_glob->fg_flag & FWASWRITTEN)) {
(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
}
#endif
int fsevent;
#endif /* CONFIG_FSE */
-#if CONFIG_MACF
- (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
- VNODE_LABEL_CREATE, ctx);
-#endif
/*
* If some of the requested attributes weren't handled by the
* VNOP, use our fallback code.
(void)vnode_setattr_fallback(tvp, &nva, ctx);
}
+#if CONFIG_MACF
+ (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
+ VNODE_LABEL_CREATE, ctx);
+#endif
+
// Make sure the name & parent pointers are hooked up
if (tvp->v_name == NULL) {
update_flags |= VNODE_UPDATE_NAME;
return error;
}
- if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ if ((fp->fp_glob->fg_flag & FREAD) == 0) {
AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
error = EBADF;
goto out;
return error;
}
-/*
- * Rename files. Source and destination must either both be directories,
- * or both not be directories. If target is a directory, it must be empty.
- */
-/* ARGSUSED */
/*
 * vfs_iterate() callback used after a mount point is renamed (see the
 * vfs_iterate(0, rename_submounts_callback, mp) call in the rename path):
 * for every mount whose f_mntonname begins with the renamed mount's
 * path followed by '/', rebuild its f_mntonname from the submount's
 * covered vnode so nested mount paths stay consistent.
 *
 * Returns 0 for non-matching mounts or on success, -1 if the submount
 * could not be busied, or the vn_getpath_ext() error (in which case the
 * stale name is left in place).
 */
static int
-renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
- int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
+rename_submounts_callback(mount_t mp, void *arg)
{
- if (flags & ~VFS_RENAME_FLAGS_MASK) {
- return EINVAL;
+ int error = 0;
+ mount_t pmp = (mount_t)arg;
+ int prefix_len = (int)strlen(pmp->mnt_vfsstat.f_mntonname);
+
 /* Not nested under the renamed mount: nothing to do. */
+ if (strncmp(mp->mnt_vfsstat.f_mntonname, pmp->mnt_vfsstat.f_mntonname, prefix_len) != 0) {
+ return 0;
 }
- if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
- return EINVAL;
 /* Reject prefix-only matches (e.g. "/mnt2" vs renamed "/mnt"). */
+ if (mp->mnt_vfsstat.f_mntonname[prefix_len] != '/') {
+ return 0;
 }
- vnode_t tvp, tdvp;
+ if ((error = vfs_busy(mp, LK_NOWAIT))) {
+ printf("vfs_busy failed with %d for %s\n", error, mp->mnt_vfsstat.f_mntonname);
+ return -1;
+ }
+
+ int pathlen = MAXPATHLEN;
 /* Regenerate the mount-on name in place from the covered vnode. */
+ if ((error = vn_getpath_ext(mp->mnt_vnodecovered, NULL, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER))) {
+ printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error, mp->mnt_vfsstat.f_mntonname);
+ }
+
+ vfs_unbusy(mp);
+
+ return error;
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ */
+/* ARGSUSED */
+static int
+renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
+ int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
+{
+ if (flags & ~VFS_RENAME_FLAGS_MASK) {
+ return EINVAL;
+ }
+
+ if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
+ return EINVAL;
+ }
+
+ vnode_t tvp, tdvp;
vnode_t fvp, fdvp;
+ vnode_t mnt_fvp;
struct nameidata *fromnd, *tond;
int error;
int do_retry;
int has_listeners;
const char *oname = NULL;
char *from_name = NULL, *to_name = NULL;
+ char *from_name_no_firmlink = NULL, *to_name_no_firmlink = NULL;
int from_len = 0, to_len = 0;
+ int from_len_no_firmlink = 0, to_len_no_firmlink = 0;
int holding_mntlock;
+ int vn_authorize_skipped;
mount_t locked_mp = NULL;
vnode_t oparent = NULLVP;
#if CONFIG_FSE
fse_info from_finfo, to_finfo;
#endif
- int from_truncated = 0, to_truncated;
+ int from_truncated = 0, to_truncated = 0;
+ int from_truncated_no_firmlink = 0, to_truncated_no_firmlink = 0;
int batched = 0;
struct vnode_attr *fvap, *tvap;
int continuing = 0;
struct nameidata from_node, to_node;
struct vnode_attr fv_attr, tv_attr;
} * __rename_data;
- MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ __rename_data = kheap_alloc(KHEAP_TEMP, sizeof(*__rename_data), Z_WAITOK);
fromnd = &__rename_data->from_node;
tond = &__rename_data->to_node;
fvp = tvp = NULL;
fdvp = tdvp = NULL;
fvap = tvap = NULL;
+ mnt_fvp = NULLVP;
mntrename = FALSE;
+ vn_authorize_skipped = FALSE;
NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
segflg, from, ctx);
}
if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
- error = EEXIST;
- goto out1;
+ int32_t pval = 0;
+ int err = 0;
+
+ /*
+ * We allow rename with VFS_RENAME_EXCL flag for an existing file which
+ * has the same name as target iff the following conditions are met:
+ * 1. the target file system is case insensitive
+ * 2. source and target directories are the same
+ * 3. source and target files are the same
+ * 4. name only differs in case (determined by underlying filesystem)
+ */
+ if (fvp != tvp || fdvp != tdvp) {
+ error = EEXIST;
+ goto out1;
+ }
+
+ /*
+ * Assume that the target file system is case sensitive if
+ * _PC_CASE_SENSITIVE selector isn't supported.
+ */
+ err = VNOP_PATHCONF(tvp, _PC_CASE_SENSITIVE, &pval, ctx);
+ if (err != 0 || pval != 0) {
+ error = EEXIST;
+ goto out1;
+ }
}
batched = vnode_compound_rename_available(fdvp);
if (need_event || has_listeners) {
if (from_name == NULL) {
GET_PATH(from_name);
- if (from_name == NULL) {
- error = ENOMEM;
- goto out1;
- }
}
from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
+
+ if (from_name_no_firmlink == NULL) {
+ GET_PATH(from_name_no_firmlink);
+ }
+
+ from_len_no_firmlink = safe_getpath_no_firmlink(fdvp, fromnd->ni_cnd.cn_nameptr, from_name_no_firmlink, MAXPATHLEN, &from_truncated_no_firmlink);
}
if (need_event || need_kpath2 || has_listeners) {
if (to_name == NULL) {
GET_PATH(to_name);
- if (to_name == NULL) {
- error = ENOMEM;
- goto out1;
- }
}
to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
+
+ if (to_name_no_firmlink == NULL) {
+ GET_PATH(to_name_no_firmlink);
+ }
+
+ to_len_no_firmlink = safe_getpath_no_firmlink(tdvp, tond->ni_cnd.cn_nameptr, to_name_no_firmlink, MAXPATHLEN, &to_truncated_no_firmlink);
if (to_name && need_kpath2) {
AUDIT_ARG(kpath, to_name, ARG_KPATH2);
}
goto skipped_lookup;
}
- if (!batched) {
- error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
- if (error) {
- if (error == ENOENT) {
- assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
- if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- /*
- * We encountered a race where after doing the namei, tvp stops
- * being valid. If so, simply re-drive the rename call from the
- * top.
- */
- do_retry = 1;
- retry_count += 1;
- }
- }
- goto out1;
- }
- }
-
/*
* If the source and destination are the same (i.e. they're
* links to the same vnode) and the target file system is
*/
if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
pathconf_val != 0) {
+ vn_authorize_skipped = TRUE;
goto out1;
}
}
* - target must not exist
* - target must reside in the same directory as source
* - union mounts cannot be renamed
- * - "/" cannot be renamed
+ * - the root fs, and tightly-linked system volumes, cannot be renamed
*
* XXX Handle this in VFS after a continued lookup (if we missed
* in the cache to start off)
(fvp->v_mountedhere == NULL) &&
(fdvp == tdvp) &&
((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
+ ((fvp->v_mount->mnt_kern_flag & MNTK_SYSTEM) == 0) &&
(fvp->v_mount->mnt_vnodecovered != NULLVP)) {
vnode_t coveredvp;
error = ENOENT;
goto out1;
}
- vnode_put(fvp);
+ /*
+ * Save the 'fvp' as it is needed for vn_authorize_renamex_with_paths()
+ * later.
+ */
+ mnt_fvp = fvp;
fvp = coveredvp;
mntrename = TRUE;
if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
!bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
fromnd->ni_cnd.cn_namelen)) {
+ vn_authorize_skipped = TRUE;
goto out1;
}
}
vnode_put(fvp);
vnode_put(fdvp);
+ if (mnt_fvp != NULLVP) {
+ vnode_put(mnt_fvp);
+ }
+
mount_lock_renames(locked_mp);
holding_mntlock = 1;
}
}
+ if (!batched) {
+ error = vn_authorize_renamex_with_paths(fdvp, mntrename ? mnt_fvp : fvp,
+ &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx,
+ flags, NULL);
+ if (error) {
+ if (error == ENOENT) {
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * We encountered a race where after doing the namei,
+ * tvp stops being valid. If so, simply re-drive the rename
+ * call from the top.
+ */
+ do_retry = 1;
+ retry_count += 1;
+ }
+ }
+ goto out1;
+ }
+ }
+
+ /* Release the 'mnt_fvp' now that it is no longer needed. */
+ if (mnt_fvp != NULLVP) {
+ vnode_put(mnt_fvp);
+ mnt_fvp = NULLVP;
+ }
+
// save these off so we can later verify that fvp is the same
oname = fvp->v_name;
oparent = fvp->v_parent;
holding_mntlock = 0;
}
if (error) {
+ if (error == EDATALESS) {
+ /*
+ * If we've been here before, something has gone
+ * horribly wrong and we should just get out lest
+ * we spiral around the drain forever.
+ */
+ if (flags & VFS_RENAME_DATALESS) {
+ error = EIO;
+ goto out1;
+ }
+
+ /*
+ * The object we're renaming is dataless (or has a
+ * dataless descendent) and requires materialization
+ * before the rename occurs. But we're holding the
+ * mount point's rename lock, so it's not safe to
+ * make the upcall.
+ *
+ * In this case, we release the lock, perform the
+ * materialization, and start the whole thing over.
+ */
+ error = vnode_materialize_dataless_file(fvp,
+ NAMESPACE_HANDLER_RENAME_OP);
+
+ if (error == 0) {
+ /*
+ * The next time around we need to tell the
+ * file system that the materialization has
+ * been performed.
+ */
+ flags |= VFS_RENAME_DATALESS;
+ do_retry = 1;
+ }
+ goto out1;
+ }
if (error == EKEEPLOOKING) {
if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
* but other filesystems susceptible to this race could return it, too.
*/
if (error == ERECYCLE) {
- do_retry = 1;
+ if (retry_count < MAX_RENAME_ERECYCLE_RETRIES) {
+ do_retry = 1;
+ retry_count += 1;
+ } else {
+ printf("rename retry limit due to ERECYCLE reached\n");
+ error = ENOENT;
+ }
}
/*
* cache, redrive the lookup.
*/
if (batched && error == ENOENT) {
- assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
do_retry = 1;
retry_count += 1;
if (tvp) {
add_fsevent(FSE_RENAME, ctx,
- FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
FSE_ARG_FINFO, &from_finfo,
- FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
FSE_ARG_FINFO, &to_finfo,
FSE_ARG_DONE);
if (flags & VFS_RENAME_SWAP) {
* two.
*/
add_fsevent(FSE_RENAME, ctx,
- FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
FSE_ARG_FINFO, &to_finfo,
- FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
FSE_ARG_FINFO, &from_finfo,
FSE_ARG_DONE);
}
} else {
add_fsevent(FSE_RENAME, ctx,
- FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
FSE_ARG_FINFO, &from_finfo,
- FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
FSE_ARG_DONE);
}
}
error = EBUSY;
goto out1;
}
- MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ tobuf = zalloc(ZV_NAMEI);
if (UIO_SEG_IS_USER_SPACE(segflg)) {
error = copyinstr(to, tobuf, MAXPATHLEN, &len);
mpname = cp + 1;
}
}
+
+ /* Update f_mntonname of sub mounts */
+ vfs_iterate(0, rename_submounts_callback, (void *)mp);
+
/* append name to prefix */
- maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
+ maxlen = MAXPATHLEN - (int)(pathend - mp->mnt_vfsstat.f_mntonname);
bzero(pathend, maxlen);
+
strlcpy(pathend, mpname, maxlen);
}
- FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
+ zfree(ZV_NAMEI, tobuf);
vfs_unbusy(mp);
+
+ vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
}
/*
* fix up name & parent pointers. note that we first
vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
}
out1:
+ /*
+ * There are some cases (e.g. 'fvp == tvp') when vn_authorize was
+ * skipped earlier as no actual rename was performed.
+ */
+ if (vn_authorize_skipped && error == 0) {
+ error = vn_authorize_renamex_with_paths(fdvp, fvp,
+ &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx,
+ flags, NULL);
+ if (error && error == ENOENT) {
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ do_retry = 1;
+ retry_count += 1;
+ }
+ }
+ }
if (to_name != NULL) {
RELEASE_PATH(to_name);
to_name = NULL;
}
+ if (to_name_no_firmlink != NULL) {
+ RELEASE_PATH(to_name_no_firmlink);
+ to_name_no_firmlink = NULL;
+ }
if (from_name != NULL) {
RELEASE_PATH(from_name);
from_name = NULL;
}
+ if (from_name_no_firmlink != NULL) {
+ RELEASE_PATH(from_name_no_firmlink);
+ from_name_no_firmlink = NULL;
+ }
if (holding_mntlock) {
mount_unlock_renames(locked_mp);
mount_drop(locked_mp, 0);
}
vnode_put(fdvp);
}
-
+ if (mnt_fvp != NULLVP) {
+ vnode_put(mnt_fvp);
+ }
/*
* If things changed after we did the namei, then we will re-drive
* this rename call from the top.
goto retry;
}
- FREE(__rename_data, M_TEMP);
+ kheap_free(KHEAP_TEMP, __rename_data, sizeof(*__rename_data));
return error;
}
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
- enum uio_seg segflg)
+ enum uio_seg segflg, int unlink_flags)
{
vnode_t vp, dvp;
int error;
struct nameidata nd;
char *path = NULL;
- int len = 0;
+ char *no_firmlink_path = NULL;
+ int len_path = 0;
+ int len_no_firmlink_path = 0;
int has_listeners = 0;
int need_event = 0;
- int truncated = 0;
+ int truncated_path = 0;
+ int truncated_no_firmlink_path = 0;
#if CONFIG_FSE
struct vnode_attr va;
#endif /* CONFIG_FSE */
error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
if (error) {
if (error == ENOENT) {
- assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
restart_flag = 1;
restart_count += 1;
}
#if CONFIG_FSE
- fse_info finfo;
+ fse_info finfo = {0};
need_event = need_fsevent(FSE_DELETE, dvp);
if (need_event) {
if (need_event || has_listeners) {
if (path == NULL) {
GET_PATH(path);
- if (path == NULL) {
- error = ENOMEM;
- goto out;
- }
}
- len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
+ len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+
+ if (no_firmlink_path == NULL) {
+ GET_PATH(no_firmlink_path);
+ }
+
+ len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
#if CONFIG_FSE
- if (truncated) {
+ if (truncated_no_firmlink_path) {
finfo.mode |= FSE_TRUNCATED_PATH;
}
#endif
if (error == EKEEPLOOKING) {
goto continue_lookup;
} else if (batched && error == ENOENT) {
- assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
/*
* For compound VNOPs, the authorization callback
goto out;
}
}
+
+ /*
+ * XXX There's no provision for passing flags
+ * to VNOP_RMDIR(). So, if vn_rmdir() fails
+ * because it's not empty, then we try again
+ * with VNOP_REMOVE(), passing in a special
+ * flag that clever file systems will know
+ * how to handle.
+ */
+ if (error == ENOTEMPTY &&
+ (unlink_flags & VNODE_REMOVE_DATALESS_DIR) != 0) {
+ /*
+ * If this fails, we want to keep the original
+ * error.
+ */
+ if (vn_remove(dvp, &vp, &nd,
+ VNODE_REMOVE_DATALESS_DIR, vap, ctx) == 0) {
+ error = 0;
+ }
+ }
+
#if CONFIG_APPLEDOUBLE
/*
* Special case to remove orphaned AppleDouble
* so here we are.
*/
if (error == ENOTEMPTY) {
- error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
- if (error == EBUSY) {
+ int ad_error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
+ if (ad_error == EBUSY) {
+ error = ad_error;
goto out;
}
/*
* Assuming everything went well, we will try the RMDIR again
*/
- if (!error) {
+ if (!ad_error) {
error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
}
}
vnode_get_fse_info_from_vap(vp, &finfo, vap);
}
add_fsevent(FSE_DELETE, ctx,
- FSE_ARG_STRING, len, path,
+ FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
FSE_ARG_FINFO, &finfo,
FSE_ARG_DONE);
}
RELEASE_PATH(path);
path = NULL;
}
+
+ if (no_firmlink_path != NULL) {
+ RELEASE_PATH(no_firmlink_path);
+ no_firmlink_path = NULL;
+ }
+
/*
* nameidone has to happen before we vnode_put(dvp)
* since it may need to release the fs_nodelock on the dvp
rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
{
return rmdirat_internal(vfs_context_current(), AT_FDCWD,
- CAST_USER_ADDR_T(uap->path), UIO_USERSPACE);
+ CAST_USER_ADDR_T(uap->path), UIO_USERSPACE, 0);
}
/* Get direntry length padded to 8 byte alignment */
uio_t auio;
struct direntry *entry64;
struct dirent *dep;
- int bytesread;
+ size_t bytesread;
int error;
/*
* prevent uio_resid() * 3 / 8 from overflowing.
*/
bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
- MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
+ bufptr = kheap_alloc(KHEAP_DATA_BUFFERS, bufsize, Z_WAITOK);
if (bufptr == NULL) {
return ENOMEM;
}
dep = (struct dirent *)bufptr;
bytesread = bufsize - uio_resid(auio);
- MALLOC(entry64, struct direntry *, sizeof(struct direntry),
- M_TEMP, M_WAITOK);
+ entry64 = kheap_alloc(KHEAP_TEMP, sizeof(struct direntry), Z_WAITOK);
/*
* Convert all the entries and copy them out to user's buffer.
*/
while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
- size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
-
- if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
+ /* First check that the dirent struct up to d_name is within the buffer */
+ if ((char*)dep + offsetof(struct dirent, d_name) > ((char *)bufptr + bytesread) ||
+ /* Check that the length of the entire dirent is within the buffer */
+ DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
+ /* Check that the actual length including the name doesn't exceed d_reclen */
DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
vp->v_mount->mnt_vfsstat.f_mntonname,
break;
}
+ size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
+
bzero(entry64, enbufsize);
/* Convert a dirent to a dirent64. */
entry64->d_ino = dep->d_ino;
entry64->d_seekoff = 0;
- entry64->d_reclen = enbufsize;
+ entry64->d_reclen = (uint16_t)enbufsize;
entry64->d_namlen = dep->d_namlen;
entry64->d_type = dep->d_type;
bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
uio->uio_offset = auio->uio_offset;
}
uio_free(auio);
- FREE(bufptr, M_TEMP);
- FREE(entry64, M_TEMP);
+ kheap_free(KHEAP_DATA_BUFFERS, bufptr, bufsize);
+ kheap_free(KHEAP_TEMP, entry64, sizeof(struct direntry));
return error;
}
}
*/
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
- off_t *offset, int flags)
+ off_t *offset, int *eofflag, int flags)
{
vnode_t vp;
struct vfs_context context = *vfs_context_current(); /* local copy */
uio_t auio;
int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
off_t loff;
- int error, eofflag, numdirent;
+ int error, numdirent;
char uio_buf[UIO_SIZEOF(1)];
error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
if (error) {
return error;
}
- if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ if ((fp->fp_glob->fg_flag & FREAD) == 0) {
AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
error = EBADF;
goto out;
}
#if CONFIG_MACF
- error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
+ error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->fp_glob);
if (error) {
goto out;
}
}
#endif /* MAC */
- loff = fp->f_fglob->fg_offset;
+ loff = fp->fp_glob->fg_offset;
auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
uio_addiov(auio, bufp, bufsize);
if (flags & VNODE_READDIR_EXTENDED) {
- error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
- fp->f_fglob->fg_offset = uio_offset(auio);
+ error = vnode_readdir64(vp, auio, flags, eofflag, &numdirent, &context);
+ fp->fp_glob->fg_offset = uio_offset(auio);
} else {
- error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
- fp->f_fglob->fg_offset = uio_offset(auio);
+ error = VNOP_READDIR(vp, auio, 0, eofflag, &numdirent, &context);
+ fp->fp_glob->fg_offset = uio_offset(auio);
}
if (error) {
(void)vnode_put(vp);
}
if ((user_ssize_t)bufsize == uio_resid(auio)) {
- if (union_dircheckp) {
- error = union_dircheckp(&vp, fp, &context);
- if (error == -1) {
- goto unionread;
- }
- if (error) {
- (void)vnode_put(vp);
- goto out;
- }
- }
-
if ((vp->v_mount->mnt_flag & MNT_UNION)) {
struct vnode *tvp = vp;
if (lookup_traverse_union(tvp, &vp, &context) == 0) {
vnode_ref(vp);
- fp->f_fglob->fg_data = (caddr_t) vp;
- fp->f_fglob->fg_offset = 0;
+ fp->fp_glob->fg_data = (caddr_t) vp;
+ fp->fp_glob->fg_offset = 0;
vnode_rele(tvp);
vnode_put(tvp);
goto unionread;
{
off_t offset;
ssize_t bytesread;
- int error;
+ int error, eofflag;
AUDIT_ARG(fd, uap->fd);
- error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
+ error = getdirentries_common(uap->fd, uap->buf, uap->count,
+ &bytesread, &offset, &eofflag, 0);
if (error == 0) {
if (proc_is64bit(p)) {
user32_long_t base = (user32_long_t)offset;
error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
}
- *retval = bytesread;
+ *retval = (int)bytesread;
}
return error;
}
{
off_t offset;
ssize_t bytesread;
- int error;
+ int error, eofflag;
+ user_size_t bufsize;
AUDIT_ARG(fd, uap->fd);
- error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
+
+ /*
+ * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
+ * then the kernel carves out the last 4 bytes to return extended
+ * information to userspace (namely whether we reached EOF with this call).
+ */
+ if (uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
+ bufsize = uap->bufsize - sizeof(getdirentries64_flags_t);
+ } else {
+ bufsize = uap->bufsize;
+ }
+
+ error = getdirentries_common(uap->fd, uap->buf, bufsize,
+ &bytesread, &offset, &eofflag, VNODE_READDIR_EXTENDED);
if (error == 0) {
*retval = bytesread;
error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
+
+ if (error == 0 && uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
+ getdirentries64_flags_t flags = 0;
+ if (eofflag) {
+ flags |= GETDIRENTRIES64_EOF;
+ }
+ error = copyout(&flags, (user_addr_t)uap->buf + bufsize,
+ sizeof(flags));
+ }
}
return error;
}
uint32_t count = 0, savecount = 0;
uint32_t newstate = 0;
int error, eofflag;
- uint32_t loff = 0;
+ off_t loff = 0;
struct attrlist attributelist;
vfs_context_t ctx = vfs_context_current();
int fd = uap->fd;
if ((error = fp_getfvp(p, fd, &fp, &vp))) {
return error;
}
- if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ if ((fp->fp_glob->fg_flag & FREAD) == 0) {
AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
error = EBADF;
goto out;
#if CONFIG_MACF
error = mac_file_check_change_offset(vfs_context_ucred(ctx),
- fp->f_fglob);
+ fp->fp_glob);
if (error) {
goto out;
}
#endif /* MAC */
/* set up the uio structure which will contain the users return buffer */
- loff = fp->f_fglob->fg_offset;
+ loff = fp->fp_glob->fg_offset;
auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
uio_addiov(auio, uap->buffer, uap->buffersize);
* info, so truncate before extending again */
error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
- (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
+ (uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
}
if (error) {
} else { // Empty buffer
struct vnode *tvp = vp;
if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
- vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
- fp->f_fglob->fg_data = (caddr_t) vp;
- fp->f_fglob->fg_offset = 0; // reset index for new dir
+ vnode_ref_ext(vp, fp->fp_glob->fg_flag & O_EVTONLY, 0);
+ fp->fp_glob->fg_data = (caddr_t) vp;
+ fp->fp_glob->fg_offset = 0; // reset index for new dir
count = savecount;
- vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
+ vnode_rele_internal(tvp, fp->fp_glob->fg_flag & O_EVTONLY, 0, 0);
vnode_put(tvp);
goto unionread;
}
if (error) {
goto out;
}
- fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
+ fp->fp_glob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) {
goto out;
kauth_authorize_fileop_has_listeners()) {
GET_PATH(fpath);
GET_PATH(spath);
- if (fpath == NULL || spath == NULL) {
- error = ENOMEM;
- goto out;
- }
flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
freespace_mb(vnode_t vp)
{
vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
- return ((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
- vp->v_mount->mnt_vfsstat.f_bsize) >> 20;
+ return (uint32_t)(((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
+ vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
}
#if CONFIG_SEARCHFS
uio_t auio = NULL;
int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
uint32_t nummatches;
- int mallocsize;
+ size_t mallocsize;
uint32_t nameiflags;
vfs_context_t ctx = vfs_context_current();
char uio_buf[UIO_SIZEOF(1)];
mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));
- MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
+ searchparams1 = kheap_alloc(KHEAP_DATA_BUFFERS, mallocsize, Z_WAITOK);
/* Now set up the various pointers to the correct place in our newly allocated memory */
searchparams1,
searchparams2,
&searchblock.searchattrs,
- (u_long)searchblock.maxmatches,
+ (uint32_t)searchblock.maxmatches,
&timelimit,
returnattrs,
&nummatches,
- (u_long)uap->scriptcode,
- (u_long)uap->options,
+ (uint32_t)uap->scriptcode,
+ (uint32_t)uap->options,
auio,
(struct searchstate *) &state->ss_fsstate,
ctx);
freeandexit:
- FREE(searchparams1, M_TEMP);
+ kheap_free(KHEAP_DATA_BUFFERS, searchparams1, mallocsize);
return error;
} /* end of searchfs system call */
#endif /* CONFIG_SEARCHFS */
-lck_grp_attr_t * nspace_group_attr;
-lck_attr_t * nspace_lock_attr;
-lck_grp_t * nspace_mutex_group;
+#if CONFIG_DATALESS_FILES
-lck_mtx_t nspace_handler_lock;
-lck_mtx_t nspace_handler_exclusion_lock;
+/*
+ * === Namespace Resolver Up-call Mechanism ===
+ *
+ * When I/O is performed to a dataless file or directory (read, write,
+ * lookup-in, etc.), the file system performs an upcall to the namespace
+ * resolver (filecoordinationd) to materialize the object.
+ *
+ * We need multiple up-calls to be in flight at once, and we need these
+ * up-calls to be interruptible, thus the following implementation:
+ *
+ * => The nspace_resolver_request represents the in-kernel request state.
+ * It contains a request ID, storage space for the errno code returned
+ * by filecoordinationd, and flags.
+ *
+ * => The request ID is simply a global monotonically incrementing 32-bit
+ * number. Outstanding requests are stored in a hash table, and the
+ * hash function is extremely simple.
+ *
+ * => When an upcall is to be made to filecoordinationd, a request structure
+ * is allocated on the stack (it is small, and needs to live only during
+ * the duration of the call to resolve_nspace_item_ext()). It is
+ * initialized and inserted into the table. Some backpressure from
+ * filecoordinationd is applied by limiting the number of entries that
+ * can be inserted into the table (and thus limiting the number of
+ * outstanding requests issued to filecoordinationd); waiting for an
+ * available slot is interruptible.
+ *
+ * => Once the request has been inserted into the table, the up-call is made
+ * to filecoordinationd via a MiG-generated stub. The up-call returns
+ * immediately and filecoordinationd processes the request asynchronously.
+ *
+ * => The caller now waits for the request to complete. This is achieved by
+ * sleeping on the address of the request structure and waiting for
+ * filecoordinationd to mark the request structure as complete. This
+ * is an interruptible sleep call; if interrupted, the request structure
+ * is removed from the table and EINTR is returned to the caller. If
+ * this occurs, an advisory up-call is made to filecoordinationd with
+ * the request ID to indicate that the request can be aborted or
+ * de-prioritized at the discretion of filecoordinationd.
+ *
+ * => When filecoordinationd has completed the request, it signals completion
+ * by writing to the vfs.nspace.complete sysctl node. Only a process
+ * decorated as a namespace resolver can write to this sysctl node. The
+ * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
+ * The request ID is looked up in the table, and if the request is found,
+ * the error code is stored in the request structure and a wakeup()
+ * issued on the address of the request structure. If the request is not
+ * found, we simply drop the completion notification, assuming that the
+ * caller was interrupted.
+ *
+ * => When the waiting thread wakes up, it extracts the error code from the
+ * request structure, removes the request from the table, and returns the
+ * error code to the calling function. Fini!
+ */
-time_t snapshot_timestamp = 0;
-int nspace_allow_virtual_devs = 0;
+struct nspace_resolver_request {
+ LIST_ENTRY(nspace_resolver_request) r_hashlink;
+ vnode_t r_vp;
+ uint32_t r_req_id;
+ int r_resolver_error;
+ int r_flags;
+};
-void nspace_handler_init(void);
+#define RRF_COMPLETE 0x0001
-typedef struct nspace_item_info {
- struct vnode *vp;
- void *arg;
- uint64_t op;
- uint32_t vid;
- uint32_t flags;
- uint32_t token;
- uint32_t refcount;
-} nspace_item_info;
-
-#define MAX_NSPACE_ITEMS 128
-nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
-uint32_t nspace_item_idx = 0; // also used as the sleep/wakeup rendezvous address
-uint32_t nspace_token_id = 0;
-uint32_t nspace_handler_timeout = 15; // seconds
-
-#define NSPACE_ITEM_NEW 0x0001
-#define NSPACE_ITEM_PROCESSING 0x0002
-#define NSPACE_ITEM_DEAD 0x0004
-#define NSPACE_ITEM_CANCELLED 0x0008
-#define NSPACE_ITEM_DONE 0x0010
-#define NSPACE_ITEM_RESET_TIMER 0x0020
-
-#define NSPACE_ITEM_NSPACE_EVENT 0x0040
-#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
-
-#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
-
-//#pragma optimization_level 0
+static uint32_t
+next_nspace_req_id(void)
+{
+ static uint32_t next_req_id;
-typedef enum {
- NSPACE_HANDLER_NSPACE = 0,
- NSPACE_HANDLER_SNAPSHOT = 1,
-
- NSPACE_HANDLER_COUNT,
-} nspace_type_t;
-
-typedef struct {
- uint64_t handler_tid;
- struct proc *handler_proc;
- int handler_busy;
-} nspace_handler_t;
-
-nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
-
-/* namespace fsctl functions */
-static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
-static int nspace_item_flags_for_type(nspace_type_t nspace_type);
-static int nspace_open_flags_for_type(nspace_type_t nspace_type);
-static nspace_type_t nspace_type_for_op(uint64_t op);
-static int nspace_is_special_process(struct proc *proc);
-static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
-static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
-static int validate_namespace_args(int is64bit, int size);
-static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
-
-
-static inline int
-nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
-{
- switch (nspace_type) {
- case NSPACE_HANDLER_NSPACE:
- return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
- case NSPACE_HANDLER_SNAPSHOT:
- return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
- default:
- printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
- return 0;
- }
+ return OSAddAtomic(1, &next_req_id);
}
-static inline int
-nspace_item_flags_for_type(nspace_type_t nspace_type)
-{
- switch (nspace_type) {
- case NSPACE_HANDLER_NSPACE:
- return NSPACE_ITEM_NSPACE_EVENT;
- case NSPACE_HANDLER_SNAPSHOT:
- return NSPACE_ITEM_SNAPSHOT_EVENT;
- default:
- printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
- return 0;
- }
-}
+#define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
+#define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
-static inline int
-nspace_open_flags_for_type(nspace_type_t nspace_type)
-{
- switch (nspace_type) {
- case NSPACE_HANDLER_NSPACE:
- return FREAD | FWRITE | O_EVTONLY;
- case NSPACE_HANDLER_SNAPSHOT:
- return FREAD | O_EVTONLY;
- default:
- printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
- return 0;
- }
-}
+static LIST_HEAD(nspace_resolver_requesthead,
+ nspace_resolver_request) * nspace_resolver_request_hashtbl;
+static u_long nspace_resolver_request_hashmask;
+static u_int nspace_resolver_request_count;
+static bool nspace_resolver_request_wait_slot;
+static lck_grp_t *nspace_resolver_request_lck_grp;
+static lck_mtx_t nspace_resolver_request_hash_mutex;
+
+#define NSPACE_REQ_LOCK() \
+ lck_mtx_lock(&nspace_resolver_request_hash_mutex)
+#define NSPACE_REQ_UNLOCK() \
+ lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
-static inline nspace_type_t
-nspace_type_for_op(uint64_t op)
+#define NSPACE_RESOLVER_HASH(req_id) \
+ (&nspace_resolver_request_hashtbl[(req_id) & \
+ nspace_resolver_request_hashmask])
+
+static struct nspace_resolver_request *
+nspace_resolver_req_lookup(uint32_t req_id)
{
- switch (op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
- case NAMESPACE_HANDLER_NSPACE_EVENT:
- return NSPACE_HANDLER_NSPACE;
- case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
- return NSPACE_HANDLER_SNAPSHOT;
- default:
- printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
- return NSPACE_HANDLER_NSPACE;
+ struct nspace_resolver_requesthead *bucket;
+ struct nspace_resolver_request *req;
+
+ bucket = NSPACE_RESOLVER_HASH(req_id);
+ LIST_FOREACH(req, bucket, r_hashlink) {
+ if (req->r_req_id == req_id) {
+ return req;
+ }
}
+
+ return NULL;
}
-static inline int
-nspace_is_special_process(struct proc *proc)
+static int
+nspace_resolver_req_add(struct nspace_resolver_request *req)
{
- int i;
- for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
- if (proc == nspace_handlers[i].handler_proc) {
- return 1;
+ struct nspace_resolver_requesthead *bucket;
+ int error;
+
+ while (nspace_resolver_request_count >=
+ NSPACE_RESOLVER_MAX_OUTSTANDING) {
+ nspace_resolver_request_wait_slot = true;
+ error = msleep(&nspace_resolver_request_count,
+ &nspace_resolver_request_hash_mutex,
+ PVFS | PCATCH, "nspacerq", NULL);
+ if (error) {
+ return error;
}
}
+
+ bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
+#if DIAGNOSTIC
+ assert(nspace_resolver_req_lookup(req->r_req_id) == NULL);
+#endif /* DIAGNOSTIC */
+ LIST_INSERT_HEAD(bucket, req, r_hashlink);
+ nspace_resolver_request_count++;
+
return 0;
}
-void
-nspace_handler_init(void)
+static void
+nspace_resolver_req_remove(struct nspace_resolver_request *req)
{
- nspace_lock_attr = lck_attr_alloc_init();
- nspace_group_attr = lck_grp_attr_alloc_init();
- nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
- lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
- lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
- memset(&nspace_items[0], 0, sizeof(nspace_items));
+ struct nspace_resolver_requesthead *bucket;
+
+ bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
+#if DIAGNOSTIC
+ assert(nspace_resolver_req_lookup(req->r_req_id) != NULL);
+#endif /* DIAGNOSTIC */
+ LIST_REMOVE(req, r_hashlink);
+ nspace_resolver_request_count--;
+
+ if (nspace_resolver_request_wait_slot) {
+ nspace_resolver_request_wait_slot = false;
+ wakeup(&nspace_resolver_request_count);
+ }
}
-void
-nspace_proc_exit(struct proc *p)
+static void
+nspace_resolver_req_cancel(uint32_t req_id)
{
- int i, event_mask = 0;
+ kern_return_t kr;
+ mach_port_t mp;
- for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
- if (p == nspace_handlers[i].handler_proc) {
- event_mask |= nspace_item_flags_for_type(i);
- nspace_handlers[i].handler_tid = 0;
- nspace_handlers[i].handler_proc = NULL;
- }
- }
+ // Failures here aren't fatal -- the cancellation message
+ // sent to the resolver is merely advisory.
- if (event_mask == 0) {
+ kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
+ if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
return;
}
- lck_mtx_lock(&nspace_handler_lock);
- if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
- // if this process was the snapshot handler, zero snapshot_timeout
- snapshot_timestamp = 0;
- }
-
- //
- // unblock anyone that's waiting for the handler that died
- //
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
- if (nspace_items[i].flags & event_mask) {
- if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
- vnode_lock_spin(nspace_items[i].vp);
- nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
- vnode_unlock(nspace_items[i].vp);
- }
- nspace_items[i].vp = NULL;
- nspace_items[i].vid = 0;
- nspace_items[i].flags = NSPACE_ITEM_DONE;
- nspace_items[i].token = 0;
-
- wakeup((caddr_t)&(nspace_items[i].vp));
- }
- }
+ kr = send_nspace_resolve_cancel(mp, req_id);
+ if (kr != KERN_SUCCESS) {
+ os_log_error(OS_LOG_DEFAULT,
+ "NSPACE send_nspace_resolve_cancel failure: %d", kr);
}
- wakeup((caddr_t)&nspace_item_idx);
- lck_mtx_unlock(&nspace_handler_lock);
+ ipc_port_release_send(mp);
}
-
-int
-resolve_nspace_item(struct vnode *vp, uint64_t op)
+static int
+nspace_resolver_req_wait(struct nspace_resolver_request *req)
{
- return resolve_nspace_item_ext(vp, op, NULL);
-}
+ bool send_cancel_message = false;
+ int error;
-int
-resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
-{
- int i, error, keep_waiting;
- struct timespec ts;
- nspace_type_t nspace_type = nspace_type_for_op(op);
+ NSPACE_REQ_LOCK();
- // only allow namespace events on regular files, directories and symlinks.
- if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
- return 0;
+ while ((req->r_flags & RRF_COMPLETE) == 0) {
+ error = msleep(req, &nspace_resolver_request_hash_mutex,
+ PVFS | PCATCH, "nspace", NULL);
+ if (error && error != ERESTART) {
+ req->r_resolver_error = (error == EINTR) ? EINTR :
+ ETIMEDOUT;
+ send_cancel_message = true;
+ break;
+ }
}
- //
- // if this is a snapshot event and the vnode is on a
- // disk image just pretend nothing happened since any
- // change to the disk image will cause the disk image
- // itself to get backed up and this avoids multi-way
- // deadlocks between the snapshot handler and the ever
- // popular diskimages-helper process. the variable
- // nspace_allow_virtual_devs allows this behavior to
- // be overridden (for use by the Mobile TimeMachine
- // testing infrastructure which uses disk images)
- //
- if ((op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
- && (vp->v_mount != NULL)
- && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
- && !nspace_allow_virtual_devs) {
- return 0;
- }
+ nspace_resolver_req_remove(req);
- // if (thread_tid(current_thread()) == namespace_handler_tid) {
- if (nspace_handlers[nspace_type].handler_proc == NULL) {
- return 0;
- }
+ NSPACE_REQ_UNLOCK();
- if (nspace_is_special_process(current_proc())) {
- return EDEADLK;
+ if (send_cancel_message) {
+ nspace_resolver_req_cancel(req->r_req_id);
}
- lck_mtx_lock(&nspace_handler_lock);
+ return req->r_resolver_error;
+}
-retry:
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
- break;
- }
- }
+static void
+nspace_resolver_req_mark_complete(
+ struct nspace_resolver_request *req,
+ int resolver_error)
+{
+ req->r_resolver_error = resolver_error;
+ req->r_flags |= RRF_COMPLETE;
+ wakeup(req);
+}
- if (i >= MAX_NSPACE_ITEMS) {
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].flags == 0) {
- break;
+static void
+nspace_resolver_req_completed(uint32_t req_id, int resolver_error, uint64_t orig_gencount)
+{
+ struct nspace_resolver_request *req;
+
+ NSPACE_REQ_LOCK();
+
+ // If we don't find the request corresponding to our req_id,
+ // just drop the completion signal on the floor; it's likely
+ // that the requester interrupted with a signal.
+
+ req = nspace_resolver_req_lookup(req_id);
+ if (req) {
+ mount_t locked_mp = NULL;
+
+ locked_mp = req->r_vp->v_mount;
+ mount_ref(locked_mp, 0);
+ mount_lock_renames(locked_mp);
+
+ //
+ // if the resolver isn't already returning an error and we have an
+ // orig_gencount, then get an iocount on the request vnode and check
+ // that the gencount on req->r_vp has not changed.
+ //
+ // note: a ref was taken on req->r_vp when the request was created
+ // and that ref will be dropped by that thread when it wakes up.
+ //
+ if (resolver_error == 0 &&
+ orig_gencount != 0 &&
+ vnode_getwithref(req->r_vp) == 0) {
+ struct vnode_attr va;
+ uint64_t cur_gencount;
+
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_recursive_gencount);
+
+ if (vnode_getattr(req->r_vp, &va, vfs_context_kernel()) == 0) {
+ cur_gencount = va.va_recursive_gencount;
+ } else {
+ cur_gencount = 0;
}
- }
- } else {
- nspace_items[i].refcount++;
- }
- if (i >= MAX_NSPACE_ITEMS) {
- ts.tv_sec = nspace_handler_timeout;
- ts.tv_nsec = 0;
+ if (resolver_error == 0 && cur_gencount && orig_gencount && cur_gencount != orig_gencount) {
+ printf("nspace.complete: gencount changed! (orig %llu cur %llu)\n", orig_gencount, cur_gencount);
- error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS | PCATCH, "nspace-no-space", &ts);
- if (error == 0) {
- // an entry got free'd up, go see if we can get a slot
- goto retry;
- } else {
- lck_mtx_unlock(&nspace_handler_lock);
- return error;
- }
- }
+ // this error will be returned to the thread that initiated the
+ // materialization of req->r_vp.
+ resolver_error = EBUSY;
- //
- // if it didn't already exist, add it. if it did exist
- // we'll get woken up when someone does a wakeup() on
- // the slot in the nspace_items table.
- //
- if (vp != nspace_items[i].vp) {
- nspace_items[i].vp = vp;
- nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
- nspace_items[i].op = op;
- nspace_items[i].vid = vnode_vid(vp);
- nspace_items[i].flags = NSPACE_ITEM_NEW;
- nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
- if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
- if (arg) {
- vnode_lock_spin(vp);
- vp->v_flag |= VNEEDSSNAPSHOT;
- vnode_unlock(vp);
+ // note: we explicitly do not return an error to the caller (i.e.
+ // the thread that did the materialization) because they said they
+ // don't want one.
}
+
+ vnode_put(req->r_vp);
}
- nspace_items[i].token = 0;
- nspace_items[i].refcount = 1;
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
- wakeup((caddr_t)&nspace_item_idx);
+ nspace_resolver_req_mark_complete(req, resolver_error);
}
- //
- // Now go to sleep until the handler does a wakeup on this
- // slot in the nspace_items table (or we timeout).
- //
- keep_waiting = 1;
- while (keep_waiting) {
- ts.tv_sec = nspace_handler_timeout;
- ts.tv_nsec = 0;
- error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS | PCATCH, "namespace-done", &ts);
-
- if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
- error = 0;
- } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
- error = nspace_items[i].token;
- } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
- if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
- nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
- continue;
- } else {
- error = ETIMEDOUT;
- }
- } else if (error == 0) {
- // hmmm, why did we get woken up?
- printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
- nspace_items[i].token);
- }
+ NSPACE_REQ_UNLOCK();
- if (--nspace_items[i].refcount == 0) {
- nspace_items[i].vp = NULL; // clear this so that no one will match on it again
- nspace_items[i].arg = NULL;
- nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
- nspace_items[i].flags = 0; // this clears it for re-use
- }
- wakeup(&nspace_token_id);
- keep_waiting = 0;
- }
+ return;
+}
- lck_mtx_unlock(&nspace_handler_lock);
+static struct proc *nspace_resolver_proc;
- return error;
+static int
+nspace_resolver_get_proc_state(struct proc *p, int *is_resolver)
+{
+ *is_resolver = ((p->p_lflag & P_LNSPACE_RESOLVER) &&
+ p == nspace_resolver_proc) ? 1 : 0;
+ return 0;
}
-int
-nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
+static int
+nspace_resolver_set_proc_state(struct proc *p, int is_resolver)
{
- int snapshot_error = 0;
+ vfs_context_t ctx = vfs_context_current();
+ int error = 0;
- if (vp == NULL) {
- return 0;
+ //
+ // The system filecoordinationd runs as uid == 0. This also
+ // has the nice side-effect of filtering out filecoordinationd
+ // running in the simulator.
+ //
+ if (!vfs_context_issuser(ctx)) {
+ return EPERM;
}
- /* Swap files are special; skip them */
- if (vnode_isswap(vp)) {
- return 0;
+ error = priv_check_cred(vfs_context_ucred(ctx),
+ PRIV_VFS_DATALESS_RESOLVER, 0);
+ if (error) {
+ return error;
}
- if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
- // the change time is within this epoch
- int error;
+ if (is_resolver) {
+ NSPACE_REQ_LOCK();
- error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
- if (error == EDEADLK) {
- snapshot_error = 0;
- } else if (error) {
- if (error == EAGAIN) {
- printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
- } else if (error == EINTR) {
- // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
- snapshot_error = EINTR;
- }
+ if (nspace_resolver_proc == NULL) {
+ proc_lock(p);
+ p->p_lflag |= P_LNSPACE_RESOLVER;
+ proc_unlock(p);
+ nspace_resolver_proc = p;
+ } else {
+ error = EBUSY;
}
+
+ NSPACE_REQ_UNLOCK();
+ } else {
+ // This is basically just like the exit case.
+ // nspace_resolver_exited() will verify that the
+ // process is the resolver, and will clear the
+ // global.
+ nspace_resolver_exited(p);
}
- return snapshot_error;
+ return error;
}
-int
-get_nspace_item_status(struct vnode *vp, int32_t *status)
+static int
+nspace_materialization_get_proc_state(struct proc *p, int *is_prevented)
{
- int i;
-
- lck_mtx_lock(&nspace_handler_lock);
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].vp == vp) {
- break;
- }
+ if ((p->p_lflag & P_LNSPACE_RESOLVER) != 0 ||
+ (p->p_vfs_iopolicy &
+ P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) == 0) {
+ *is_prevented = 1;
+ } else {
+ *is_prevented = 0;
}
+ return 0;
+}
- if (i >= MAX_NSPACE_ITEMS) {
- lck_mtx_unlock(&nspace_handler_lock);
- return ENOENT;
+static int
+nspace_materialization_set_proc_state(struct proc *p, int is_prevented)
+{
+ if (p->p_lflag & P_LNSPACE_RESOLVER) {
+ return is_prevented ? 0 : EBUSY;
}
- *status = nspace_items[i].flags;
- lck_mtx_unlock(&nspace_handler_lock);
+ if (is_prevented) {
+ OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES), &p->p_vfs_iopolicy);
+ } else {
+ OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES, &p->p_vfs_iopolicy);
+ }
return 0;
}
-
-#if 0
static int
-build_volfs_path(struct vnode *vp, char *path, int *len)
+nspace_materialization_get_thread_state(int *is_prevented)
{
- struct vnode_attr va;
- int ret;
+ uthread_t ut = get_bsdthread_info(current_thread());
- VATTR_INIT(&va);
- VATTR_WANTED(&va, va_fsid);
- VATTR_WANTED(&va, va_fileid);
+ *is_prevented = (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) ? 1 : 0;
+ return 0;
+}
- if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
- *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
- ret = -1;
+static int
+nspace_materialization_set_thread_state(int is_prevented)
+{
+ uthread_t ut = get_bsdthread_info(current_thread());
+
+ if (is_prevented) {
+ ut->uu_flag |= UT_NSPACE_NODATALESSFAULTS;
} else {
- *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
- ret = 0;
+ ut->uu_flag &= ~UT_NSPACE_NODATALESSFAULTS;
}
-
- return ret;
+ return 0;
}
-#endif
-//
-// Note: this function does NOT check permissions on all of the
-// parent directories leading to this vnode. It should only be
-// called on behalf of a root process. Otherwise a process may
-// get access to a file because the file itself is readable even
-// though its parent directories would prevent access.
-//
static int
-vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
+nspace_materialization_is_prevented(void)
{
- int error, action;
+ proc_t p = current_proc();
+ uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
+ vfs_context_t ctx = vfs_context_current();
- if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
- return error;
+ /*
+ * Kernel context ==> return EDEADLK, as we would with any random
+ * process decorated as no-materialize.
+ */
+ if (ctx == vfs_context_kernel()) {
+ return EDEADLK;
}
-#if CONFIG_MACF
- error = mac_vnode_check_open(ctx, vp, fmode);
- if (error) {
- return error;
+ /*
+ * If the process has the dataless-manipulation entitlement,
+ * materialization is prevented, and depending on the kind
+ * of file system operation, things get to proceed as if the
+ * object is not dataless.
+ */
+ if (vfs_context_is_dataless_manipulator(ctx)) {
+ return EJUSTRETURN;
}
-#endif
- /* compute action to be authorized */
- action = 0;
- if (fmode & FREAD) {
- action |= KAUTH_VNODE_READ_DATA;
- }
- if (fmode & (FWRITE | O_TRUNC)) {
- /*
- * If we are writing, appending, and not truncating,
- * indicate that we are appending so that if the
- * UF_APPEND or SF_APPEND bits are set, we do not deny
- * the open.
- */
- if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
- action |= KAUTH_VNODE_APPEND_DATA;
- } else {
- action |= KAUTH_VNODE_WRITE_DATA;
+ /*
+ * Per-thread decorations override any process-wide decorations.
+ * (Foundation uses this, and this overrides even the dataless-
+ * manipulation entitlement so as to make API contracts consistent.)
+ */
+ if (ut != NULL) {
+ if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
+ return EDEADLK;
+ }
+ if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
+ return 0;
}
}
- if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
- return error;
+ /*
+ * If the process's iopolicy specifies that dataless files
+ * can be materialized, then we let it go ahead.
+ */
+ if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
+ return 0;
}
+ /*
+ * The default behavior is to not materialize dataless files;
+ * return to the caller that deadlock was detected.
+ */
+ return EDEADLK;
+}
- //
- // if the vnode is tagged VOPENEVT and the current process
- // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
- // flag to the open mode so that this open won't count against
- // the vnode when carbon delete() does a vnode_isinuse() to see
- // if a file is currently in use. this allows spotlight
- // importers to not interfere with carbon apps that depend on
- // the no-delete-if-busy semantics of carbon delete().
- //
- if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
- fmode |= O_EVTONLY;
- }
+/* the vfs.nspace branch */
+SYSCTL_NODE(_vfs, OID_AUTO, nspace, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs nspace hinge");
- if ((error = VNOP_OPEN(vp, fmode, ctx))) {
+static int
+sysctl_nspace_resolver(__unused struct sysctl_oid *oidp,
+ __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+ struct proc *p = req->p;
+ int new_value, old_value, changed = 0;
+ int error;
+
+ error = nspace_resolver_get_proc_state(p, &old_value);
+ if (error) {
return error;
}
- if ((error = vnode_ref_ext(vp, fmode, 0))) {
- VNOP_CLOSE(vp, fmode, ctx);
- return error;
+
+ error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
+ &changed);
+ if (error == 0 && changed) {
+ error = nspace_resolver_set_proc_state(p, new_value);
}
+ return error;
+}
- /* Call out to allow 3rd party notification of open.
- * Ignore result of kauth_authorize_fileop call.
- */
-#if CONFIG_MACF
- mac_vnode_notify_open(ctx, vp, fmode);
-#endif
- kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
- (uintptr_t)vp, 0);
+/* decorate this process as the dataless file resolver */
+SYSCTL_PROC(_vfs_nspace, OID_AUTO, resolver,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
+ 0, 0, sysctl_nspace_resolver, "I", "");
+static int
+sysctl_nspace_prevent_materialization(__unused struct sysctl_oid *oidp,
+ __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+ struct proc *p = req->p;
+ int new_value, old_value, changed = 0;
+ int error;
- return 0;
+ error = nspace_materialization_get_proc_state(p, &old_value);
+ if (error) {
+ return error;
+ }
+
+ error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
+ &changed);
+ if (error == 0 && changed) {
+ error = nspace_materialization_set_proc_state(p, new_value);
+ }
+ return error;
}
+/* decorate this process as not wanting to materialize dataless files */
+SYSCTL_PROC(_vfs_nspace, OID_AUTO, prevent_materialization,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
+ 0, 0, sysctl_nspace_prevent_materialization, "I", "");
+
static int
-wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
+sysctl_nspace_thread_prevent_materialization(__unused struct sysctl_oid *oidp,
+ __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
- int i;
- int error = 0;
- int unblock = 0;
- task_t curtask;
+ int new_value, old_value, changed = 0;
+ int error;
- lck_mtx_lock(&nspace_handler_exclusion_lock);
- if (nspace_handlers[nspace_type].handler_busy) {
- lck_mtx_unlock(&nspace_handler_exclusion_lock);
- return EBUSY;
+ error = nspace_materialization_get_thread_state(&old_value);
+ if (error) {
+ return error;
}
- nspace_handlers[nspace_type].handler_busy = 1;
- lck_mtx_unlock(&nspace_handler_exclusion_lock);
+ error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
+ &changed);
+ if (error == 0 && changed) {
+ error = nspace_materialization_set_thread_state(new_value);
+ }
+ return error;
+}
- /*
- * Any process that gets here will be one of the namespace handlers.
- * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
- * as we can cause deadlocks to occur, because the namespace handler may prevent
- * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
- * process.
- */
- curtask = current_task();
- bsd_set_dependency_capable(curtask);
+/* decorate this thread as not wanting to materialize dataless files */
+SYSCTL_PROC(_vfs_nspace, OID_AUTO, thread_prevent_materialization,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
+ 0, 0, sysctl_nspace_thread_prevent_materialization, "I", "");
+
+static int
+sysctl_nspace_complete(__unused struct sysctl_oid *oidp, __unused void *arg1,
+ __unused int arg2, struct sysctl_req *req)
+{
+ struct proc *p = req->p;
+ uint32_t req_status[2] = { 0, 0 };
+ uint64_t gencount = 0;
+ int error, is_resolver, changed = 0, gencount_changed;
- lck_mtx_lock(&nspace_handler_lock);
- if (nspace_handlers[nspace_type].handler_proc == NULL) {
- nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
- nspace_handlers[nspace_type].handler_proc = current_proc();
+ error = nspace_resolver_get_proc_state(p, &is_resolver);
+ if (error) {
+ return error;
}
- if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
- (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
- error = EINVAL;
+ if (!is_resolver) {
+ return EPERM;
}
- while (error == 0) {
- /* Try to find matching namespace item */
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
- if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
- break;
- }
- }
- }
+ error = sysctl_io_opaque(req, req_status, sizeof(req_status),
+ &changed);
+ if (error) {
+ return error;
+ }
- if (i >= MAX_NSPACE_ITEMS) {
- /* Nothing is there yet. Wait for wake up and retry */
- error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS | PCATCH, "namespace-items", 0);
- if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
- /* Prevent infinite loop if snapshot handler exited */
- error = EINVAL;
- break;
- }
- continue;
- }
+ // get the gencount if it was passed
+ error = sysctl_io_opaque(req, &gencount, sizeof(gencount),
+ &gencount_changed);
+ if (error) {
+ gencount = 0;
+ // we ignore the error because the gencount was optional
+ error = 0;
+ }
- nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
- nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
- nspace_items[i].token = ++nspace_token_id;
+ /*
+ * req_status[0] is the req_id
+ *
+ * req_status[1] is the errno
+ */
+ if (error == 0 && changed) {
+ nspace_resolver_req_completed(req_status[0],
+ (int)req_status[1], gencount);
+ }
+ return error;
+}
- assert(nspace_items[i].vp);
- struct fileproc *fp;
- int32_t indx;
- int32_t fmode;
- struct proc *p = current_proc();
- vfs_context_t ctx = vfs_context_current();
- struct vnode_attr va;
- bool vn_get_succsessful = false;
- bool vn_open_successful = false;
- bool fp_alloc_successful = false;
+/* Resolver reports completed reqs here. */
+SYSCTL_PROC(_vfs_nspace, OID_AUTO, complete,
+ CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
+ 0, 0, sysctl_nspace_complete, "-", "");
- /*
- * Use vnode pointer to acquire a file descriptor for
- * hand-off to userland
- */
- fmode = nspace_open_flags_for_type(nspace_type);
- error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
- if (error) {
- goto cleanup;
- }
- vn_get_succsessful = true;
+#endif /* CONFIG_DATALESS_FILES */
- error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
- if (error) {
- goto cleanup;
- }
- vn_open_successful = true;
+#if CONFIG_DATALESS_FILES
+#define __no_dataless_unused /* nothing */
+#else
+#define __no_dataless_unused __unused
+#endif
- error = falloc(p, &fp, &indx, ctx);
- if (error) {
- goto cleanup;
- }
- fp_alloc_successful = true;
+void
+nspace_resolver_init(void)
+{
+#if CONFIG_DATALESS_FILES
+ nspace_resolver_request_lck_grp =
+ lck_grp_alloc_init("file namespace resolver", NULL);
- fp->f_fglob->fg_flag = fmode;
- fp->f_fglob->fg_ops = &vnops;
- fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
+ lck_mtx_init(&nspace_resolver_request_hash_mutex,
+ nspace_resolver_request_lck_grp, NULL);
- proc_fdlock(p);
- procfdtbl_releasefd(p, indx, NULL);
- fp_drop(p, indx, fp, 1);
- proc_fdunlock(p);
+ nspace_resolver_request_hashtbl =
+ hashinit(NSPACE_RESOLVER_REQ_HASHSIZE,
+ M_VNODE /* XXX */, &nspace_resolver_request_hashmask);
+#endif /* CONFIG_DATALESS_FILES */
+}
- /*
- * All variants of the namespace handler struct support these three fields:
- * token, flags, and the FD pointer
- */
- error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
- if (error) {
- goto cleanup;
- }
- error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
- if (error) {
- goto cleanup;
- }
- error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
- if (error) {
- goto cleanup;
- }
+void
+nspace_resolver_exited(struct proc *p __no_dataless_unused)
+{
+#if CONFIG_DATALESS_FILES
+ struct nspace_resolver_requesthead *bucket;
+ struct nspace_resolver_request *req;
+ u_long idx;
- /*
- * Handle optional fields:
- * extended version support an info ptr (offset, length), and the
- *
- * namedata version supports a unique per-link object ID
- *
- */
- if (nhd->infoptr) {
- uio_t uio = (uio_t)nspace_items[i].arg;
- uint64_t u_offset, u_length;
+ NSPACE_REQ_LOCK();
- if (uio) {
- u_offset = uio_offset(uio);
- u_length = uio_resid(uio);
- } else {
- u_offset = 0;
- u_length = 0;
- }
- error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
- if (error) {
- goto cleanup;
- }
- error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
- if (error) {
- goto cleanup;
+ if ((p->p_lflag & P_LNSPACE_RESOLVER) &&
+ p == nspace_resolver_proc) {
+ for (idx = 0; idx <= nspace_resolver_request_hashmask; idx++) {
+ bucket = &nspace_resolver_request_hashtbl[idx];
+ LIST_FOREACH(req, bucket, r_hashlink) {
+ nspace_resolver_req_mark_complete(req,
+ ETIMEDOUT);
}
}
+ nspace_resolver_proc = NULL;
+ }
- if (nhd->objid) {
- VATTR_INIT(&va);
- VATTR_WANTED(&va, va_linkid);
- error = vnode_getattr(nspace_items[i].vp, &va, ctx);
- if (error) {
- goto cleanup;
- }
+ NSPACE_REQ_UNLOCK();
+#endif /* CONFIG_DATALESS_FILES */
+}
- uint64_t linkid = 0;
- if (VATTR_IS_SUPPORTED(&va, va_linkid)) {
- linkid = (uint64_t)va.va_linkid;
- }
- error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
- }
-cleanup:
- if (error) {
- if (fp_alloc_successful) {
- fp_free(p, indx, fp);
- }
- if (vn_open_successful) {
- vn_close(nspace_items[i].vp, fmode, ctx);
- }
- unblock = 1;
- }
+int
+resolve_nspace_item(struct vnode *vp, uint64_t op)
+{
+ return resolve_nspace_item_ext(vp, op, NULL);
+}
- if (vn_get_succsessful) {
- vnode_put(nspace_items[i].vp);
- }
+#define DATALESS_RESOLVER_ENTITLEMENT \
+ "com.apple.private.vfs.dataless-resolver"
+#define DATALESS_MANIPULATION_ENTITLEMENT \
+ "com.apple.private.vfs.dataless-manipulation"
- break;
- }
+/*
+ * Return TRUE if the vfs context is associated with a process entitled
+ * for dataless manipulation.
+ *
+ * XXX Arguably belongs in vfs_subr.c, but is here because of the
+ * complication around CONFIG_DATALESS_FILES.
+ */
+boolean_t
+vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused)
+{
+#if CONFIG_DATALESS_FILES
+ assert(ctx->vc_thread == current_thread());
+ task_t const task = current_task();
+ return IOTaskHasEntitlement(task, DATALESS_MANIPULATION_ENTITLEMENT) ||
+ IOTaskHasEntitlement(task, DATALESS_RESOLVER_ENTITLEMENT);
+#else
+ return false;
+#endif /* CONFIG_DATALESS_FILES */
+}
- if (unblock) {
- if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
- vnode_lock_spin(nspace_items[i].vp);
- nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
- vnode_unlock(nspace_items[i].vp);
- }
- nspace_items[i].vp = NULL;
- nspace_items[i].vid = 0;
- nspace_items[i].flags = NSPACE_ITEM_DONE;
- nspace_items[i].token = 0;
+int
+resolve_nspace_item_ext(
+ struct vnode *vp __no_dataless_unused,
+ uint64_t op __no_dataless_unused,
+ void *arg __unused)
+{
+#if CONFIG_DATALESS_FILES
+ int error;
+ mach_port_t mp;
+ char *path = NULL;
+ int path_len;
+ kern_return_t kr;
+ struct nspace_resolver_request req;
- wakeup((caddr_t)&(nspace_items[i].vp));
+ // only allow namespace events on regular files, directories and symlinks.
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+ return EFTYPE;
}
- if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
- // just go through every snapshot event and unblock it immediately.
- if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
- if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
- nspace_items[i].vp = NULL;
- nspace_items[i].vid = 0;
- nspace_items[i].flags = NSPACE_ITEM_DONE;
- nspace_items[i].token = 0;
+ //
+ // if this is a snapshot event and the vnode is on a
+ // disk image just pretend nothing happened since any
+ // change to the disk image will cause the disk image
+ // itself to get backed up and this avoids multi-way
+ // deadlocks between the snapshot handler and the ever
+ // popular diskimages-helper process. the variable
+ // nspace_allow_virtual_devs allows this behavior to
+ // be overridden (for use by the Mobile TimeMachine
+ // testing infrastructure which uses disk images)
+ //
+ if (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
+ os_log_debug(OS_LOG_DEFAULT, "NSPACE SNAPSHOT not handled");
+ return ENOTSUP;
+ }
- wakeup((caddr_t)&(nspace_items[i].vp));
- }
- }
- }
- }
+ error = nspace_materialization_is_prevented();
+ if (error) {
+ os_log_debug(OS_LOG_DEFAULT,
+ "NSPACE process/thread is decorated as no-materialization");
+ return error;
}
- lck_mtx_unlock(&nspace_handler_lock);
+ kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
+ if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
+ os_log_error(OS_LOG_DEFAULT, "NSPACE no port");
+ // Treat this like being unable to access the backing
+ // store server.
+ return ETIMEDOUT;
+ }
- lck_mtx_lock(&nspace_handler_exclusion_lock);
- nspace_handlers[nspace_type].handler_busy = 0;
- lck_mtx_unlock(&nspace_handler_exclusion_lock);
+ path = zalloc(ZV_NAMEI);
+ path_len = MAXPATHLEN;
- return error;
-}
+ error = vn_getpath(vp, path, &path_len);
+ if (error == 0) {
+ int xxx_rdar44371223; /* XXX Mig bug */
+ req.r_req_id = next_nspace_req_id();
+ req.r_resolver_error = 0;
+ req.r_flags = 0;
-static inline int
-validate_namespace_args(int is64bit, int size)
-{
- if (is64bit) {
- /* Must be one of these */
- if (size == sizeof(user64_namespace_handler_info)) {
- goto sizeok;
- }
- if (size == sizeof(user64_namespace_handler_info_ext)) {
- goto sizeok;
- }
- if (size == sizeof(user64_namespace_handler_data)) {
- goto sizeok;
- }
- return EINVAL;
- } else {
- /* 32 bit -- must be one of these */
- if (size == sizeof(user32_namespace_handler_info)) {
- goto sizeok;
- }
- if (size == sizeof(user32_namespace_handler_info_ext)) {
- goto sizeok;
+ if ((error = vnode_ref(vp)) == 0) { // take a ref so that the vnode doesn't go away
+ req.r_vp = vp;
+ } else {
+ goto out_release_port;
}
- if (size == sizeof(user32_namespace_handler_data)) {
- goto sizeok;
+
+ NSPACE_REQ_LOCK();
+ error = nspace_resolver_req_add(&req);
+ NSPACE_REQ_UNLOCK();
+ if (error) {
+ vnode_rele(req.r_vp);
+ goto out_release_port;
}
- return EINVAL;
- }
-sizeok:
+ os_log_debug(OS_LOG_DEFAULT, "NSPACE resolve_path call");
+ kr = send_nspace_resolve_path(mp, req.r_req_id,
+ current_proc()->p_pid, (uint32_t)(op & 0xffffffff),
+ path, &xxx_rdar44371223);
+ if (kr != KERN_SUCCESS) {
+ // Also treat this like being unable to access
+ // the backing store server.
+ os_log_error(OS_LOG_DEFAULT,
+ "NSPACE resolve_path failure: %d", kr);
+ error = ETIMEDOUT;
- return 0;
-}
+ NSPACE_REQ_LOCK();
+ nspace_resolver_req_remove(&req);
+ NSPACE_REQ_UNLOCK();
+ vnode_rele(req.r_vp);
+ goto out_release_port;
+ }
-static int
-process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
-{
- int error = 0;
- namespace_handler_data nhd;
+ // Give back the memory we allocated earlier while
+ // we wait; we no longer need it.
+ zfree(ZV_NAMEI, path);
+ path = NULL;
- bzero(&nhd, sizeof(namespace_handler_data));
+ // Request has been submitted to the resolver.
+ // Now (interruptibly) wait for completion.
+ // Upon return, the request will have been removed
+ // from the lookup table.
+ error = nspace_resolver_req_wait(&req);
- if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
- return error;
+ vnode_rele(req.r_vp);
}
- error = validate_namespace_args(is64bit, size);
- if (error) {
- return error;
+out_release_port:
+ if (path != NULL) {
+ zfree(ZV_NAMEI, path);
}
+ ipc_port_release_send(mp);
- /* Copy in the userland pointers into our kernel-only struct */
+ return error;
+#else
+ return ENOTSUP;
+#endif /* CONFIG_DATALESS_FILES */
+}
- if (is64bit) {
- /* 64 bit userland structures */
- nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
- nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
- nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
+int
+nspace_snapshot_event(__unused vnode_t vp, __unused time_t ctime,
+ __unused uint64_t op_type, __unused void *arg)
+{
+ return 0;
+}
- /* If the size is greater than the standard info struct, add in extra fields */
- if (size > (sizeof(user64_namespace_handler_info))) {
- if (size >= (sizeof(user64_namespace_handler_info_ext))) {
- nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
- }
- if (size == (sizeof(user64_namespace_handler_data))) {
- nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
- }
- /* Otherwise the fields were pre-zeroed when we did the bzero above. */
- }
- } else {
- /* 32 bit userland structures */
- nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
- nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
- nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
+#if 0
+static int
+build_volfs_path(struct vnode *vp, char *path, int *len)
+{
+ struct vnode_attr va;
+ int ret;
- if (size > (sizeof(user32_namespace_handler_info))) {
- if (size >= (sizeof(user32_namespace_handler_info_ext))) {
- nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
- }
- if (size == (sizeof(user32_namespace_handler_data))) {
- nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
- }
- /* Otherwise the fields were pre-zeroed when we did the bzero above. */
- }
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_fsid);
+ VATTR_WANTED(&va, va_fileid);
+
+ if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
+ *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
+ ret = -1;
+ } else {
+ *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
+ ret = 0;
}
- return wait_for_namespace_event(&nhd, nspace_type);
+ return ret;
}
+#endif
static unsigned long
fsctl_bogus_command_compat(unsigned long cmd)
return FSIOC_ROUTEFS_SETROUTEID;
case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
return FSIOC_SET_PACKAGE_EXTS;
- case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
- return FSIOC_NAMESPACE_HANDLER_GET;
- case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
- return FSIOC_OLD_SNAPSHOT_HANDLER_GET;
- case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
- return FSIOC_SNAPSHOT_HANDLER_GET_EXT;
- case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
- return FSIOC_NAMESPACE_HANDLER_UPDATE;
- case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
- return FSIOC_NAMESPACE_HANDLER_UNBLOCK;
- case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
- return FSIOC_NAMESPACE_HANDLER_CANCEL;
- case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
- return FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME;
- case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
- return FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS;
case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
return FSIOC_SET_FSTYPENAME_OVERRIDE;
case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
return cmd;
}
+static int
+cas_bsdflags_setattr(vnode_t vp, void *arg, vfs_context_t ctx)
+{
+ return VNOP_IOCTL(vp, FSIOC_CAS_BSDFLAGS, arg, FWRITE, ctx);
+}
+
+static int __attribute__((noinline))
+handle_sync_volume(vnode_t vp, vnode_t *arg_vp, caddr_t data, vfs_context_t ctx)
+{
+ struct vfs_attr vfa;
+ mount_t mp = vp->v_mount;
+ unsigned arg;
+ int error;
+
+ /* record vid of vp so we can drop it below. */
+ uint32_t vvid = vp->v_id;
+
+ /*
+ * Then grab mount_iterref so that we can release the vnode.
+ * Without this, a thread may call vnode_iterate_prepare then
+ * get into a deadlock because we've never released the root vp
+ */
+ error = mount_iterref(mp, 0);
+ if (error) {
+ return error;
+ }
+ vnode_put(vp);
+
+ arg = MNT_NOWAIT;
+ if (*(uint32_t*)data & FSCTL_SYNC_WAIT) {
+ arg = MNT_WAIT;
+ }
+
+ /*
+ * If the filesystem supports multiple filesystems in a
+ * partition (e.g. APFS volumes in a container), it knows
+ * that the waitfor argument to VFS_SYNC is a set of flags.
+ */
+ VFSATTR_INIT(&vfa);
+ VFSATTR_WANTED(&vfa, f_capabilities);
+ if ((vfs_getattr(mp, &vfa, vfs_context_current()) == 0) &&
+ VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
+ ((vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE)) &&
+ ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE))) {
+ arg |= MNT_VOLUME;
+ }
+
+ /* issue the sync for this volume */
+ (void)sync_callback(mp, &arg);
+
+ /*
+ * Then release the mount_iterref once we're done syncing; it's not
+ * needed for the VNOP_IOCTL below
+ */
+ mount_iterdrop(mp);
+
+ if (arg & FSCTL_SYNC_FULLSYNC) {
+ /* re-obtain vnode iocount on the root vp, if possible */
+ error = vnode_getwithvid(vp, vvid);
+ if (error == 0) {
+ error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
+ vnode_put(vp);
+ }
+ }
+ /* mark the argument VP as having been released */
+ *arg_vp = NULL;
+ return error;
+}
+
+#if ROUTEFS
+static int __attribute__((noinline))
+handle_routes(user_addr_t udata)
+{
+ char routepath[MAXPATHLEN];
+ size_t len = 0;
+ int error;
+
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ return error;
+ }
+ bzero(routepath, MAXPATHLEN);
+ error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
+ if (error) {
+ return error;
+ }
+ error = routefs_kernel_mount(routepath);
+ return error;
+}
+#endif
+
+static int __attribute__((noinline))
+handle_flags(vnode_t vp, caddr_t data, vfs_context_t ctx)
+{
+ struct fsioc_cas_bsdflags *cas = (struct fsioc_cas_bsdflags *)data;
+ struct vnode_attr va;
+ int error;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_flags, cas->new_flags);
+
+ error = chflags0(vp, &va, cas_bsdflags_setattr, cas, ctx);
+ return error;
+}
+
+static int __attribute__((noinline))
+handle_auth(vnode_t vp, u_long cmd, caddr_t data, u_long options, vfs_context_t ctx)
+{
+ struct mount *mp = NULL;
+ errno_t rootauth = 0;
+
+ mp = vp->v_mount;
+
+ /*
+ * query the underlying FS and see if it reports something
+ * sane for this vnode. If volume is authenticated via
+ * chunklist, leave that for the caller to determine.
+ */
+ rootauth = VNOP_IOCTL(vp, cmd, data, (int)options, ctx);
+
+ return rootauth;
+}
+
/*
* Make a filesystem-specific control call:
*/
caddr_t data, memp;
vnode_t vp = *arg_vp;
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ return ENOTTY;
+ }
+
cmd = fsctl_bogus_command_compat(cmd);
size = IOCPARM_LEN(cmd);
memp = NULL;
if (size > sizeof(stkbuf)) {
- if ((memp = (caddr_t)kalloc(size)) == 0) {
+ if ((memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK)) == 0) {
return ENOMEM;
}
data = memp;
error = copyin(udata, data, size);
if (error) {
if (memp) {
- kfree(memp, size);
+ kheap_free(KHEAP_TEMP, memp, size);
}
return error;
}
} else {
*(uint32_t *)data = (uint32_t)udata;
}
- };
- } else if ((cmd & IOC_OUT) && size) {
- /*
- * Zero the buffer so the user always
- * gets back something deterministic.
- */
- bzero(data, size);
- } else if (cmd & IOC_VOID) {
- if (is64bit) {
- *(user_addr_t *)data = udata;
- } else {
- *(uint32_t *)data = (uint32_t)udata;
- }
- }
-
- /* Check to see if it's a generic command */
- switch (cmd) {
- case FSIOC_SYNC_VOLUME: {
- mount_t mp = vp->v_mount;
- int arg = *(uint32_t*)data;
-
- /* record vid of vp so we can drop it below. */
- uint32_t vvid = vp->v_id;
-
- /*
- * Then grab mount_iterref so that we can release the vnode.
- * Without this, a thread may call vnode_iterate_prepare then
- * get into a deadlock because we've never released the root vp
- */
- error = mount_iterref(mp, 0);
- if (error) {
- break;
- }
- vnode_put(vp);
-
- /* issue the sync for this volume */
- (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
-
+ };
+ } else if ((cmd & IOC_OUT) && size) {
/*
- * Then release the mount_iterref once we're done syncing; it's not
- * needed for the VNOP_IOCTL below
+ * Zero the buffer so the user always
+ * gets back something deterministic.
*/
- mount_iterdrop(mp);
-
- if (arg & FSCTL_SYNC_FULLSYNC) {
- /* re-obtain vnode iocount on the root vp, if possible */
- error = vnode_getwithvid(vp, vvid);
- if (error == 0) {
- error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
- vnode_put(vp);
- }
+ bzero(data, size);
+ } else if (cmd & IOC_VOID) {
+ if (is64bit) {
+ *(user_addr_t *)data = udata;
+ } else {
+ *(uint32_t *)data = (uint32_t)udata;
}
- /* mark the argument VP as having been released */
- *arg_vp = NULL;
}
- break;
- case FSIOC_ROUTEFS_SETROUTEID: {
-#if ROUTEFS
- char routepath[MAXPATHLEN];
- size_t len = 0;
+ /* Check to see if it's a generic command */
+ switch (cmd) {
+ case FSIOC_SYNC_VOLUME:
+ error = handle_sync_volume(vp, arg_vp, data, ctx);
+ break;
- if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
- break;
- }
- bzero(routepath, MAXPATHLEN);
- error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
- if (error) {
- break;
- }
- error = routefs_kernel_mount(routepath);
- if (error) {
- break;
- }
+ case FSIOC_ROUTEFS_SETROUTEID:
+#if ROUTEFS
+ error = handle_routes(udata);
#endif
- }
- break;
+ break;
case FSIOC_SET_PACKAGE_EXTS: {
user_addr_t ext_strings;
}
if (is64bit) {
- ext_strings = ((user64_package_ext_info *)data)->strings;
+ if (sizeof(user64_addr_t) > sizeof(user_addr_t)) {
+ assert(((user64_package_ext_info *)data)->strings <= UINT32_MAX);
+ }
+ ext_strings = (user_addr_t)((user64_package_ext_info *)data)->strings;
num_entries = ((user64_package_ext_info *)data)->num_entries;
max_width = ((user64_package_ext_info *)data)->max_width;
} else {
}
break;
- /* namespace handlers */
- case FSIOC_NAMESPACE_HANDLER_GET: {
- error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
- }
- break;
-
- /* Snapshot handlers */
- case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
- error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
- }
- break;
-
- case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
- error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
- }
- break;
-
- case FSIOC_NAMESPACE_HANDLER_UPDATE: {
- uint32_t token, val;
- int i;
-
- if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
- break;
- }
-
- if (!nspace_is_special_process(p)) {
- error = EINVAL;
- break;
- }
-
- token = ((uint32_t *)data)[0];
- val = ((uint32_t *)data)[1];
-
- lck_mtx_lock(&nspace_handler_lock);
-
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].token == token) {
- break; /* exit for loop, not case stmt */
- }
- }
-
- if (i >= MAX_NSPACE_ITEMS) {
- error = ENOENT;
- } else {
- //
- // if this bit is set, when resolve_nspace_item() times out
- // it will loop and go back to sleep.
- //
- nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
- }
-
- lck_mtx_unlock(&nspace_handler_lock);
-
- if (error) {
- printf("nspace-handler-update: did not find token %u\n", token);
- }
- }
- break;
-
- case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
- uint32_t token, val;
- int i;
-
- if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
- break;
- }
-
- if (!nspace_is_special_process(p)) {
- error = EINVAL;
- break;
- }
-
- token = ((uint32_t *)data)[0];
- val = ((uint32_t *)data)[1];
-
- lck_mtx_lock(&nspace_handler_lock);
-
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].token == token) {
- break; /* exit for loop, not case statement */
- }
- }
-
- if (i >= MAX_NSPACE_ITEMS) {
- printf("nspace-handler-unblock: did not find token %u\n", token);
- error = ENOENT;
- } else {
- if (val == 0 && nspace_items[i].vp) {
- vnode_lock_spin(nspace_items[i].vp);
- nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
- vnode_unlock(nspace_items[i].vp);
- }
-
- nspace_items[i].vp = NULL;
- nspace_items[i].arg = NULL;
- nspace_items[i].op = 0;
- nspace_items[i].vid = 0;
- nspace_items[i].flags = NSPACE_ITEM_DONE;
- nspace_items[i].token = 0;
-
- wakeup((caddr_t)&(nspace_items[i].vp));
- }
-
- lck_mtx_unlock(&nspace_handler_lock);
- }
- break;
-
- case FSIOC_NAMESPACE_HANDLER_CANCEL: {
- uint32_t token, val;
- int i;
-
- if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
- break;
- }
-
- if (!nspace_is_special_process(p)) {
- error = EINVAL;
- break;
- }
-
- token = ((uint32_t *)data)[0];
- val = ((uint32_t *)data)[1];
-
- lck_mtx_lock(&nspace_handler_lock);
-
- for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
- if (nspace_items[i].token == token) {
- break; /* exit for loop, not case stmt */
- }
- }
-
- if (i >= MAX_NSPACE_ITEMS) {
- printf("nspace-handler-cancel: did not find token %u\n", token);
- error = ENOENT;
- } else {
- if (nspace_items[i].vp) {
- vnode_lock_spin(nspace_items[i].vp);
- nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
- vnode_unlock(nspace_items[i].vp);
- }
-
- nspace_items[i].vp = NULL;
- nspace_items[i].arg = NULL;
- nspace_items[i].vid = 0;
- nspace_items[i].token = val;
- nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
- nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
-
- wakeup((caddr_t)&(nspace_items[i].vp));
- }
-
- lck_mtx_unlock(&nspace_handler_lock);
- }
- break;
-
- case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
- if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
- break;
- }
-
- // we explicitly do not do the namespace_handler_proc check here
-
- lck_mtx_lock(&nspace_handler_lock);
- snapshot_timestamp = ((uint32_t *)data)[0];
- wakeup(&nspace_item_idx);
- lck_mtx_unlock(&nspace_handler_lock);
- printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
- }
- break;
-
- case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
- {
- if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
- break;
- }
-
- lck_mtx_lock(&nspace_handler_lock);
- nspace_allow_virtual_devs = ((uint32_t *)data)[0];
- lck_mtx_unlock(&nspace_handler_lock);
- printf("nspace-snapshot-handler will%s allow events on disk-images\n",
- nspace_allow_virtual_devs ? "" : " NOT");
- error = 0;
- }
- break;
-
case FSIOC_SET_FSTYPENAME_OVERRIDE:
{
if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
if (vp->v_mount) {
mount_lock(vp->v_mount);
if (data[0] != 0) {
+ int i;
+ for (i = 0; i < MFSTYPENAMELEN; i++) {
+ if (!data[i]) {
+ goto continue_copy;
+ }
+ }
+ /*
+ * Getting here means we have a user data string which has no
+ * NULL termination in its first MFSTYPENAMELEN bytes.
+ * This is bogus, let's avoid strlcpy-ing the read data and
+ * return an error.
+ */
+ error = EINVAL;
+ goto unlock;
+continue_copy:
strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
vp->v_mount->fstypename_override[0] = '\0';
}
+unlock:
mount_unlock(vp->v_mount);
}
}
}
break;
+ case FSIOC_CAS_BSDFLAGS:
+ error = handle_flags(vp, data, ctx);
+ break;
+
case FSIOC_FD_ONLY_OPEN_ONCE: {
+ error = 0;
if (vnode_usecount(vp) > 1) {
- error = EBUSY;
- } else {
- error = 0;
+ vnode_lock_spin(vp);
+ if (vp->v_lflag & VL_HASSTREAMS) {
+ if (vnode_isinuse_locked(vp, 1, 1)) {
+ error = EBUSY;
+ }
+ } else if (vnode_usecount(vp) > 1) {
+ error = EBUSY;
+ }
+ vnode_unlock(vp);
}
}
break;
+ case FSIOC_EVAL_ROOTAUTH:
+ error = handle_auth(vp, cmd, data, options, ctx);
+ break;
+
default: {
/* other, known commands shouldn't be passed down here */
switch (cmd) {
case F_BARRIERFSYNC:
case F_FREEZE_FS:
case F_THAW_FS:
+ case FSIOC_KERNEL_ROOTAUTH:
error = EINVAL;
goto outdrop;
}
/* Invoke the filesystem-specific code */
- error = VNOP_IOCTL(vp, cmd, data, options, ctx);
+ error = VNOP_IOCTL(vp, cmd, data, (int)options, ctx);
}
} /* end switch stmt */
outdrop:
if (memp) {
- kfree(memp, size);
+ kheap_free(KHEAP_TEMP, memp, size);
}
return error;
{
int error;
struct nameidata nd;
- u_long nameiflags;
+ uint32_t nameiflags;
vnode_t vp = NULL;
vfs_context_t ctx = vfs_context_current();
- AUDIT_ARG(cmd, uap->cmd);
+ AUDIT_ARG(cmd, (int)uap->cmd);
AUDIT_ARG(value32, uap->options);
/* Get the vnode for the file we are getting info on: */
nameiflags = 0;
if ((uap->options & FSOPT_NOFOLLOW) == 0) {
nameiflags |= FOLLOW;
}
+ if (uap->cmd == FSIOC_FIRMLINK_CTL) {
+ nameiflags |= (CN_FIRMLINK_NOFOLLOW | NOCACHE);
+ }
NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
if ((error = namei(&nd))) {
int fd = -1;
AUDIT_ARG(fd, uap->fd);
- AUDIT_ARG(cmd, uap->cmd);
+ AUDIT_ARG(cmd, (int)uap->cmd);
AUDIT_ARG(value32, uap->options);
/* Get the vnode for the file we are getting info on: */
}
/* end of fsctl system call */
+#define FILESEC_ACCESS_ENTITLEMENT \
+ "com.apple.private.vfs.filesec-access"
+
+static int
+xattr_entitlement_check(const char *attrname, vfs_context_t ctx, bool setting)
+{
+ if (strcmp(attrname, KAUTH_FILESEC_XATTR) == 0) {
+ /*
+ * get: root and tasks with FILESEC_ACCESS_ENTITLEMENT.
+ * set: only tasks with FILESEC_ACCESS_ENTITLEMENT.
+ */
+ if ((!setting && vfs_context_issuser(ctx)) ||
+ IOTaskHasEntitlement(current_task(),
+ FILESEC_ACCESS_ENTITLEMENT)) {
+ return 0;
+ }
+ }
+
+ return EPERM;
+}
+
/*
* Retrieve the data of an extended attribute.
*/
if (error != 0) {
goto out;
}
- if (xattr_protected(attrname)) {
- if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
- error = EPERM;
- goto out;
- }
+ if (xattr_protected(attrname) &&
+ (error = xattr_entitlement_check(attrname, ctx, false)) != 0) {
+ goto out;
}
/*
* the specific check for 0xffffffff is a hack to preserve
{
vnode_t vp;
char attrname[XATTR_MAXNAMELEN + 1];
+ vfs_context_t ctx = vfs_context_current();
uio_t auio = NULL;
int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
size_t attrsize = 0;
if (error != 0) {
goto out;
}
- if (xattr_protected(attrname)) {
- error = EPERM;
+ if (xattr_protected(attrname) &&
+ (error = xattr_entitlement_check(attrname, ctx, false)) != 0) {
goto out;
}
if (uap->value && uap->size > 0) {
/* Otherwise return the default error from copyinstr to detect ERANGE, etc */
return error;
}
- if (xattr_protected(attrname)) {
- return EPERM;
+ if (xattr_protected(attrname) &&
+ (error = xattr_entitlement_check(attrname, ctx, true)) != 0) {
+ return error;
}
if (uap->size != 0 && uap->value == 0) {
return EINVAL;
}
+ if (uap->size > INT_MAX) {
+ return E2BIG;
+ }
nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
{
vnode_t vp;
char attrname[XATTR_MAXNAMELEN + 1];
+ vfs_context_t ctx = vfs_context_current();
uio_t auio = NULL;
int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
size_t namelen;
int error;
char uio_buf[UIO_SIZEOF(1)];
-#if CONFIG_FSE
- vfs_context_t ctx = vfs_context_current();
-#endif
if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
return EINVAL;
/* Otherwise return the default error from copyinstr to detect ERANGE, etc */
return error;
}
- if (xattr_protected(attrname)) {
- return EPERM;
+ if (xattr_protected(attrname) &&
+ (error = xattr_entitlement_check(attrname, ctx, true)) != 0) {
+ return error;
}
if (uap->size != 0 && uap->value == 0) {
return EINVAL;
}
+ if (uap->size > INT_MAX) {
+ return E2BIG;
+ }
if ((error = file_vnode(uap->fd, &vp))) {
return error;
}
}
static int
-fsgetpath_internal(
- vfs_context_t ctx, int volfs_id, uint64_t objid,
- vm_size_t bufsize, caddr_t buf, int *pathlen)
+fsgetpath_internal(vfs_context_t ctx, int volfs_id, uint64_t objid,
+ vm_size_t bufsize, caddr_t buf, uint32_t options, int *pathlen)
{
int error;
struct mount *mp = NULL;
unionget:
if (objid == 2) {
- error = VFS_ROOT(mp, &vp, ctx);
+ struct vfs_attr vfsattr;
+ int use_vfs_root = TRUE;
+
+ VFSATTR_INIT(&vfsattr);
+ VFSATTR_WANTED(&vfsattr, f_capabilities);
+ if (!(options & FSOPT_ISREALFSID) &&
+ vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
+ VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
+ if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
+ (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
+ use_vfs_root = FALSE;
+ }
+ }
+
+ if (use_vfs_root) {
+ error = VFS_ROOT(mp, &vp, ctx);
+ } else {
+ error = VFS_VGET(mp, objid, &vp, ctx);
+ }
} else {
error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
}
/* Obtain the absolute path to this vnode. */
bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
+ if (options & FSOPT_NOFIRMLINKPATH) {
+ bpflags |= BUILDPATH_NO_FIRMLINK;
+ }
bpflags |= BUILDPATH_CHECK_MOVED;
- error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
+ error = build_path(vp, buf, (int)bufsize, &length, bpflags, ctx);
vnode_put(vp);
if (error) {
AUDIT_ARG(text, buf);
- if (kdebug_enable) {
- long dbg_parms[NUMPARMS];
- int dbg_namelen;
+ if (kdebug_debugid_enabled(VFS_LOOKUP) && length > 0) {
+ unsigned long path_words[NUMPARMS];
+ size_t path_len = sizeof(path_words);
- dbg_namelen = (int)sizeof(dbg_parms);
+ if ((size_t)length < path_len) {
+ memcpy((char *)path_words, buf, length);
+ memset((char *)path_words + length, 0, path_len - length);
- if (length < dbg_namelen) {
- memcpy((char *)dbg_parms, buf, length);
- memset((char *)dbg_parms + length, 0, dbg_namelen - length);
-
- dbg_namelen = length;
+ path_len = length;
} else {
- memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
+ memcpy((char *)path_words, buf + (length - path_len), path_len);
}
- kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
+ kdebug_vfs_lookup(path_words, (int)path_len, vp,
KDBG_VFS_LOOKUP_FLAG_LOOKUP);
}
- *pathlen = (user_ssize_t)length; /* may be superseded by error */
+ *pathlen = length; /* may be superseded by error */
out:
return error;
/*
* Obtain the full pathname of a file system object by id.
*/
-int
-fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
+static int
+fsgetpath_extended(user_addr_t buf, user_size_t bufsize, user_addr_t user_fsid, uint64_t objid,
+ uint32_t options, user_ssize_t *retval)
{
vfs_context_t ctx = vfs_context_current();
fsid_t fsid;
int length;
int error;
- if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
+ if (options & ~(FSOPT_NOFIRMLINKPATH | FSOPT_ISREALFSID)) {
+ return EINVAL;
+ }
+
+ if ((error = copyin(user_fsid, (caddr_t)&fsid, sizeof(fsid)))) {
return error;
}
AUDIT_ARG(value32, fsid.val[0]);
- AUDIT_ARG(value64, uap->objid);
+ AUDIT_ARG(value64, objid);
/* Restrict output buffer size for now. */
- if (uap->bufsize > PAGE_SIZE) {
+ if (bufsize > PAGE_SIZE || bufsize <= 0) {
return EINVAL;
}
- MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK | M_ZERO);
+ realpath = kheap_alloc(KHEAP_TEMP, bufsize, Z_WAITOK | Z_ZERO);
if (realpath == NULL) {
return ENOMEM;
}
- error = fsgetpath_internal(
- ctx, fsid.val[0], uap->objid,
- uap->bufsize, realpath, &length);
+ error = fsgetpath_internal(ctx, fsid.val[0], objid, bufsize, realpath,
+ options, &length);
if (error) {
goto out;
}
- error = copyout((caddr_t)realpath, uap->buf, length);
+ error = copyout((caddr_t)realpath, buf, length);
*retval = (user_ssize_t)length; /* may be superseded by error */
out:
- if (realpath) {
- FREE(realpath, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, realpath, bufsize);
return error;
}
+/* fsgetpath() syscall: resolve an fsid/objid pair to a path; no option flags. */
+int
+fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
+{
+ return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
+ 0, retval);
+}
+
+/*
+ * fsgetpath_ext() syscall: same as fsgetpath() but forwards the caller's
+ * option flags, which fsgetpath_extended() validates against
+ * FSOPT_NOFIRMLINKPATH | FSOPT_ISREALFSID.
+ */
+int
+fsgetpath_ext(__unused proc_t p, struct fsgetpath_ext_args *uap, user_ssize_t *retval)
+{
+ return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
+ uap->options, retval);
+}
+
/*
* Common routine to handle various flavors of statfs data heading out
* to user space.
my_size = copy_size = sizeof(sfs);
bzero(&sfs, my_size);
sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
- sfs.f_type = mp->mnt_vtable->vfc_typenum;
+ sfs.f_type = (short)mp->mnt_vtable->vfc_typenum;
sfs.f_reserved1 = (short)sfsp->f_fssubtype;
sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
bzero(&sfs, my_size);
sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
- sfs.f_type = mp->mnt_vtable->vfc_typenum;
+ sfs.f_type = (short)mp->mnt_vtable->vfc_typenum;
sfs.f_reserved1 = (short)sfsp->f_fssubtype;
/*
sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
- sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
+ sfs.f_iosize = (int)lmax(sfsp->f_iosize, sfsp->f_bsize);
} else {
/* filesystem is small enough to be reported honestly */
sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
usbp->st_gid = sbp->st_gid;
usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
- usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
- usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
- usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
- usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
- usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
- usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
+ usbp->st_atimespec.tv_sec = (user32_time_t)sbp->st_atimespec.tv_sec;
+ usbp->st_atimespec.tv_nsec = (user32_long_t)sbp->st_atimespec.tv_nsec;
+ usbp->st_mtimespec.tv_sec = (user32_time_t)sbp->st_mtimespec.tv_sec;
+ usbp->st_mtimespec.tv_nsec = (user32_long_t)sbp->st_mtimespec.tv_nsec;
+ usbp->st_ctimespec.tv_sec = (user32_time_t)sbp->st_ctimespec.tv_sec;
+ usbp->st_ctimespec.tv_nsec = (user32_long_t)sbp->st_ctimespec.tv_nsec;
#else
usbp->st_atime = sbp->st_atime;
usbp->st_atimensec = sbp->st_atimensec;
usbp->st_gid = sbp->st_gid;
usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
- usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
- usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
- usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
- usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
- usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
- usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
- usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
- usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
+ usbp->st_atimespec.tv_sec = (user32_time_t)sbp->st_atimespec.tv_sec;
+ usbp->st_atimespec.tv_nsec = (user32_long_t)sbp->st_atimespec.tv_nsec;
+ usbp->st_mtimespec.tv_sec = (user32_time_t)sbp->st_mtimespec.tv_sec;
+ usbp->st_mtimespec.tv_nsec = (user32_long_t)sbp->st_mtimespec.tv_nsec;
+ usbp->st_ctimespec.tv_sec = (user32_time_t)sbp->st_ctimespec.tv_sec;
+ usbp->st_ctimespec.tv_nsec = (user32_long_t)sbp->st_ctimespec.tv_nsec;
+ usbp->st_birthtimespec.tv_sec = (user32_time_t)sbp->st_birthtimespec.tv_sec;
+ usbp->st_birthtimespec.tv_nsec = (user32_long_t)sbp->st_birthtimespec.tv_nsec;
#else
usbp->st_atime = sbp->st_atime;
usbp->st_atimensec = sbp->st_atimensec;
goto out;
}
- MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ name_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
if (error) {
goto out1;
error = namei(ndp);
out1:
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
out:
if (error) {
if (*sdvpp) {
* Since this requires superuser privileges, vnode_authorize calls are not
* made.
*/
-static int
+static int __attribute__((noinline))
snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
vfs_context_t ctx)
{
vnode_t rvp, snapdvp;
int error;
- struct nameidata namend;
+ /*
+ * struct nameidata is moved off the kernel stack onto the heap (same
+ * "too big to be on stack" pattern used by snapshot_mount and
+ * snapshot_rename in this file).  Z_WAITOK: may block for memory; the
+ * result is not NULL-checked, matching the sibling call sites here.
+ */
+ struct nameidata *ndp;
- error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
+ ndp = kheap_alloc(KHEAP_TEMP, sizeof(*ndp), Z_WAITOK);
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, ndp, CREATE,
OP_LINK, ctx);
if (error) {
- return error;
+ /* early return must now free ndp, hence goto instead of return */
+ goto out;
}
+ /* a vnode already exists under the requested snapshot name */
- if (namend.ni_vp) {
- vnode_put(namend.ni_vp);
+ if (ndp->ni_vp) {
+ vnode_put(ndp->ni_vp);
error = EEXIST;
} else {
- struct vnode_attr va;
+ struct vnode_attr *vap;
vnode_t vp = NULLVP;
- VATTR_INIT(&va);
- VATTR_SET(&va, va_type, VREG);
- VATTR_SET(&va, va_mode, 0);
+ /* vnode_attr likewise heap-allocated; freed at the end of this branch */
+ vap = kheap_alloc(KHEAP_TEMP, sizeof(*vap), Z_WAITOK);
- error = vn_create(snapdvp, &vp, &namend, &va,
+ VATTR_INIT(vap);
+ VATTR_SET(vap, va_type, VREG);
+ VATTR_SET(vap, va_mode, 0);
+
+ /* create the snapshot entry as a zero-mode regular file */
+ error = vn_create(snapdvp, &vp, ndp, vap,
VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
if (!error && vp) {
vnode_put(vp);
}
+
+ kheap_free(KHEAP_TEMP, vap, sizeof(*vap));
}
- nameidone(&namend);
+ nameidone(ndp);
vnode_put(snapdvp);
vnode_put(rvp);
+out:
+ kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
+
return error;
}
* get the vnode for the unnamed snapshot directory and the snapshot and
* delete the snapshot.
*/
-static int
+static int __attribute__((noinline))
snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
vfs_context_t ctx)
{
vnode_t rvp, snapdvp;
int error;
- struct nameidata namend;
+ /*
+ * nameidata is too large for the kernel stack; heap-allocate it
+ * (Z_WAITOK: may block; not NULL-checked, matching the other
+ * snapshot_* call sites in this file).
+ */
+ struct nameidata *ndp;
- error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
+ ndp = kheap_alloc(KHEAP_TEMP, sizeof(*ndp), Z_WAITOK);
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, ndp, DELETE,
OP_UNLINK, ctx);
if (error) {
goto out;
}
+ /* remove the snapshot entry, suppressing the namespace event */
- error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
+ error = VNOP_REMOVE(snapdvp, ndp->ni_vp, &ndp->ni_cnd,
VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
- vnode_put(namend.ni_vp);
- nameidone(&namend);
+ vnode_put(ndp->ni_vp);
+ nameidone(ndp);
vnode_put(snapdvp);
vnode_put(rvp);
out:
+ kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
+
return error;
}
*
* Marks the filesystem to revert to the given snapshot on next mount.
*/
-static int
+static int __attribute__((noinline))
snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
vfs_context_t ctx)
{
}
mp = vnode_mount(rvp);
- MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ name_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
if (error) {
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
vnode_put(rvp);
return error;
}
#if CONFIG_MACF
error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
if (error) {
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
vnode_put(rvp);
return error;
}
error = mount_iterref(mp, 0);
vnode_put(rvp);
if (error) {
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
return error;
}
error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
mount_iterdrop(mp);
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
if (error) {
/* If there was any error, try again using VNOP_IOCTL */
* rename(2) (which has to deal with a lot more complications). It differs
* slightly from rename(2) in that EEXIST is returned if the new name exists.
*/
-static int
+static int __attribute__((noinline))
snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
__unused uint32_t flags, vfs_context_t ctx)
{
struct nameidata to_node;
} * __rename_data;
- MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ __rename_data = kheap_alloc(KHEAP_TEMP, sizeof(*__rename_data), Z_WAITOK);
fromnd = &__rename_data->from_node;
tond = &__rename_data->to_node;
}
fvp = fromnd->ni_vp;
- MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ newname_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
if (error) {
goto out1;
out2:
nameidone(tond);
out1:
- FREE(newname_buf, M_TEMP);
+ zfree(ZV_NAMEI, newname_buf);
vnode_put(fvp);
vnode_put(snapdvp);
vnode_put(rvp);
nameidone(fromnd);
out:
- FREE(__rename_data, M_TEMP);
+ kheap_free(KHEAP_TEMP, __rename_data, sizeof(*__rename_data));
return error;
}
* get the vnode for the unnamed snapshot directory and the snapshot and
* mount the snapshot.
*/
-static int
+static int __attribute__((noinline))
snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
__unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
{
+ mount_t mp;
vnode_t rvp, snapdvp, snapvp, vp, pvp;
+ struct fs_snapshot_mount_args smnt_data;
int error;
struct nameidata *snapndp, *dirndp;
/* carving out a chunk for structs that are too big to be on stack. */
struct nameidata dirnd;
} * __snapshot_mount_data;
- MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
- M_TEMP, M_WAITOK);
+ __snapshot_mount_data = kheap_alloc(KHEAP_TEMP,
+ sizeof(*__snapshot_mount_data), Z_WAITOK);
snapndp = &__snapshot_mount_data->snapnd;
dirndp = &__snapshot_mount_data->dirnd;
vp = dirndp->ni_vp;
pvp = dirndp->ni_dvp;
+ mp = vnode_mount(rvp);
if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
error = EINVAL;
- } else {
- mount_t mp = vnode_mount(rvp);
- struct fs_snapshot_mount_args smnt_data;
+ goto out2;
+ }
- smnt_data.sm_mp = mp;
- smnt_data.sm_cnp = &snapndp->ni_cnd;
- error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
- &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
- KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+#if CONFIG_MACF
+ error = mac_mount_check_snapshot_mount(ctx, rvp, vp, &dirndp->ni_cnd, snapndp->ni_cnd.cn_nameptr,
+ mp->mnt_vfsstat.f_fstypename);
+ if (error) {
+ goto out2;
}
+#endif
+
+ smnt_data.sm_mp = mp;
+ smnt_data.sm_cnp = &snapndp->ni_cnd;
+ error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
+ &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
+ KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+out2:
vnode_put(vp);
vnode_put(pvp);
nameidone(dirndp);
vnode_put(rvp);
nameidone(snapndp);
out:
- FREE(__snapshot_mount_data, M_TEMP);
+ kheap_free(KHEAP_TEMP, __snapshot_mount_data,
+ sizeof(*__snapshot_mount_data));
return error;
}
*
* Marks the filesystem to root from the given snapshot on next boot.
*/
-static int
+static int __attribute__((noinline))
snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
vfs_context_t ctx)
{
}
mp = vnode_mount(rvp);
- MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ name_buf = zalloc_flags(ZV_NAMEI, Z_WAITOK);
error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
if (error) {
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
vnode_put(rvp);
return error;
}
error = mount_iterref(mp, 0);
vnode_put(rvp);
if (error) {
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
return error;
}
error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
mount_iterdrop(mp);
- FREE(name_buf, M_TEMP);
+ zfree(ZV_NAMEI, name_buf);
return error;
}
}
/*
- * Enforce user authorization for snapshot modification operations
+ * Enforce user authorization for snapshot modification operations,
+ * or if trying to root from snapshot.
*/
- if ((uap->op != SNAPSHOT_OP_MOUNT) &&
- (uap->op != SNAPSHOT_OP_ROOT)) {
+ if (uap->op != SNAPSHOT_OP_MOUNT) {
vnode_t dvp = NULLVP;
vnode_t devvp = NULLVP;
mount_t mp;