/*
- * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
#include <sys/fsctl.h>
#include <sys/ubc_internal.h>
#include <sys/disk.h>
+#include <sys/content_protection.h>
+#include <sys/clonefile.h>
+#include <sys/snapshot.h>
+#include <sys/priv.h>
#include <machine/cons.h>
#include <machine/limits.h>
#include <miscfs/specfs/specdev.h>
#include <kern/task.h>
#include <vm/vm_pageout.h>
+#include <vm/vm_protos.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
+#include <IOKit/IOBSD.h>
+
+#if ROUTEFS
+#include <miscfs/routefs/routefs.h>
+#endif /* ROUTEFS */
#if CONFIG_MACF
#include <security/mac.h>
#include <security/mac_framework.h>
#endif
-#if CONFIG_FSE
+#if CONFIG_FSE
#define GET_PATH(x) \
- (x) = get_pathbuff();
+ (x) = get_pathbuff();
#define RELEASE_PATH(x) \
release_pathbuff(x);
-#else
+#else
#define GET_PATH(x) \
- MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x) \
FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
static int sync_callback(mount_t, void *);
static void sync_thread(void *, __unused wait_result_t);
static int sync_async(int);
-static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
- user_addr_t bufp, int *sizep, boolean_t is_64_bit,
+static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
+ user_addr_t bufp, int *sizep, boolean_t is_64_bit,
boolean_t partial_copy);
static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
user_addr_t bufp);
extern const struct fileops vnops;
#if CONFIG_APPLEDOUBLE
-extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
+extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
#endif /* CONFIG_APPLEDOUBLE */
-typedef uint32_t vfs_rename_flags_t;
-#if CONFIG_SECLUDED_RENAME
-enum {
- VFS_SECLUDE_RENAME = 0x00000001
-};
-#endif
-
/*
* Virtual File System System Calls
*/
-#if NFSCLIENT || DEVFS
+#if NFSCLIENT || DEVFS || ROUTEFS
/*
* Private in-kernel mounting spi (NFS only, not exported)
*/
boolean_t did_namei;
int error;
- NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
+ NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
/*
}
void
-vfs_notify_mount(vnode_t pdvp)
+vfs_notify_mount(vnode_t pdvp)
{
vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
lock_vnode_and_post(pdvp, NOTE_WRITE);
*
* Parameters: p Process requesting the mount
* uap User argument descriptor (see below)
- * retval (ignored)
+ * retval (ignored)
*
* Indirect: uap->type Filesystem type
* uap->path Path to mount
- * uap->data Mount arguments
- * uap->mac_p MAC info
+ * uap->data Mount arguments
+ * uap->mac_p MAC info
* uap->flags Mount flags
- *
+ *
*
* Returns: 0 Success
* !0 Not success
char *labelstr = NULL;
int flags = uap->flags;
int error;
-#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
+#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
boolean_t is_64bit = IS_64BIT_PROCESS(p);
#else
#pragma unused(p)
/*
* Get the vnode to be covered
*/
- NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
+ NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
if (error) {
need_nameidone = 1;
vp = nd.ni_vp;
pvp = nd.ni_dvp;
-
+
#ifdef CONFIG_IMGSRC_ACCESS
/* Mounting image source cannot be batched with other operations */
if (flags == MNT_IMGSRC_BY_INDEX) {
AUDIT_ARG(fflags, flags);
+#if SECURE_KERNEL
+ if (flags & MNT_UNION) {
+ /* No union mounts on release kernels */
+ error = EPERM;
+ goto out;
+ }
+#endif
+
if ((vp->v_flag & VROOT) &&
(vp->v_mount->mnt_flag & MNT_ROOTFS)) {
if (!(flags & MNT_UNION)) {
flags |= MNT_UPDATE;
}
else {
- /*
+ /*
* For a union mount on '/', treat it as fresh
- * mount instead of update.
- * Otherwise, union mouting on '/' used to panic the
- * system before, since mnt_vnodecovered was found to
- * be NULL for '/' which is required for unionlookup
+ * mount instead of update.
+ * Otherwise, union mounting on '/' used to panic the
+ * system before, since mnt_vnodecovered was found to
+ * be NULL for '/' which is required for unionlookup
* after it gets ENOENT on union mount.
*/
flags = (flags & ~(MNT_UPDATE));
}
-#ifdef SECURE_KERNEL
+#if SECURE_KERNEL
if ((flags & MNT_RDONLY) == 0) {
/* Release kernels are not allowed to mount "/" as rw */
error = EPERM;
- goto out;
+ goto out;
}
#endif
/*
* See 7392553 for more details on why this check exists.
* Suffice to say: If this check is ON and something tries
* to mount the rootFS RW, we'll turn off the codesign
- * bitmap optimization.
- */
+ * bitmap optimization.
+ */
#if CHECK_CS_VALIDATION_BITMAP
if ((flags & MNT_RDONLY) == 0 ) {
root_fs_upgrade_try = TRUE;
/*
* common mount implementation (final stage of mounting)
-
+
* Arguments:
* fstypename file system type (ie it's vfs name)
* pvp parent of covered vnode
* If content protection is enabled, update mounts are not
* allowed to turn it off.
*/
- if ((mp->mnt_flag & MNT_CPROTECT) &&
+ if ((mp->mnt_flag & MNT_CPROTECT) &&
((flags & MNT_CPROTECT) == 0)) {
error = EINVAL;
goto out1;
}
-#ifdef CONFIG_IMGSRC_ACCESS
+#ifdef CONFIG_IMGSRC_ACCESS
/* Can't downgrade the backer of the root FS */
if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
(!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
-#if NFSCLIENT || DEVFS
+#if NFSCLIENT || DEVFS || ROUTEFS
if (kernelmount)
mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
MNT_QUARANTINE | MNT_CPROTECT);
+
+#if SECURE_KERNEL
+#if !CONFIG_MNT_SUID
+ /*
+ * On release builds of iOS-based platforms, always enforce NOSUID on all
+ * mounts. We do this here because we can catch update mounts as well as
+ * non-update mounts in this case.
+ */
+ mp->mnt_flag |= (MNT_NOSUID);
+#endif
+#endif
+
mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
/*
* Process device path for local file systems if requested
*/
- if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
+ if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
+ !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
if (vfs_context_is64bit(ctx)) {
if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
- goto out1;
+ goto out1;
fsmountargs += sizeof(devpath);
} else {
user32_addr_t tmp;
if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
- goto out1;
+ goto out1;
/* munge into LP64 addr */
devpath = CAST_USER_ADDR_T(tmp);
fsmountargs += sizeof(tmp);
if ( (error = namei(&nd)) )
goto out1;
- strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+ strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
devvp = nd.ni_vp;
nameidone(&nd);
*/
if ( (error = vfs_mountedon(devvp)) )
goto out3;
-
+
if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
error = EBUSY;
goto out3;
vnode_getalways(device_vnode);
if (suser(vfs_context_ucred(ctx), NULL) &&
- (error = vnode_authorize(device_vnode, NULL,
+ (error = vnode_authorize(device_vnode, NULL,
KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
ctx)) != 0) {
vnode_put(device_vnode);
/*
* Mount the filesystem.
*/
- error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
+ if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
+ error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
+ (caddr_t)fsmountargs, 0, ctx);
+ } else {
+ error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
+ }
if (flags & MNT_UPDATE) {
if (mp->mnt_kern_flag & MNTK_WANTRDWR)
/* Unmount the filesystem as cdir/rdirs cannot be updated */
goto out4;
}
- /*
- * there is no cleanup code here so I have made it void
+ /*
+ * there is no cleanup code here so I have made it void
* we need to revisit this
*/
(void)VFS_START(mp, 0, ctx);
VFSATTR_INIT(&vfsattr);
VFSATTR_WANTED(&vfsattr, f_capabilities);
if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
- vfs_getattr(mp, &vfsattr, ctx) == 0 &&
+ vfs_getattr(mp, &vfsattr, ctx) == 0 &&
VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
(vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
/* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
}
+
+ if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
+ (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
+ mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
+ }
}
if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
* defaults will have been set, so no reason to bail or care
*/
vfs_init_io_attributes(device_vnode, mp);
- }
+ }
/* Now that mount is setup, notify the listeners */
vfs_notify_mount(pvp);
+ IOBSDMountChange(mp, kIOMountChangeMount);
+
} else {
/* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
if (mp->mnt_vnodelist.tqh_first != NULL) {
- panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
+ panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
mp->mnt_vtable->vfc_name, error);
}
}
lck_rw_done(&mp->mnt_rwlock);
is_rwlock_locked = FALSE;
-
+
/*
* if we get here, we have a mount structure that needs to be freed,
* but since the coveredvp hasn't yet been updated to point at it,
/* Error condition exits */
out4:
(void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
-
- /*
+
+ /*
* If the mount has been placed on the covered vp,
* it may have been discovered by now, so we have
* to treat this just like an unmount
if (is_rwlock_locked == TRUE) {
lck_rw_done(&mp->mnt_rwlock);
}
-
+
if (mntalloc) {
if (mp->mnt_crossref)
mount_dropcrossref(mp, vp, 0);
return(error);
}
-/*
+/*
* Flush in-core data, check for competing mount attempts,
* and set VMOUNT
*/
VATTR_WANTED(&va, va_uid);
if ((error = vnode_getattr(vp, &va, ctx)) ||
(va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
- (!vfs_context_issuser(ctx)))) {
+ (!vfs_context_issuser(ctx)))) {
error = EPERM;
goto out;
}
#define IMGSRC_DEBUG(args...) printf(args)
#else
#define IMGSRC_DEBUG(args...) do { } while(0)
-#endif
+#endif
static int
authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
* permitted to update it.
*/
if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
- (!vfs_context_issuser(ctx))) {
+ (!vfs_context_issuser(ctx))) {
error = EPERM;
goto out;
}
return error;
}
-static void
+static void
mount_end_update(mount_t mp)
{
lck_rw_done(&mp->mnt_rwlock);
}
static int
-relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
- const char *fsname, vfs_context_t ctx,
+relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
+ const char *fsname, vfs_context_t ctx,
boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
{
int error;
goto out0;
}
- /*
+ /*
* It can only be moved once. Flag is set under the rwlock,
* so we're now safe to proceed.
*/
IMGSRC_DEBUG("Already moved [2]\n");
goto out1;
}
-
-
+
+
IMGSRC_DEBUG("Preparing coveredvp.\n");
/* Mark covered vnode as mount in progress, authorize placing mount on top */
IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
goto out1;
}
-
+
IMGSRC_DEBUG("Covered vp OK.\n");
/* Sanity check the name caller has provided */
}
}
- /*
+ /*
* Place mp on top of vnode, ref the vnode, call checkdirs(),
- * and increment the name cache's mount generation
+ * and increment the name cache's mount generation
*/
IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
placed = TRUE;
- strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
- strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+ strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
+ strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
/* Forbid future moves */
mount_lock(mp);
return 0;
out3:
- strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
+ strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
mount_lock(mp);
mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
mount_unlock(mp);
out2:
- /*
+ /*
* Placing the mp on the vnode clears VMOUNT,
- * so cleanup is different after that point
+ * so cleanup is different after that point
*/
if (placed) {
/* Rele the vp, clear VMOUNT and v_mountedhere */
if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
return;
}
- /*
+ /*
* Enable filesystem disk quotas if necessary.
* We ignore errors as this should not interfere with final mount
*/
static int
-checkdirs_callback(proc_t p, void * arg)
+checkdirs_callback(proc_t p, void * arg)
{
struct cdirargs * cdrp = (struct cdirargs * )arg;
vnode_t olddp = cdrp->olddp;
struct nameidata nd;
vfs_context_t ctx = vfs_context_current();
- NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
if (error)
}
int
-vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
+vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
{
mount_t mp;
}
/*
- * Skip authorization if the mount is tagged as permissive and
+ * Skip authorization if the mount is tagged as permissive and
* this is not a forced-unmount attempt.
*/
if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
int pflags_save = 0;
#endif /* CONFIG_TRIGGERS */
+#if CONFIG_FSE
+ if (!(flags & MNT_FORCE)) {
+ fsevent_unmount(mp, ctx); /* has to come first! */
+ }
+#endif
+
mount_lock(mp);
/*
*/
mp->mnt_realrootvp = NULLVP;
mount_unlock(mp);
-
+
if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
/*
* Force unmount any mounts in this filesystem.
lck_rw_lock_exclusive(&mp->mnt_rwlock);
if (withref != 0)
mount_drop(mp, 0);
-#if CONFIG_FSE
- fsevent_unmount(mp); /* has to come first! */
-#endif
error = 0;
if (forcedunmount == 0) {
ubc_umount(mp); /* release cached vnodes */
}
}
+ IOBSDMountChange(mp, kIOMountChangeUnmount);
+
#if CONFIG_TRIGGERS
vfs_nested_trigger_unmounts(mp, flags, ctx);
did_vflush = 1;
-#endif
+#endif
if (forcedunmount)
lflags |= FORCECLOSE;
error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
out:
if (mp->mnt_lflag & MNT_LWAIT) {
mp->mnt_lflag &= ~MNT_LWAIT;
- needwakeup = 1;
+ needwakeup = 1;
}
#if CONFIG_TRIGGERS
OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
}
- /*
+ /*
* Callback and context are set together under the mount lock, and
- * never cleared, so we're safe to examine them here, drop the lock,
+ * never cleared, so we're safe to examine them here, drop the lock,
* and call out.
*/
if (mp->mnt_triggercallback != NULL) {
} else {
mount_unlock(mp);
}
-#else
+#else
mount_unlock(mp);
#endif /* CONFIG_TRIGGERS */
/*
* Fill the array with submount fsids.
* Since mounts are always added to the tail of the mount list, the
- * list is always in mount order.
+ * list is always in mount order.
* For each mount check if the mounted-on vnode belongs to a
* mount that's already added to our array of mounts to be unmounted.
*/
panic("mount cross refs -ve");
if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
-
+
if (need_put)
vnode_put_locked(dp);
vnode_unlock(dp);
int print_vmpage_stat=0;
int sync_timeout = 60; // Sync time limit (sec)
-static int
+static int
sync_callback(mount_t mp, __unused void *arg)
{
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
vfs_context_t ctx = vfs_context_current();
vnode_t vp;
- NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
- if (error)
+ if (error != 0)
return (error);
vp = nd.ni_vp;
mp = vp->v_mount;
sp = &mp->mnt_vfsstat;
nameidone(&nd);
+#if CONFIG_MACF
+ error = mac_mount_check_stat(ctx, mp);
+ if (error != 0)
+ return (error);
+#endif
+
error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
- if (error != 0) {
+ if (error != 0) {
vnode_put(vp);
return (error);
}
error = EBADF;
goto out;
}
+
+#if CONFIG_MACF
+ error = mac_mount_check_stat(vfs_context_current(), mp);
+ if (error != 0)
+ goto out;
+#endif
+
sp = &mp->mnt_vfsstat;
- if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
+ if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
goto out;
}
return (error);
}
-/*
- * Common routine to handle copying of statfs64 data to user space
+/*
+ * Common routine to handle copying of statfs64 data to user space
*/
-static int
+static int
statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
{
int error;
struct statfs64 sfs;
-
+
bzero(&sfs, sizeof(sfs));
sfs.f_bsize = sfsp->f_bsize;
return(error);
}
-/*
- * Get file system statistics in 64-bit mode
+/*
+ * Get file system statistics in 64-bit mode
*/
int
statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
vfs_context_t ctxp = vfs_context_current();
vnode_t vp;
- NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctxp);
error = namei(&nd);
- if (error)
+ if (error != 0)
return (error);
vp = nd.ni_vp;
mp = vp->v_mount;
sp = &mp->mnt_vfsstat;
nameidone(&nd);
+#if CONFIG_MACF
+ error = mac_mount_check_stat(ctxp, mp);
+ if (error != 0)
+ return (error);
+#endif
+
error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
- if (error != 0) {
+ if (error != 0) {
vnode_put(vp);
return (error);
}
return (error);
}
-/*
- * Get file system statistics in 64-bit mode
+/*
+ * Get file system statistics in 64-bit mode
*/
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
error = EBADF;
goto out;
}
+
+#if CONFIG_MACF
+ error = mac_mount_check_stat(vfs_context_current(), mp);
+ if (error != 0)
+ goto out;
+#endif
+
sp = &mp->mnt_vfsstat;
if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
goto out;
static int
getfsstat_callback(mount_t mp, void * arg)
{
-
+
struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
struct vfsstatfs *sp;
int error, my_size;
vfs_context_t ctx = vfs_context_current();
if (fstp->sfsp && fstp->count < fstp->maxcount) {
+#if CONFIG_MACF
+ error = mac_mount_check_stat(ctx, mp);
+ if (error != 0) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+#endif
sp = &mp->mnt_vfsstat;
/*
* If MNT_NOWAIT is specified, do not refresh the
*
* Parameters: p (ignored)
* uap User argument descriptor (see below)
- * retval Count of file system statistics (N stats)
+ * retval Count of file system statistics (N stats)
*
* Indirect: uap->bufsize Buffer size
* uap->macsize MAC info size
* uap->buf Buffer where information will be returned
* uap->mac MAC info
* uap->flags File system flags
- *
+ *
*
* Returns: 0 Success
* !0 Not success
fst.error = 0;
fst.maxcount = maxcount;
-
+
vfs_iterate(0, getfsstat_callback, &fst);
if (mp)
int error;
if (fstp->sfsp && fstp->count < fstp->maxcount) {
+#if CONFIG_MACF
+ error = mac_mount_check_stat(vfs_context_current(), mp);
+ if (error != 0) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+#endif
sp = &mp->mnt_vfsstat;
/*
* If MNT_NOWAIT is specified, do not refresh the fsstat
* by this call needs a vnode_put
*
*/
-static int
+int
vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
{
int error;
vnode_t tvp;
vfs_context_t ctx = vfs_context_current();
- NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = change_dir(&nd, ctx);
if (error)
if ((error = suser(kauth_cred_get(), &p->p_acflag)))
return (error);
- NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = change_dir(&nd, ctx);
if (error)
int flags, oflags;
int type, indx, error;
struct flock lf;
- int no_controlling_tty = 0;
- int deny_controlling_tty = 0;
- struct session *sessp = SESSION_NULL;
+ struct vfs_context context;
oflags = uflags;
if ((oflags & O_ACCMODE) == O_ACCMODE)
return(EINVAL);
+
flags = FFLAGS(uflags);
+ CLR(flags, FENCRYPTED);
+ CLR(flags, FUNENCRYPTED);
AUDIT_ARG(fflags, oflags);
AUDIT_ARG(mode, vap->va_mode);
}
uu->uu_dupfd = -indx - 1;
- if (!(p->p_flag & P_CONTROLT)) {
- sessp = proc_session(p);
- no_controlling_tty = 1;
- /*
- * If conditions would warrant getting a controlling tty if
- * the device being opened is a tty (see ttyopen in tty.c),
- * but the open flags deny it, set a flag in the session to
- * prevent it.
- */
- if (SESS_LEADER(p, sessp) &&
- sessp->s_ttyvp == NULL &&
- (flags & O_NOCTTY)) {
- session_lock(sessp);
- sessp->s_flags |= S_NOCTTY;
- session_unlock(sessp);
- deny_controlling_tty = 1;
- }
- }
-
if ((error = vn_open_auth(ndp, &flags, vap))) {
if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
fp_drop(p, indx, NULL, 0);
*retval = indx;
- if (deny_controlling_tty) {
- session_lock(sessp);
- sessp->s_flags &= ~S_NOCTTY;
- session_unlock(sessp);
- }
- if (sessp != SESSION_NULL)
- session_rele(sessp);
return (0);
}
}
if (error == ERESTART)
error = EINTR;
fp_free(p, indx, fp);
-
- if (deny_controlling_tty) {
- session_lock(sessp);
- sessp->s_flags &= ~S_NOCTTY;
- session_unlock(sessp);
- }
- if (sessp != SESSION_NULL)
- session_rele(sessp);
return (error);
}
uu->uu_dupfd = 0;
vp = ndp->ni_vp;
- fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
+ fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
fp->f_fglob->fg_ops = &vnops;
fp->f_fglob->fg_data = (caddr_t)vp;
-#if CONFIG_PROTECT
- if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
- if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
- fp->f_fglob->fg_flag |= FENCRYPTED;
- }
- }
-#endif
-
if (flags & (O_EXLOCK | O_SHLOCK)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
fp->f_fglob->fg_flag |= FHASLOCK;
}
- /* try to truncate by setting the size attribute */
- if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
- goto bad;
-
- /*
- * If the open flags denied the acquisition of a controlling tty,
- * clear the flag in the session structure that prevented the lower
- * level code from assigning one.
- */
- if (deny_controlling_tty) {
- session_lock(sessp);
- sessp->s_flags &= ~S_NOCTTY;
- session_unlock(sessp);
- }
-
+#if DEVELOPMENT || DEBUG
/*
- * If a controlling tty was set by the tty line discipline, then we
- * want to set the vp of the tty into the session structure. We have
- * a race here because we can't get to the vp for the tp in ttyopen,
- * because it's not passed as a parameter in the open path.
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
*/
- if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
- vnode_t ttyvp;
+#else /* DEVELOPMENT || DEBUG */
- session_lock(sessp);
- ttyvp = sessp->s_ttyvp;
- sessp->s_ttyvp = vp;
- sessp->s_ttyvid = vnode_vid(vp);
- session_unlock(sessp);
+ if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
+ /* block attempt to write/truncate swapfile */
+ error = EPERM;
+ goto bad;
}
+#endif /* DEVELOPMENT || DEBUG */
+
+ /* try to truncate by setting the size attribute */
+ if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
+ goto bad;
/*
* For directories we hold some additional information in the fd.
vnode_put(vp);
+ /*
+ * The first terminal open (without O_NOCTTY) by a session leader
+ * results in it being set as the controlling terminal.
+ */
+ if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
+ !(flags & O_NOCTTY)) {
+ int tmp = 0;
+
+ (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
+ (caddr_t)&tmp, ctx);
+ }
+
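/*
 * Illustrative userspace sketch (not taken from this diff; open_modem_device
 * is a hypothetical helper): per the hunk above, the first open(2) of a tty
 * by a session leader without O_NOCTTY now acquires it as the controlling
 * terminal via TIOCSCTTY. A program that only wants the device opts out by
 * passing O_NOCTTY.
 */
#include <fcntl.h>

static int
open_modem_device(const char *path)
{
	/* O_NOCTTY: never become our controlling terminal, even if we are a
	 * session leader (e.g. after setsid()) that has none yet. */
	return open(path, O_RDWR | O_NOCTTY | O_NONBLOCK);
}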
proc_fdlock(p);
if (flags & O_CLOEXEC)
*fdflags(p, indx) |= UF_EXCLOSE;
if (flags & O_CLOFORK)
*fdflags(p, indx) |= UF_FORKCLOSE;
procfdtbl_releasefd(p, indx, NULL);
+
+#if CONFIG_SECLUDED_MEMORY
+ if (secluded_for_filecache &&
+ FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
+ vnode_vtype(vp) == VREG) {
+ memory_object_control_t moc;
+
+ moc = ubc_getobject(vp, UBC_FLAGS_NONE);
+
+ if (moc == MEMORY_OBJECT_CONTROL_NULL) {
+ /* nothing to do... */
+ } else if (fp->f_fglob->fg_flag & FWRITE) {
+ /* writable -> no longer eligible for secluded pages */
+ memory_object_mark_eligible_for_secluded(moc,
+ FALSE);
+ } else if (secluded_for_filecache == 1) {
+ char pathname[32] = { 0, };
+ size_t copied;
+ /* XXX FBDP: better way to detect /Applications/ ? */
+ if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
+ copyinstr(ndp->ni_dirp,
+ pathname,
+ sizeof (pathname),
+ &copied);
+ } else {
+ copystr(CAST_DOWN(void *, ndp->ni_dirp),
+ pathname,
+ sizeof (pathname),
+ &copied);
+ }
+ pathname[sizeof (pathname) - 1] = '\0';
+ if (strncmp(pathname,
+ "/Applications/",
+ strlen("/Applications/")) == 0 &&
+ strncmp(pathname,
+ "/Applications/Camera.app/",
+ strlen("/Applications/Camera.app/")) != 0) {
+ /*
+ * not writable
+ * AND from "/Applications/"
+ * AND not from "/Applications/Camera.app/"
+ * ==> eligible for secluded
+ */
+ memory_object_mark_eligible_for_secluded(moc,
+ TRUE);
+ }
+ } else if (secluded_for_filecache == 2) {
+/* not implemented... */
+ if (!strncmp(vp->v_name,
+ DYLD_SHARED_CACHE_NAME,
+ strlen(DYLD_SHARED_CACHE_NAME)) ||
+ !strncmp(vp->v_name,
+ "dyld",
+ strlen(vp->v_name)) ||
+ !strncmp(vp->v_name,
+ "launchd",
+ strlen(vp->v_name)) ||
+ !strncmp(vp->v_name,
+ "Camera",
+ strlen(vp->v_name)) ||
+ !strncmp(vp->v_name,
+ "mediaserverd",
+ strlen(vp->v_name))) {
+ /*
+ * This file matters when launching Camera:
+ * do not store its contents in the secluded
+ * pool that will be drained on Camera launch.
+ */
+ memory_object_mark_eligible_for_secluded(moc,
+ FALSE);
+ }
+ }
+ }
+#endif /* CONFIG_SECLUDED_MEMORY */
+
fp_drop(p, indx, fp, 1);
proc_fdunlock(p);
*retval = indx;
- if (sessp != SESSION_NULL)
- session_rele(sessp);
return (0);
bad:
- if (deny_controlling_tty) {
- session_lock(sessp);
- sessp->s_flags &= ~S_NOCTTY;
- session_unlock(sessp);
- }
- if (sessp != SESSION_NULL)
- session_rele(sessp);
-
- struct vfs_context context = *vfs_context_current();
+ context = *vfs_context_current();
context.vc_ucred = fp->f_fglob->fg_cred;
-
+
if ((fp->f_fglob->fg_flag & FHASLOCK) &&
(FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = F_UNLCK;
-
+
(void)VNOP_ADVLOCK(
vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
}
return ciferror;
}
-/*
+/*
* Go through the data-protected atomically controlled open (2)
- *
+ *
* int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
*/
int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
int class = uap->class;
int dpflags = uap->dpflags;
- /*
+ /*
* Follow the same path as normal open(2)
* Look up the item if it exists, and acquire the vnode.
*/
struct nameidata nd;
int cmode;
int error;
-
+
VATTR_INIT(&va);
/* Mask off all but regular access permissions */
cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
uap->path, vfs_context_current());
- /*
- * Initialize the extra fields in vnode_attr to pass down our
+ /*
+ * Initialize the extra fields in vnode_attr to pass down our
* extra fields.
* 1. target cprotect class.
- * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
- */
- if (flags & O_CREAT) {
- VATTR_SET(&va, va_dataprotect_class, class);
- }
-
- if (dpflags & O_DP_GETRAWENCRYPTED) {
+ * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
+ */
+ if (flags & O_CREAT) {
+ /* lower level kernel code validates that the class is valid before applying it. */
+ if (class != PROTECTION_CLASS_DEFAULT) {
+ /*
+ * PROTECTION_CLASS_DEFAULT implies that we make the class for this
+ * file behave the same as open (2)
+ */
+ VATTR_SET(&va, va_dataprotect_class, class);
+ }
+ }
+
+ if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
if ( flags & (O_RDWR | O_WRONLY)) {
/* Not allowed to write raw encrypted bytes */
- return EINVAL;
- }
- VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+ return EINVAL;
+ }
+ if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
+ VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+ }
+ if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
+ VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
+ }
}
error = open1(vfs_context_current(), &nd, uap->flags, &va,
int pathlen = 0;
vfs_context_t ctx = vfs_context_current();
+ if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
+ return (error);
+ }
+
if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
return (error);
}
if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
return (error);
- NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
+ NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
if (error)
}
switch (uap->mode & S_IFMT) {
- case S_IFMT: /* used by badsect to flag bad sectors */
- VATTR_SET(&va, va_type, VBAD);
- break;
case S_IFCHR:
VATTR_SET(&va, va_type, VCHR);
break;
int error;
struct nameidata nd;
- NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
+ NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
UIO_USERSPACE, upath, ctx);
error = namei(&nd);
if (error)
len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
if (len > MAXPATHLEN) {
char *ptr;
-
+
// the string got truncated!
*truncated_path = 1;
ptr = my_strrchr(path, '/');
if (ret != ENOSPC) {
printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
- }
+ }
*truncated_path = 1;
-
+
do {
if (mydvp->v_parent != NULL) {
mydvp = mydvp->v_parent;
len = 2;
mydvp = NULL;
}
-
+
if (mydvp == NULL) {
break;
}
* However, some file systems may have limited support.
*/
if (vp->v_type == VDIR) {
- if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
+ if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
error = EPERM; /* POSIX */
goto out;
}
+
/* Linking to a directory requires ownership. */
if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
struct vnode_attr dva;
error = EXDEV;
goto out2;
}
-
+
/* authorize creation of the target node */
if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
goto out2;
link_name_len = MAXPATHLEN;
if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
/*
- * Call out to allow 3rd party notification of rename.
+ * Call out to allow 3rd party notification of rename.
* Ignore result of kauth_authorize_fileop call.
*/
- kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
- (uintptr_t)link_to_path,
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
+ (uintptr_t)link_to_path,
(uintptr_t)target_path);
}
if (link_to_path != NULL) {
int error;
struct nameidata nd;
vnode_t vp, dvp;
- uint32_t dfflags; // Directory file flags
size_t dummy=0;
proc_t p;
VATTR_SET(&va, va_type, VLNK);
VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
- /*
- * Handle inheritance of restricted flag
- */
- error = vnode_flags(dvp, &dfflags, ctx);
- if (error)
- goto skipit;
- if (dfflags & SF_RESTRICTED)
- VATTR_SET(&va, va_flags, SF_RESTRICTED);
-
#if CONFIG_MACF
error = mac_vnode_check_create(ctx,
dvp, &nd.ni_cnd, &va);
error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
#if CONFIG_MACF
- if (error == 0)
+ if (error == 0 && vp)
error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif
/* do fallback attribute handling */
- if (error == 0)
+ if (error == 0 && vp)
error = vnode_setattr_fallback(vp, &va, ctx);
if (error == 0) {
int update_flags = 0;
+ /* check if a new vnode was created, else try to get one */
if (vp == NULL) {
nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
cnp = &nd.ni_cnd;
-lookup_continue:
+continue_lookup:
error = nameiat(&nd, fd);
if (error)
return (error);
if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
flags |= VNODE_REMOVE_NODELETEBUSY;
}
-
+
/* Skip any potential upcalls if told to. */
if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
error = EBUSY;
}
+#if DEVELOPMENT || DEBUG
+ /*
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
if (!batched) {
error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
if (error) {
- if (error == ENOENT &&
- retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- do_retry = 1;
- retry_count++;
+ if (error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ do_retry = 1;
+ retry_count++;
+ }
}
goto out;
}
error = EISDIR;
goto out;
}
- goto lookup_continue;
- } else if (error == ENOENT && batched &&
- retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- /*
- * For compound VNOPs, the authorization callback may
- * return ENOENT in case of racing hardlink lookups
- * hitting the name cache, redrive the lookup.
- */
- do_retry = 1;
- retry_count += 1;
- goto out;
+ goto continue_lookup;
+ } else if (error == ENOENT && batched) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * For compound VNOPs, the authorization callback may
+ * return ENOENT in case of racing hardlink lookups
+ * hitting the name cache, redrive the lookup.
+ */
+ do_retry = 1;
+ retry_count += 1;
+ goto out;
+ }
}
}
/*
- * Call out to allow 3rd party notification of delete.
+ * Call out to allow 3rd party notification of delete.
* Ignore result of kauth_authorize_fileop call.
*/
if (!error) {
if (has_listeners) {
- kauth_authorize_fileop(vfs_context_ucred(ctx),
- KAUTH_FILEOP_DELETE,
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_DELETE,
(uintptr_t)vp,
(uintptr_t)path);
}
RELEASE_PATH(path);
#if NAMEDRSRCFORK
- /* recycle the deleted rsrc fork vnode to force a reclaim, which
+ /* recycle the deleted rsrc fork vnode to force a reclaim, which
* will cause its shadow file to go away if necessary.
*/
if (vp && (vnode_isnamedstream(vp)) &&
(vp->v_parent != NULLVP) &&
vnode_isshadow(vp)) {
vnode_recycle(vp);
- }
+ }
#endif
/*
* nameidone has to happen before we vnode_put(dvp)
break;
case L_SET:
break;
+ case SEEK_HOLE:
+ error = VNOP_IOCTL(vp, FSCTL_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
+ break;
+ case SEEK_DATA:
+ error = VNOP_IOCTL(vp, FSCTL_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
+ break;
default:
error = EINVAL;
}
}
}
- /*
+ /*
* An lseek can affect whether data is "available to read." Use
* hint of NOTE_NONE so no EVFILT_VNODE events fire
*/
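/*
 * Illustrative userspace sketch (not taken from this diff; print_data_regions
 * is a hypothetical helper): the new SEEK_HOLE/SEEK_DATA cases above let
 * lseek(2) enumerate the allocated regions of a sparse file. SEEK_DATA fails
 * once no further data exists, which terminates the loop.
 */
#include <stdio.h>
#include <unistd.h>

static void
print_data_regions(int fd)
{
	off_t data = 0, hole;

	while ((data = lseek(fd, data, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			break;
		printf("data: [%lld, %lld)\n", (long long)data, (long long)hole);
		data = hole;
	}
}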
/* take advantage of definition of uflags */
action = uflags >> 8;
}
-
+
#if CONFIG_MACF
error = mac_vnode_check_access(ctx, vp, uflags);
if (error)
* access_extended: Check access permissions in bulk.
*
* Description: uap->entries Pointer to an array of accessx
- * descriptor structs, plus one or
- * more NULL terminated strings (see
+ * descriptor structs, plus one or
+ * more NULL terminated strings (see
* "Notes" section below).
* uap->size Size of the area pointed to by
* uap->entries.
*
* since we must have at least one string, and the string must
* be at least one character plus the NULL terminator in length.
- *
+ *
* XXX: Need to support the check-as uid argument
*/
int
goto out;
}
+ /* Also do not let ad_name_offset point to something beyond the size of the input */
+ if (input[i].ad_name_offset >= uap->size) {
+ error = EINVAL;
+ goto out;
+ }
+
/*
* An offset of 0 means use the previous descriptor's offset;
* this is used to chain multiple requests for the same file
vnode_put(dvp);
dvp = NULL;
}
-
+
/*
* Scan forward in the descriptor list to see if we
* need the parent vnode. We will need it if we are
for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
if (input[j].ad_flags & _DELETE_OK)
wantdelete = 1;
-
+
niopts = FOLLOW | AUDITVNPATH1;
/* need parent for vnode_authorize for deletion test */
/* copy out results */
error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
-
+
out:
if (input && input != stack_input)
FREE(input, M_TEMP);
goto out;
#if NAMEDRSRCFORK
- /* Grab reference on the shadow stream file vnode to
+ /* Grab reference on the shadow stream file vnode to
* force an inactive on release which will mark it
* for recycle.
*/
if (amode & _DELETE_OK)
vnode_put(nd.ni_dvp);
nameidone(&nd);
-
+
out:
if (!(flag & AT_EACCESS))
kauth_cred_unref(&context.vc_ucred);
statptr = (void *)&source;
#if NAMEDRSRCFORK
- /* Grab reference on the shadow stream file vnode to
- * force an inactive on release which will mark it
+ /* Grab reference on the shadow stream file vnode to
+ * force an inactive on release which will mark it
* for recycle.
*/
if (vnode_isnamedstream(nd.ni_vp) &&
source.sb64.st_qspare[0] = 0LL;
source.sb64.st_qspare[1] = 0LL;
if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
- munge_user64_stat64(&source.sb64, &dest.user64_sb64);
+ munge_user64_stat64(&source.sb64, &dest.user64_sb64);
my_size = sizeof(dest.user64_sb64);
sbp = (caddr_t)&dest.user64_sb64;
} else {
- munge_user32_stat64(&source.sb64, &dest.user32_sb64);
+ munge_user32_stat64(&source.sb64, &dest.user32_sb64);
my_size = sizeof(dest.user32_sb64);
sbp = (caddr_t)&dest.user32_sb64;
}
source.sb.st_qspare[0] = 0LL;
source.sb.st_qspare[1] = 0LL;
if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
- munge_user64_stat(&source.sb, &dest.user64_sb);
+ munge_user64_stat(&source.sb, &dest.user64_sb);
my_size = sizeof(dest.user64_sb);
sbp = (caddr_t)&dest.user64_sb;
} else {
- munge_user32_stat(&source.sb, &dest.user32_sb);
+ munge_user32_stat(&source.sb, &dest.user32_sb);
my_size = sizeof(dest.user32_sb);
sbp = (caddr_t)&dest.user32_sb;
}
*
* Parameters: p (ignored)
* uap User argument descriptor (see below)
- * retval (ignored)
+ * retval (ignored)
*
* Indirect: uap->path Path of file to get status from
* uap->ub User buffer (holds file status info)
* uap->xsecurity ACL to get (extended security)
* uap->xsecurity_size Size of ACL
- *
+ *
* Returns: 0 Success
* !0 errno value
*
*
* Parameters: p (ignored)
* uap User argument descriptor (see below)
- * retval (ignored)
+ * retval (ignored)
*
* Indirect: uap->path Path of file to get status from
* uap->ub User buffer (holds file status info)
* uap->xsecurity ACL to get (extended security)
* uap->xsecurity_size Size of ACL
- *
+ *
* Returns: 0 Success
* !0 errno value
*
*
* Parameters: p (ignored)
* uap User argument descriptor (see below)
- * retval (ignored)
+ * retval (ignored)
*
* Indirect: uap->path Path of file to get status from
* uap->ub User buffer (holds file status info)
* uap->xsecurity ACL to get (extended security)
* uap->xsecurity_size Size of ACL
- *
+ *
* Returns: 0 Success
* !0 errno value
*
*
* Parameters: p (ignored)
* uap User argument descriptor (see below)
- * retval (ignored)
+ * retval (ignored)
*
* Indirect: uap->path Path of file to get status from
* uap->ub User buffer (holds file status info)
* uap->xsecurity ACL to get (extended security)
* uap->xsecurity_size Size of ACL
- *
+ *
* Returns: 0 Success
* !0 errno value
*
struct nameidata nd;
vfs_context_t ctx = vfs_context_current();
- NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
if (error)
/*
* Change file flags.
+ *
+ * NOTE: this will vnode_put() `vp'
*/
static int
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
goto out;
error = vnode_setattr(vp, &va, ctx);
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_setflags(ctx, vp, flags);
+#endif
+
if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
error = ENOTSUP;
}
struct nameidata nd;
AUDIT_ARG(fflags, uap->flags);
- NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
if (error)
vp = nd.ni_vp;
nameidone(&nd);
+ /* we don't vnode_put() here because chflags1 does it internally */
error = chflags1(vp, uap->flags, ctx);
return(error);
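/*
 * Illustrative userspace sketch (not taken from this diff; hide_from_finder is
 * a hypothetical helper): the chflags/fchflags paths above funnel into
 * chflags1(), which consumes the vnode reference; from user space this is the
 * ordinary BSD chflags(2) interface over the same flag bits.
 */
#include <sys/stat.h>
#include <unistd.h>

static int
hide_from_finder(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return -1;
	/* Keep the existing flags and add the user-settable "hidden" bit. */
	return chflags(path, st.st_flags | UF_HIDDEN);
}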
AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+ /* we don't vnode_put() here because chflags1 does it internally */
error = chflags1(vp, uap->flags, vfs_context_current());
file_drop(uap->fd);
{
kauth_action_t action;
int error;
-
+
AUDIT_ARG(mode, vap->va_mode);
/* XXX audit new args */
if (VATTR_IS_ACTIVE(vap, va_mode) &&
(error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
return (error);
+
+ if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
+ if ((error = mac_vnode_check_setowner(ctx, vp,
+ VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
+ VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
+ return (error);
+ }
+
+ if (VATTR_IS_ACTIVE(vap, va_acl) &&
+ (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
+ return (error);
#endif
/* make sure that the caller is allowed to set this security information */
error = EPERM;
return(error);
}
-
- error = vnode_setattr(vp, vap, ctx);
- return (error);
-}
+ if ((error = vnode_setattr(vp, vap, ctx)) != 0)
+ return (error);
+#if CONFIG_MACF
+ if (VATTR_IS_ACTIVE(vap, va_mode))
+ mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
-/*
- * Change mode of a file given a path name.
+ if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
+ mac_vnode_notify_setowner(ctx, vp,
+ VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
+ VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
+
+ if (VATTR_IS_ACTIVE(vap, va_acl))
+ mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
+#endif
+
+ return (error);
+}
+
+
+/*
+ * Change mode of a file given a path name.
*
* Returns: 0 Success
* namei:??? [anything namei can return]
}
/*
- * chmod_extended: Change the mode of a file given a path name; with extended
+ * chmod_extended: Change the mode of a file given a path name; with extended
* argument list (including extended security (ACL)).
*
* Parameters: p Process requesting the open
*
* Parameters: p Process requesting to change file mode
* uap User argument descriptor (see below)
- * retval (ignored)
+ * retval (ignored)
*
* Indirect: uap->mode File mode to set (same as 'chmod')
* uap->uid UID to set
* uap->gid GID to set
* uap->xsecurity ACL to set (or delete)
* uap->fd File descriptor of file to change mode
- *
+ *
* Returns: 0 Success
* !0 errno value
*
error = fchmod1(p, uap->fd, &va);
-
+
switch(uap->xsecurity) {
case USER_ADDR_NULL:
case CAST_USER_ADDR_T(-1):
if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
goto out;
error = vnode_setattr(vp, &va, ctx);
-
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_setowner(ctx, vp, uid, gid);
+#endif
+
out:
/*
* EACCES is only allowed from namei(); permissions failure should
}
error = vnode_setattr(vp, &va, ctx);
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
+#endif
+
out:
(void)vnode_put(vp);
file_drop(uap->fd);
}
error = vnode_setattr(vp, &va, ctx);
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
+#endif
+
out:
return error;
}
vfs_context_t ctx = vfs_context_current();
/*
- * AUDIT: Needed to change the order of operations to do the
+ * AUDIT: Needed to change the order of operations to do the
* name lookup first because auditing wants the path.
*/
- NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
error = namei(&nd);
if (error)
if (uap->length < 0)
return(EINVAL);
- NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
UIO_USERSPACE, uap->path, ctx);
if ((error = namei(&nd)))
return (error);
if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
goto out;
error = vnode_setattr(vp, &va, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_truncate(ctx, NOCRED, vp);
+#endif
+
out:
vnode_put(vp);
return (error);
AUDIT_ARG(fd, uap->fd);
if (uap->length < 0)
return(EINVAL);
-
+
if ( (error = fp_lookup(p,fd,&fp,0)) ) {
return(error);
}
VATTR_INIT(&va);
VATTR_SET(&va, va_data_size, uap->length);
error = vnode_setattr(vp, &va, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
+#endif
+
(void)vnode_put(vp);
out:
file_drop(fd);
* thread cancellation points.
*/
/* ARGSUSED */
-int
+int
fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
{
return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
#if NAMEDRSRCFORK
/* Sync resource fork shadow file if necessary. */
if ((error == 0) &&
- (vp->v_flag & VISNAMEDSTREAM) &&
+ (vp->v_flag & VISNAMEDSTREAM) &&
(vp->v_parent != NULLVP) &&
vnode_isshadow(vp) &&
(fp->f_flags & FP_WRITTEN)) {
}
/*
- * Duplicate files. Source must be a file, target must be a file or
+ * Duplicate files. Source must be a file, target must be a file or
* must not exist.
*
* XXX Copyfile authorisation checking is woefully inadequate, and will not
struct nameidata fromnd, tond;
int error;
vfs_context_t ctx = vfs_context_current();
+#if CONFIG_MACF
+ struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
+ struct vnode_attr va;
+#endif
/* Check that the flags are valid. */
return(EINVAL);
}
- NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
UIO_USERSPACE, uap->from, ctx);
if ((error = namei(&fromnd)))
return (error);
goto out;
}
}
+
if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
error = EISDIR;
goto out;
}
+ /* This calls existing MAC hooks for open */
+ if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
+ NULL))) {
+ goto out;
+ }
+
+ if (tvp) {
+ /*
+ * See unlinkat_internal for an explanation of the potential
+ * ENOENT from the MAC hook but the gist is that the MAC hook
+ * can fail because vn_getpath isn't able to return the full
+ * path. We choose to ignore this failure.
+ */
+ error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
+ if (error && error != ENOENT)
+ goto out;
+ error = 0;
+ }
+
+#if CONFIG_MACF
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_type, fvp->v_type);
+ /* Mask off all but regular access permissions */
+ VATTR_SET(&va, va_mode,
+ ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
+ error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
+ if (error)
+ goto out;
+#endif /* CONFIG_MACF */
+
if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
goto out;
out1:
vnode_put(fvp);
- if (fromnd.ni_startdir)
- vnode_put(fromnd.ni_startdir);
nameidone(&fromnd);
if (error == -1)
return (error);
}
+#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
+
+/*
+ * Helper function for doing clones. The caller is expected to provide an
+ * iocounted source vnode and release it.
+ */
+static int
+clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
+ user_addr_t dst, uint32_t flags, vfs_context_t ctx)
+{
+ vnode_t tvp, tdvp;
+ struct nameidata tond;
+ int error;
+ int follow;
+ boolean_t free_src_acl;
+ boolean_t attr_cleanup;
+ enum vtype v_type;
+ kauth_action_t action;
+ struct componentname *cnp;
+ uint32_t defaulted;
+ struct vnode_attr va;
+ struct vnode_attr nva;
+
+ v_type = vnode_vtype(fvp);
+ switch (v_type) {
+ case VLNK:
+ /* FALLTHRU */
+ case VREG:
+ action = KAUTH_VNODE_ADD_FILE;
+ break;
+ case VDIR:
+ if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
+ fvp->v_mountedhere) {
+ return (EINVAL);
+ }
+ action = KAUTH_VNODE_ADD_SUBDIRECTORY;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ AUDIT_ARG(fd2, dst_dirfd);
+ AUDIT_ARG(value32, flags);
+
+ follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
+ UIO_USERSPACE, dst, ctx);
+ if ((error = nameiat(&tond, dst_dirfd)))
+ return (error);
+ cnp = &tond.ni_cnd;
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+
+ free_src_acl = FALSE;
+ attr_cleanup = FALSE;
+
+ if (tvp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ if (vnode_mount(tdvp) != vnode_mount(fvp)) {
+ error = EXDEV;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
+ goto out;
+#endif
+ if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
+ goto out;
+
+ action = KAUTH_VNODE_GENERIC_READ_BITS;
+ if (data_read_authorised)
+ action &= ~KAUTH_VNODE_READ_DATA;
+ if ((error = vnode_authorize(fvp, NULL, action, ctx)))
+ goto out;
+
+ /*
+ * Certain attributes may need to be changed from the source; we ask for
+ * those here.
+ */
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ VATTR_WANTED(&va, va_gid);
+ VATTR_WANTED(&va, va_mode);
+ VATTR_WANTED(&va, va_flags);
+ VATTR_WANTED(&va, va_acl);
+
+ if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
+ goto out;
+
+ VATTR_INIT(&nva);
+ VATTR_SET(&nva, va_type, v_type);
+ if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
+ VATTR_SET(&nva, va_acl, va.va_acl);
+ free_src_acl = TRUE;
+ }
+
+ /* Handle ACL inheritance, initialize vap. */
+ if (v_type == VLNK) {
+ error = vnode_authattr_new(tdvp, &nva, 0, ctx);
+ } else {
+ error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
+ if (error)
+ goto out;
+ attr_cleanup = TRUE;
+ }
+
+ /*
+ * We've got initial values for all security parameters,
+ * If we are superuser, then we can change owners to be the
+ * same as the source. Both superuser and the owner have default
+ * WRITE_SECURITY privileges so all other fields can be taken
+ * from source as well.
+ */
+ if (vfs_context_issuser(ctx)) {
+ if (VATTR_IS_SUPPORTED(&va, va_uid))
+ VATTR_SET(&nva, va_uid, va.va_uid);
+ if (VATTR_IS_SUPPORTED(&va, va_gid))
+ VATTR_SET(&nva, va_gid, va.va_gid);
+ }
+ if (VATTR_IS_SUPPORTED(&va, va_mode))
+ VATTR_SET(&nva, va_mode, va.va_mode);
+ if (VATTR_IS_SUPPORTED(&va, va_flags)) {
+ VATTR_SET(&nva, va_flags,
+ ((va.va_flags & ~SF_RESTRICTED) | /* Turn off from source */
+ (nva.va_flags & SF_RESTRICTED)));
+ }
+
+ error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva,
+ VNODE_CLONEFILE_DEFAULT, ctx);
+
+ if (!error && tvp) {
+ int update_flags = 0;
+#if CONFIG_FSE
+ int fsevent;
+#endif /* CONFIG_FSE */
+
+#if CONFIG_MACF
+ (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
+ VNODE_LABEL_CREATE, ctx);
+#endif
+ /*
+ * If some of the requested attributes weren't handled by the
+ * VNOP, use our fallback code.
+ */
+ if (!VATTR_ALL_SUPPORTED(&va))
+ (void)vnode_setattr_fallback(tvp, &nva, ctx);
+
+ // Make sure the name & parent pointers are hooked up
+ if (tvp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (tvp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags) {
+ (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
+ cnp->cn_namelen, cnp->cn_hash, update_flags);
+ }
+
+#if CONFIG_FSE
+ switch (vnode_vtype(tvp)) {
+ case VLNK:
+ /* FALLTHRU */
+ case VREG:
+ fsevent = FSE_CREATE_FILE;
+ break;
+ case VDIR:
+ fsevent = FSE_CREATE_DIR;
+ break;
+ default:
+ goto out;
+ }
+
+ if (need_fsevent(fsevent, tvp)) {
+ add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
+ FSE_ARG_DONE);
+ }
+#endif /* CONFIG_FSE */
+ }
+
+out:
+ if (attr_cleanup)
+ vn_attribute_cleanup(&nva, defaulted);
+ if (free_src_acl && va.va_acl)
+ kauth_acl_free(va.va_acl);
+ nameidone(&tond);
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ return (error);
+}
+
+/*
+ * clone files or directories, target must not exist.
+ */
+/* ARGSUSED */
+int
+clonefileat(__unused proc_t p, struct clonefileat_args *uap,
+ __unused int32_t *retval)
+{
+ vnode_t fvp;
+ struct nameidata fromnd;
+ int follow;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ /* Check that the flags are valid. */
+ if (uap->flags & ~CLONE_NOFOLLOW)
+ return (EINVAL);
+
+ AUDIT_ARG(fd, uap->src_dirfd);
+
+ follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
+ UIO_USERSPACE, uap->src, ctx);
+ if ((error = nameiat(&fromnd, uap->src_dirfd)))
+ return (error);
+
+ fvp = fromnd.ni_vp;
+ nameidone(&fromnd);
+
+ error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
+ uap->flags, ctx);
+
+ vnode_put(fvp);
+ return (error);
+}
+
+int
+fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
+ __unused int32_t *retval)
+{
+ vnode_t fvp;
+ struct fileproc *fp;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ AUDIT_ARG(fd, uap->src_fd);
+ error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
+ if (error)
+ return (error);
+
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+ if ((error = vnode_getwithref(fvp)))
+ goto out;
+
+ AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
+
+ error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
+ uap->flags, ctx);
+
+ vnode_put(fvp);
+out:
+ file_drop(uap->src_fd);
+ return (error);
+}
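/*
 * Illustrative userspace sketch (not taken from this diff; clone_into_dir is a
 * hypothetical helper): driving the clonefileat(2) entry point added above.
 * The destination must not exist (EEXIST otherwise) and must live on the same
 * mount as the source (EXDEV otherwise); CLONE_NOFOLLOW clones a symlink
 * itself rather than its target.
 */
#include <sys/clonefile.h>
#include <fcntl.h>
#include <stdio.h>

static int
clone_into_dir(const char *src, int dst_dirfd, const char *dst_name)
{
	if (clonefileat(AT_FDCWD, src, dst_dirfd, dst_name, CLONE_NOFOLLOW) != 0) {
		perror("clonefileat");
		return -1;
	}
	return 0;
}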
/*
* Rename files. Source and destination must either both be directories,
renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
{
+ if (flags & ~VFS_RENAME_FLAGS_MASK)
+ return EINVAL;
+
+ if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
+ return EINVAL;
+
vnode_t tvp, tdvp;
vnode_t fvp, fdvp;
struct nameidata *fromnd, *tond;
tvp = tond->ni_vp;
}
+#if DEVELOPMENT || DEBUG
+ /*
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out1;
+ }
+
+ if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out1;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+ if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
+ error = ENOENT;
+ goto out1;
+ }
+
+ if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
+ error = EEXIST;
+ goto out1;
+ }
+
batched = vnode_compound_rename_available(fdvp);
if (!fvp) {
/*
}
if (!batched) {
- error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
+ error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
if (error) {
- if (error == ENOENT &&
- retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- /*
- * We encountered a race where after doing the namei, tvp stops
- * being valid. If so, simply re-drive the rename call from the
- * top.
- */
- do_retry = 1;
- retry_count += 1;
+ if (error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * We encountered a race where after doing the namei, tvp stops
+ * being valid. If so, simply re-drive the rename call from the
+ * top.
+ */
+ do_retry = 1;
+ retry_count += 1;
+ }
}
goto out1;
}
*
* XXX Handle this in VFS after a continued lookup (if we missed
* in the cache to start off)
+ *
+ * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
+ * we'll skip past here. The file system is responsible for
+	 * checking that @tvp is not a descendant of @fvp and vice versa,
+	 * so it should always return EINVAL if either @tvp or @fvp is the
+ * root of a volume.
*/
if ((fvp->v_flag & VROOT) &&
(fvp->v_type == VDIR) &&
to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
}
-#if CONFIG_SECLUDED_RENAME
- if (flags & VFS_SECLUDE_RENAME) {
- fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
- }
-#else
- #pragma unused(flags)
-#endif
error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
tdvp, &tvp, &tond->ni_cnd, tvap,
- 0, ctx);
+ flags, ctx);
if (holding_mntlock) {
/*
* ENOENT in case of racing hardlink lookups hitting the name
* cache, redrive the lookup.
*/
- if (batched && error == ENOENT &&
- retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- do_retry = 1;
- retry_count += 1;
+ if (batched && error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ do_retry = 1;
+ retry_count += 1;
+ }
}
goto out1;
kauth_authorize_fileop(vfs_context_ucred(ctx),
KAUTH_FILEOP_RENAME,
(uintptr_t)from_name, (uintptr_t)to_name);
+ if (flags & VFS_RENAME_SWAP) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_RENAME,
+ (uintptr_t)to_name, (uintptr_t)from_name);
+ }
#if CONFIG_FSE
if (from_name != NULL && to_name != NULL) {
vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
}
- if (tvp) {
- add_fsevent(FSE_RENAME, ctx,
- FSE_ARG_STRING, from_len, from_name,
- FSE_ARG_FINFO, &from_finfo,
- FSE_ARG_STRING, to_len, to_name,
- FSE_ARG_FINFO, &to_finfo,
- FSE_ARG_DONE);
+ if (tvp) {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_DONE);
+ if (flags & VFS_RENAME_SWAP) {
+ /*
+ * Strictly speaking, swap is the equivalent of
+ * *three* renames. FSEvents clients should only take
+ * the events as a hint, so we only bother reporting
+ * two.
+ */
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_DONE);
+ }
} else {
add_fsevent(FSE_RENAME, ctx,
FSE_ARG_STRING, from_len, from_name,
AT_FDCWD, uap->to, UIO_USERSPACE, 0));
}
-#if CONFIG_SECLUDED_RENAME
-int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
+int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
{
return renameat_internal(
- vfs_context_current(),
- AT_FDCWD, uap->from,
- AT_FDCWD, uap->to,
+ vfs_context_current(),
+ uap->fromfd, uap->from,
+ uap->tofd, uap->to,
UIO_USERSPACE, uap->flags);
}
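+
+/*
+ * Illustrative userland sketch: assumes the renameatx_np() wrapper and the
+ * RENAME_SWAP/RENAME_EXCL flags exposed through <stdio.h>, which correspond
+ * to VFS_RENAME_SWAP/VFS_RENAME_EXCL handled above.
+ *
+ *	// Atomically swap two existing entries (both must exist, or ENOENT):
+ *	if (renameatx_np(dirfd, "a", dirfd, "b", RENAME_SWAP) != 0)
+ *		err(1, "swap");
+ *
+ *	// Rename without replacing: fails with EEXIST if "final" exists:
+ *	if (renameatx_np(dirfd, "tmp", dirfd, "final", RENAME_EXCL) != 0)
+ *		err(1, "rename");
+ */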
-#endif
-
+
int
renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
{
goto out;
}
+#if DEVELOPMENT || DEBUG
/*
- * Removed a check here; we used to abort if vp's vid
- * was not the same as what we'd seen the last time around.
- * I do not think that check was valid, because if we retry
- * and all dirents are gone, the directory could legitimately
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+ /*
+ * Removed a check here; we used to abort if vp's vid
+ * was not the same as what we'd seen the last time around.
+ * I do not think that check was valid, because if we retry
+ * and all dirents are gone, the directory could legitimately
* be recycled but still be present in a situation where we would
* have had permission to delete. Therefore, we won't make
* an effort to preserve that check now that we may not have a
if (!batched) {
error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
if (error) {
- if (error == ENOENT &&
- restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- restart_flag = 1;
- restart_count += 1;
+ if (error == ENOENT) {
+ assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ restart_flag = 1;
+ restart_count += 1;
+ }
}
goto out;
}
if (error == EKEEPLOOKING) {
goto continue_lookup;
- } else if (batched && error == ENOENT &&
- restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
- /*
- * For compound VNOPs, the authorization callback
- * may return ENOENT in case of racing hard link lookups
- * redrive the lookup.
- */
- restart_flag = 1;
- restart_count += 1;
- goto out;
+ } else if (batched && error == ENOENT) {
+ assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * For compound VNOPs, the authorization callback
+ * may return ENOENT in case of racing hard link lookups
+ * redrive the lookup.
+ */
+ restart_flag = 1;
+ restart_count += 1;
+ goto out;
+ }
}
#if CONFIG_APPLEDOUBLE
/*
int *numdirent, vfs_context_t ctxp)
{
/* Check if fs natively supports VNODE_READDIR_EXTENDED */
- if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
+ if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
} else {
* will prevent us from reading more than we can pack.
*
* Since this buffer is wired memory, we will limit the
- * buffer size to a maximum of 32K. We would really like to
+ * buffer size to a maximum of 32K. We would really like to
* use 32K in the MIN(), but we use magic number 87371 to
- * prevent uio_resid() * 3 / 8 from overflowing.
+ * prevent uio_resid() * 3 / 8 from overflowing.
*/
bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
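+	/*
+	 * With the clamp above, bufsize <= (3 * 87371) / 8 == 32764 bytes,
+	 * i.e. just under the intended 32K ceiling.
+	 */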
MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
error = union_dircheckp(&vp, fp, &context);
if (error == -1)
goto unionread;
- if (error)
+ if (error) {
+ (void)vnode_put(vp);
goto out;
+ }
}
if ((vp->v_mount->mnt_flag & MNT_UNION)) {
if (offset) {
*offset = loff;
}
-
+
*bytesread = bufsize - uio_resid(auio);
out:
file_drop(fd);
*
* Indirect: uap->newmask umask to set
* uap->xsecurity ACL to set
- *
+ *
* Returns: 0 Success
* !0 Not success
*
uint32_t newstate;
int error, eofflag;
uint32_t loff;
- struct attrlist attributelist;
+ struct attrlist attributelist;
vfs_context_t ctx = vfs_context_current();
int fd = uap->fd;
char uio_buf[ UIO_SIZEOF(1) ];
kauth_action_t action;
AUDIT_ARG(fd, fd);
-
+
/* Get the attributes into kernel space */
if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
return(error);
loff = fp->f_fglob->fg_offset;
auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
uio_addiov(auio, uap->buffer, uap->buffersize);
-
+
/*
* If the only item requested is file names, we can let that past with
* just LIST_DIRECTORY. If they want any other attributes, that means
if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
attributelist.fileattr || attributelist.dirattr)
action |= KAUTH_VNODE_SEARCH;
-
+
if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
/* Believe it or not, uap->options only has 32-bits of valid
(void)vnode_put(vp);
- if (error)
+ if (error)
goto out;
fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
#if CONFIG_FSE
fse_info f_finfo, s_finfo;
#endif
-
+
nameiflags = 0;
if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
nameidone(&fnd);
fvp = fnd.ni_vp;
- NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
+ NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
UIO_USERSPACE, uap->path2, ctx);
error = namei(&snd);
if (svp == fvp) {
error = EINVAL;
goto out;
- }
+ }
/*
* if the files are on different volumes, return an error
if (
#if CONFIG_FSE
- need_fsevent(FSE_EXCHANGE, fvp) ||
+ need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
kauth_authorize_fileop_has_listeners()) {
GET_PATH(fpath);
flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
-
+
#if CONFIG_FSE
get_fse_info(fvp, &f_finfo, ctx);
get_fse_info(svp, &s_finfo, ctx);
const char *tmpname;
if (fpath != NULL && spath != NULL) {
- /* call out to allow 3rd party notification of exchangedata.
+ /* call out to allow 3rd party notification of exchangedata.
* Ignore result of kauth_authorize_fileop call.
*/
- kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
(uintptr_t)fpath, (uintptr_t)spath);
}
name_cache_lock();
tmpname = fvp->v_name;
fvp->v_name = svp->v_name;
svp->v_name = tmpname;
-
+
if (fvp->v_parent != svp->v_parent) {
vnode_t tmp;
uint32_t
freespace_mb(vnode_t vp)
{
- vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
+ vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
}
searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
searchblock.maxmatches = tmp_searchblock.maxmatches;
- /*
+ /*
* These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
* from a 32 bit long, and tv_usec is already a signed 32 bit int.
*/
if (error)
return(error);
- /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
+ /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
*/
- if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
+ if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
return(EINVAL);
-
+
/* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
/* It all has to do into local memory and it's not that big so we might as well put it all together. */
/* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
/* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
/* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
/* assumes the size is still 556 bytes it will continue to work */
-
+
mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
goto freeandexit;
-
+
if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
goto freeandexit;
*/
if (uap->options & SRCHFS_START)
state->ss_union_layer = 0;
- else
+ else
uap->options |= state->ss_union_flags;
state->ss_union_flags = 0;
/*
* Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
* which is passed in with an attrreference_t, we need to inspect the buffer manually here.
- * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
- * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
+ * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
+ * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
* validate the user-supplied data offset of the attrreference_t, we'll do it here.
*/
if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
attrreference_t* string_ref;
u_int32_t* start_length;
- user64_size_t param_length;
+ user64_size_t param_length;
/* validate searchparams1 */
- param_length = searchblock.sizeofsearchparams1;
+ param_length = searchblock.sizeofsearchparams1;
/* skip the word that specifies length of the buffer */
start_length= (u_int32_t*) searchparams1;
start_length= start_length+1;
/* ensure no negative offsets or too big offsets */
if (string_ref->attr_dataoffset < 0 ) {
error = EINVAL;
- goto freeandexit;
+ goto freeandexit;
}
if (string_ref->attr_length > MAXPATHLEN) {
error = EINVAL;
goto freeandexit;
}
-
+
/* Check for pointer overflow in the string ref */
if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
error = EINVAL;
error = ENOENT;
goto freeandexit;
}
- vnode_getwithref(vp);
+ error = vnode_getwithref(vp);
vnode_put(tvp);
+ if (error)
+ goto freeandexit;
}
#if CONFIG_MACF
}
#endif
-
+
/*
- * If searchblock.maxmatches == 0, then skip the search. This has happened
+ * If searchblock.maxmatches == 0, then skip the search. This has happened
* before and sometimes the underlying code doesnt deal with it well.
*/
if (searchblock.maxmatches == 0) {
/*
* Allright, we have everything we need, so lets make that call.
- *
+ *
* We keep special track of the return value from the file system:
* EAGAIN is an acceptable error condition that shouldn't keep us
* from copying out any results...
auio,
(struct searchstate *) &state->ss_fsstate,
ctx);
-
+
/*
* If it's a union mount we need to be called again
* to search the mounted-on filesystem.
if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
goto freeandexit;
-
+
error = fserror;
freeandexit:
nspace_proc_exit(struct proc *p)
{
int i, event_mask = 0;
-
+
for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
if (p == nspace_handlers[i].handler_proc) {
event_mask |= nspace_item_flags_for_type(i);
if (event_mask == 0) {
return;
}
-
+
+ lck_mtx_lock(&nspace_handler_lock);
if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
// if this process was the snapshot handler, zero snapshot_timeout
snapshot_timestamp = 0;
}
-
+
//
// unblock anyone that's waiting for the handler that died
//
- lck_mtx_lock(&nspace_handler_lock);
for(i=0; i < MAX_NSPACE_ITEMS; i++) {
if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
nspace_items[i].vid = 0;
nspace_items[i].flags = NSPACE_ITEM_DONE;
nspace_items[i].token = 0;
-
+
wakeup((caddr_t)&(nspace_items[i].vp));
}
}
}
-
+
wakeup((caddr_t)&nspace_item_idx);
lck_mtx_unlock(&nspace_handler_lock);
}
-int
+int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
return resolve_nspace_item_ext(vp, op, NULL);
}
-int
+int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
int i, error, keep_waiting;
} else {
nspace_items[i].refcount++;
}
-
+
if (i >= MAX_NSPACE_ITEMS) {
ts.tv_sec = nspace_handler_timeout;
ts.tv_nsec = 0;
nspace_items[i].token = 0;
nspace_items[i].refcount = 1;
-
+
wakeup((caddr_t)&nspace_item_idx);
}
// hmmm, why did we get woken up?
printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
nspace_items[i].token);
- }
+ }
if (--nspace_items[i].refcount == 0) {
nspace_items[i].vp = NULL; // clear this so that no one will match on it again
return error;
}
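+/*
+ * Resolve a snapshot namespace event for @vp when its change time falls
+ * within the current snapshot epoch (or the vnode is marked as needing
+ * snapshots). NULL vnodes and swap files are ignored. EDEADLK and handler
+ * timeouts are swallowed; only an interrupted wait (EINTR) is reported
+ * back to the caller.
+ */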
+int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
+{
+ int snapshot_error = 0;
+
+ if (vp == NULL) {
+ return 0;
+ }
+
+ /* Swap files are special; skip them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+ // the change time is within this epoch
+ int error;
+
+ error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+ if (error == EDEADLK) {
+ snapshot_error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
+ } else if (error == EINTR) {
+ // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
+ snapshot_error = EINTR;
+ }
+ }
+ }
+
+ return snapshot_error;
+}
int
get_nspace_item_status(struct vnode *vp, int32_t *status)
lck_mtx_unlock(&nspace_handler_lock);
return 0;
}
-
+
#if 0
static int
if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
return error;
-
+
//
// if the vnode is tagged VOPENEVT and the current process
return error;
}
- /* Call out to allow 3rd party notification of open.
+ /* Call out to allow 3rd party notification of open.
* Ignore result of kauth_authorize_fileop call.
*/
#if CONFIG_MACF
mac_vnode_notify_open(ctx, vp, fmode);
#endif
- kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
(uintptr_t)vp, 0);
static int
wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
{
- int i, error=0, unblock=0;
+ int i;
+ int error = 0;
+ int unblock = 0;
task_t curtask;
-
+
lck_mtx_lock(&nspace_handler_exclusion_lock);
if (nspace_handlers[nspace_type].handler_busy) {
lck_mtx_unlock(&nspace_handler_exclusion_lock);
return EBUSY;
}
+
nspace_handlers[nspace_type].handler_busy = 1;
lck_mtx_unlock(&nspace_handler_exclusion_lock);
-
- /*
+
+ /*
* Any process that gets here will be one of the namespace handlers.
* As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
* as we can cause deadlocks to occur, because the namespace handler may prevent
- * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
+ * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
* process.
*/
curtask = current_task();
- bsd_set_dependency_capable (curtask);
-
+ bsd_set_dependency_capable (curtask);
+
lck_mtx_lock(&nspace_handler_lock);
if (nspace_handlers[nspace_type].handler_proc == NULL) {
nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
nspace_handlers[nspace_type].handler_proc = current_proc();
}
-
+
+ if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
+ (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+ error = EINVAL;
+ }
+
while (error == 0) {
-
- for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+
+		/* Try to find a matching namespace item */
+ for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
- if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
- continue;
+ if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+ break;
}
- break;
}
}
-
- if (i < MAX_NSPACE_ITEMS) {
- nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
- nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
- nspace_items[i].token = ++nspace_token_id;
-
- if (nspace_items[i].vp) {
- struct fileproc *fp;
- int32_t indx, fmode;
- struct proc *p = current_proc();
- vfs_context_t ctx = vfs_context_current();
- struct vnode_attr va;
-
-
- /*
- * Use vnode pointer to acquire a file descriptor for
- * hand-off to userland
- */
- fmode = nspace_open_flags_for_type(nspace_type);
- error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
- if (error) {
- unblock = 1;
- break;
- }
- error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
- if (error) {
- unblock = 1;
- vnode_put(nspace_items[i].vp);
- break;
- }
-
- if ((error = falloc(p, &fp, &indx, ctx))) {
- vn_close(nspace_items[i].vp, fmode, ctx);
- vnode_put(nspace_items[i].vp);
- unblock = 1;
- break;
- }
-
- fp->f_fglob->fg_flag = fmode;
- fp->f_fglob->fg_ops = &vnops;
- fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
-
- proc_fdlock(p);
- procfdtbl_releasefd(p, indx, NULL);
- fp_drop(p, indx, fp, 1);
- proc_fdunlock(p);
-
- /*
- * All variants of the namespace handler struct support these three fields:
- * token, flags, and the FD pointer
- */
- error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
- error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
- error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
-
- /*
- * Handle optional fields:
- * extended version support an info ptr (offset, length), and the
- *
- * namedata version supports a unique per-link object ID
- *
- */
- if (nhd->infoptr) {
- uio_t uio = (uio_t)nspace_items[i].arg;
- uint64_t u_offset, u_length;
-
- if (uio) {
- u_offset = uio_offset(uio);
- u_length = uio_resid(uio);
- } else {
- u_offset = 0;
- u_length = 0;
- }
- error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
- error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
- }
-
- if (nhd->objid) {
- VATTR_INIT(&va);
- VATTR_WANTED(&va, va_linkid);
- error = vnode_getattr(nspace_items[i].vp, &va, ctx);
- if (error == 0 ) {
- uint64_t linkid = 0;
- if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
- linkid = (uint64_t)va.va_linkid;
- }
- error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
- }
- }
- if (error) {
- vn_close(nspace_items[i].vp, fmode, ctx);
- fp_free(p, indx, fp);
- unblock = 1;
- }
-
- vnode_put(nspace_items[i].vp);
-
- break;
- } else {
- printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
- i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
- }
-
- } else {
+ if (i >= MAX_NSPACE_ITEMS) {
+			/* Nothing is there yet.  Wait for a wakeup and retry. */
error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+ /* Prevent infinite loop if snapshot handler exited */
error = EINVAL;
break;
}
-
+ continue;
}
+
+ nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
+ nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
+ nspace_items[i].token = ++nspace_token_id;
+
+ assert(nspace_items[i].vp);
+ struct fileproc *fp;
+ int32_t indx;
+ int32_t fmode;
+ struct proc *p = current_proc();
+ vfs_context_t ctx = vfs_context_current();
+ struct vnode_attr va;
+		bool vn_get_successful = false;
+ bool vn_open_successful = false;
+ bool fp_alloc_successful = false;
+
+ /*
+ * Use vnode pointer to acquire a file descriptor for
+ * hand-off to userland
+ */
+ fmode = nspace_open_flags_for_type(nspace_type);
+ error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
+ if (error) goto cleanup;
+		vn_get_successful = true;
+
+ error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
+ if (error) goto cleanup;
+ vn_open_successful = true;
+
+ error = falloc(p, &fp, &indx, ctx);
+ if (error) goto cleanup;
+ fp_alloc_successful = true;
+
+ fp->f_fglob->fg_flag = fmode;
+ fp->f_fglob->fg_ops = &vnops;
+ fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
+
+ proc_fdlock(p);
+ procfdtbl_releasefd(p, indx, NULL);
+ fp_drop(p, indx, fp, 1);
+ proc_fdunlock(p);
+
+ /*
+ * All variants of the namespace handler struct support these three fields:
+ * token, flags, and the FD pointer
+ */
+ error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
+ if (error) goto cleanup;
+ error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
+ if (error) goto cleanup;
+ error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
+ if (error) goto cleanup;
+
+		/*
+		 * Handle optional fields:
+		 * the extended version supports an info ptr (offset, length), and the
+		 * namedata version supports a unique per-link object ID.
+		 */
+ if (nhd->infoptr) {
+ uio_t uio = (uio_t)nspace_items[i].arg;
+ uint64_t u_offset, u_length;
+
+ if (uio) {
+ u_offset = uio_offset(uio);
+ u_length = uio_resid(uio);
+ } else {
+ u_offset = 0;
+ u_length = 0;
+ }
+ error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
+ if (error) goto cleanup;
+ error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
+ if (error) goto cleanup;
+ }
+
+ if (nhd->objid) {
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_linkid);
+ error = vnode_getattr(nspace_items[i].vp, &va, ctx);
+ if (error) goto cleanup;
+
+ uint64_t linkid = 0;
+ if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
+ linkid = (uint64_t)va.va_linkid;
+ }
+ error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
+ }
+cleanup:
+ if (error) {
+ if (fp_alloc_successful) fp_free(p, indx, fp);
+ if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
+ unblock = 1;
+ }
+
+		if (vn_get_successful) vnode_put(nspace_items[i].vp);
+
+ break;
}
-
+
if (unblock) {
if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
vnode_lock_spin(nspace_items[i].vp);
nspace_items[i].vid = 0;
nspace_items[i].flags = NSPACE_ITEM_DONE;
nspace_items[i].token = 0;
-
+
wakeup((caddr_t)&(nspace_items[i].vp));
}
-
+
if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
// just go through every snapshot event and unblock it immediately.
if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
- for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
nspace_items[i].vp = NULL;
nspace_items[i].vid = 0;
nspace_items[i].flags = NSPACE_ITEM_DONE;
nspace_items[i].token = 0;
-
- wakeup((caddr_t)&(nspace_items[i].vp));
+
+ wakeup((caddr_t)&(nspace_items[i].vp));
}
}
}
}
}
-
+
lck_mtx_unlock(&nspace_handler_lock);
-
+
lck_mtx_lock(&nspace_handler_exclusion_lock);
nspace_handlers[nspace_type].handler_busy = 0;
lck_mtx_unlock(&nspace_handler_exclusion_lock);
-
+
return error;
}
{
int error = 0;
namespace_handler_data nhd;
-
+
bzero (&nhd, sizeof(namespace_handler_data));
- if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
- (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
- return EINVAL;
- }
-
if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
return error;
}
-
+
error = validate_namespace_args (is64bit, size);
if (error) {
return error;
}
-
+
/* Copy in the userland pointers into our kernel-only struct */
if (is64bit) {
}
/* Otherwise the fields were pre-zeroed when we did the bzero above. */
}
- }
+ }
else {
/* 32 bit userland structures */
nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
-
+
if (size > (sizeof(user32_namespace_handler_info))) {
if (size >= (sizeof(user32_namespace_handler_info_ext))) {
nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
/* Otherwise the fields were pre-zeroed when we did the bzero above. */
}
}
-
+
return wait_for_namespace_event(&nhd, nspace_type);
}
boolean_t is64bit;
u_int size;
#define STK_PARAMS 128
- char stkbuf[STK_PARAMS];
+ char stkbuf[STK_PARAMS] = {0};
caddr_t data, memp;
vnode_t vp = *arg_vp;
memp = NULL;
+
/*
* ensure the buffer is large enough for underlying calls
*/
#ifndef HFSIOC_GETPATH
-typedef char pn_t[MAXPATHLEN];
+ typedef char pn_t[MAXPATHLEN];
#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
#endif
/* Round up to MAXPATHLEN regardless of user input */
size = MAXPATHLEN;
}
-
+ else if (vp->v_tag == VT_CIFS) {
+ /*
+ * XXX Until fsctl's length encoding can be
+ * XXX fixed properly.
+ */
+ if (IOCBASECMD(cmd) == _IOWR('z', 19, 0) && size < 1432) {
+ size = 1432; /* sizeof(struct UniqueSMBShareID) */
+ } else if (IOCBASECMD(cmd) == _IOWR('z', 28, 0) && size < 308) {
+ size = 308; /* sizeof(struct smbDebugTestPB) */
+ }
+ }
if (size > sizeof (stkbuf)) {
if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
} else {
data = &stkbuf[0];
};
-
+
if (cmd & IOC_IN) {
if (size) {
error = copyin(udata, data, size);
- if (error) {
+ if (error) {
if (memp) {
- kfree (memp, size);
+ kfree (memp, size);
}
return error;
}
/* issue the sync for this volume */
(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
- /*
+ /*
* Then release the mount_iterref once we're done syncing; it's not
* needed for the VNOP_IOCTL below
*/
}
break;
+ case FSCTL_ROUTEFS_SETROUTEID: {
+#if ROUTEFS
+ char routepath[MAXPATHLEN];
+ size_t len = 0;
+
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ break;
+ }
+ bzero(routepath, MAXPATHLEN);
+ error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
+ if (error) {
+ break;
+ }
+ error = routefs_kernel_mount(routepath);
+ if (error) {
+ break;
+ }
+#endif
+ }
+ break;
+
case FSCTL_SET_PACKAGE_EXTS: {
user_addr_t ext_strings;
uint32_t num_entries;
uint32_t max_width;
+ if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
+ break;
+
if ( (is64bit && size != sizeof(user64_package_ext_info))
|| (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
}
break;
- /* namespace handlers */
+ /* namespace handlers */
case FSCTL_NAMESPACE_HANDLER_GET: {
error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
}
/* Snapshot handlers */
case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
- }
+ }
break;
case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
}
- break;
+ break;
case FSCTL_NAMESPACE_HANDLER_UPDATE: {
uint32_t token, val;
if (error) {
printf("nspace-handler-update: did not find token %u\n", token);
}
- }
+ }
break;
-
- case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
+
+ case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
uint32_t token, val;
int i;
}
lck_mtx_unlock(&nspace_handler_lock);
- }
+ }
break;
case FSCTL_NAMESPACE_HANDLER_CANCEL: {
vnode_unlock(nspace_items[i].vp);
}
- nspace_items[i].vp = NULL;
- nspace_items[i].arg = NULL;
+ nspace_items[i].vp = NULL;
+ nspace_items[i].arg = NULL;
nspace_items[i].vid = 0;
nspace_items[i].token = val;
nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
- nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
+ nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
wakeup((caddr_t)&(nspace_items[i].vp));
}
lck_mtx_unlock(&nspace_handler_lock);
- }
+ }
break;
case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
lck_mtx_unlock(&nspace_handler_lock);
printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
- }
+ }
break;
case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
}
break;
- case FSCTL_SET_FSTYPENAME_OVERRIDE:
- {
+ case FSCTL_SET_FSTYPENAME_OVERRIDE:
+ {
if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
break;
}
}
}
break;
-
+
default: {
/* Invoke the filesystem-specific code */
error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
* if no errors, copy any data to user. Size was
* already set and checked above.
*/
- if (error == 0 && (cmd & IOC_OUT) && size)
+ if (error == 0 && (cmd & IOC_OUT) && size)
error = copyout(data, udata, size);
-
+
if (memp) {
kfree(memp, size);
}
-
+
return error;
}
fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
{
int error;
- struct nameidata nd;
+ struct nameidata nd;
u_long nameiflags;
vnode_t vp = NULL;
vfs_context_t ctx = vfs_context_current();
AUDIT_ARG(fd, uap->fd);
AUDIT_ARG(cmd, uap->cmd);
AUDIT_ARG(value32, uap->options);
-
+
/* Get the vnode for the file we are getting info on: */
if ((error = file_vnode(uap->fd, &vp)))
- goto done;
+ return error;
fd = uap->fd;
if ((error = vnode_getwithref(vp))) {
- goto done;
+ file_drop(fd);
+ return error;
}
#if CONFIG_MACF
- error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
- if (error) {
- goto done;
+ if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
+ file_drop(fd);
+ vnode_put(vp);
+ return error;
}
#endif
error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
-done:
- if (fd != -1)
- file_drop(fd);
+ file_drop(fd);
- if (vp)
+	/* Validate vp; fsctl_internal() can drop the iocount and reset vp to NULL. */
+ if (vp) {
vnode_put(vp);
+ }
+
return error;
}
/* end of fsctl system call */
/*
* the specific check for 0xffffffff is a hack to preserve
* binaray compatibilty in K64 with applications that discovered
- * that passing in a buf pointer and a size of -1 resulted in
+ * that passing in a buf pointer and a size of -1 resulted in
* just the size of the indicated extended attribute being returned.
* this isn't part of the documented behavior, but because of the
* original implemtation's check for "uap->size > 0", this behavior
* was allowed. In K32 that check turned into a signed comparison
* even though uap->size is unsigned... in K64, we blow by that
* check because uap->size is unsigned and doesn't get sign smeared
- * in the munger for a 32 bit user app. we also need to add a
+ * in the munger for a 32 bit user app. we also need to add a
* check to limit the maximum size of the buffer being passed in...
* unfortunately, the underlying fileystems seem to just malloc
* the requested size even if the actual extended attribute is tiny.
if (uap->value) {
if (uap->size > (size_t)XATTR_MAXSIZE)
uap->size = XATTR_MAXSIZE;
-
+
auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
&uio_buf[0], sizeof(uio_buf));
uio_addiov(auio, uap->value, uap->size);
return (EINVAL);
if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
- return (error);
+ if (error == EPERM) {
+			/* If the string won't fit in attrname, copyinstr emits EPERM. */
+			return (ENAMETOOLONG);
+		}
+		/* Otherwise return the default error from copyinstr to detect ERANGE, etc. */
+ return error;
}
if (xattr_protected(attrname))
return(EPERM);
return(error);
}
if (uap->namebuf != 0 && uap->bufsize > 0) {
- auio = uio_createwithbuffer(1, 0, spacetype,
+ auio = uio_createwithbuffer(1, 0, spacetype,
UIO_READ, &uio_buf[0], sizeof(uio_buf));
uio_addiov(auio, uap->namebuf, uap->bufsize);
}
vnode_t vp;
int length;
int bpflags;
+ /* maximum number of times to retry build_path */
+ unsigned int retries = 0x10;
if (bufsize > PAGE_SIZE) {
return (EINVAL);
return (ENOMEM);
}
+retry:
if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
error = ENOTSUP; /* unexpected failure */
return ENOTSUP;
vnode_put(vp);
if (error) {
+ /* there was a race building the path, try a few more times */
+ if (error == EAGAIN) {
+ --retries;
+ if (retries > 0)
+ goto retry;
+
+ error = ENOENT;
+ }
goto out;
}
AUDIT_ARG(value32, fsid.val[0]);
AUDIT_ARG(value64, uap->objid);
/* Restrict output buffer size for now. */
-
+
if (uap->bufsize > PAGE_SIZE) {
return (EINVAL);
- }
+ }
MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
if (realpath == NULL) {
return (ENOMEM);
}
error = fsgetpath_internal(
- ctx, fsid.val[0], uap->objid,
+ ctx, fsid.val[0], uap->objid,
uap->bufsize, realpath, &length);
if (error) {
goto out;
}
-
+
error = copyout((caddr_t)realpath, uap->buf, length);
*retval = (user_ssize_t)length; /* may be superseded by error */
* EFAULT
*/
static int
-munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
- user_addr_t bufp, int *sizep, boolean_t is_64_bit,
+munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
+ user_addr_t bufp, int *sizep, boolean_t is_64_bit,
boolean_t partial_copy)
{
int error;
my_size = copy_size = sizeof(sfs);
bzero(&sfs, my_size);
-
+
sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
sfs.f_type = mp->mnt_vtable->vfc_typenum;
sfs.f_reserved1 = (short)sfsp->f_fssubtype;
-
+
/*
* It's possible for there to be more than 2^^31 blocks in the filesystem, so we
* have to fudge the numbers here in that case. We inflate the blocksize in order
* to reflect the filesystem size as best we can.
*/
- if ((sfsp->f_blocks > INT_MAX)
- /* Hack for 4061702 . I think the real fix is for Carbon to
+ if ((sfsp->f_blocks > INT_MAX)
+ /* Hack for 4061702 . I think the real fix is for Carbon to
* look for some volume capability and not depend on hidden
- * semantics agreed between a FS and carbon.
+ * semantics agreed between a FS and carbon.
* f_blocks, f_bfree, and f_bavail set to -1 is the trigger
* for Carbon to set bNoVolumeSizes volume attribute.
- * Without this the webdavfs files cannot be copied onto
+ * Without this the webdavfs files cannot be copied onto
* disk as they look huge. This change should not affect
* XSAN as they should not setting these to -1..
*/
}
error = copyout((caddr_t)&sfs, bufp, copy_size);
}
-
+
if (sizep != NULL) {
*sizep = my_size;
}
return 0;
}
+/*
+ * Get the vnode associated with the (unnamed) snapshot directory
+ * for a filesystem. The snapshot directory vnode is returned with
+ * an iocount on it.
+ */
+int
+vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
+{
+ return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
+}
+
+/*
+ * Get the snapshot vnode.
+ *
+ * If successful, the call returns with an iocount on *rvpp and *sdvpp, and
+ * the caller needs to call nameidone() on ndp.
+ *
+ * If the snapshot vnode exists, it is returned in ndp->ni_vp.
+ *
+ * If it returns with an error, *rvpp and *sdvpp are NULL and nameidone() is
+ * not needed.
+ */
+static int
+vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
+ user_addr_t name, struct nameidata *ndp, int32_t op,
+#if !CONFIG_TRIGGERS
+ __unused
+#endif
+ enum path_operation pathop,
+ vfs_context_t ctx)
+{
+ int error, i;
+ caddr_t name_buf;
+ size_t name_len;
+ struct vfs_attr vfa;
+
+ *sdvpp = NULLVP;
+ *rvpp = NULLVP;
+
+ error = vnode_getfromfd(ctx, dirfd, rvpp);
+ if (error)
+ return (error);
+
+ if (!vnode_isvroot(*rvpp)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* Make sure the filesystem supports snapshots */
+ VFSATTR_INIT(&vfa);
+ VFSATTR_WANTED(&vfa, f_capabilities);
+ if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
+ !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
+ !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
+ VOL_CAP_INT_SNAPSHOT)) ||
+ !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
+ VOL_CAP_INT_SNAPSHOT))) {
+ error = ENOTSUP;
+ goto out;
+ }
+
+ error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
+ if (error)
+ goto out;
+
+ MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
+ if (error)
+ goto out1;
+
+ /*
+	 * Some sanity checks: the name can't be empty, "." or "..", or contain slashes.
+ * (the length returned by copyinstr includes the terminating NUL)
+ */
+ if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
+ (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
+ error = EINVAL;
+ goto out1;
+ }
+ for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
+ if (i < (int)name_len) {
+ error = EINVAL;
+ goto out1;
+ }
+
+#if CONFIG_MACF
+ if (op == CREATE) {
+ error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
+ name_buf);
+ } else if (op == DELETE) {
+ error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
+ name_buf);
+ }
+ if (error)
+ goto out1;
+#endif
+
+ /* Check if the snapshot already exists ... */
+ NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
+ UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
+ ndp->ni_dvp = *sdvpp;
+
+ error = namei(ndp);
+out1:
+ FREE(name_buf, M_TEMP);
+out:
+ if (error) {
+ if (*sdvpp) {
+ vnode_put(*sdvpp);
+ *sdvpp = NULLVP;
+ }
+ if (*rvpp) {
+ vnode_put(*rvpp);
+ *rvpp = NULLVP;
+ }
+ }
+ return (error);
+}
+
+/*
+ * Create a filesystem snapshot (for supporting filesystems).
+ *
+ * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL):
+ * we get to the (unnamed) snapshot directory vnode and create the vnode
+ * for the snapshot in it.
+ *
+ * Restrictions:
+ *
+ * a) The passed-in snapshot name cannot contain slashes.
+ * b) The name can't be "." or "..".
+ *
+ * Since this requires superuser privileges, vnode_authorize calls are not
+ * made.
+ */
+static int
+snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
+ vfs_context_t ctx)
+{
+ vnode_t rvp, snapdvp;
+ int error;
+ struct nameidata namend;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
+ OP_LINK, ctx);
+ if (error)
+ return (error);
+
+ if (namend.ni_vp) {
+ vnode_put(namend.ni_vp);
+ error = EEXIST;
+ } else {
+ struct vnode_attr va;
+ vnode_t vp = NULLVP;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_type, VREG);
+ VATTR_SET(&va, va_mode, 0);
+
+ error = vn_create(snapdvp, &vp, &namend, &va,
+ VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
+ if (!error && vp)
+ vnode_put(vp);
+ }
+
+ nameidone(&namend);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ return (error);
+}
+
+/*
+ * Delete a filesystem snapshot.
+ *
+ * Get the vnode for the unnamed snapshot directory and the snapshot and
+ * delete the snapshot.
+ */
+static int
+snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
+ vfs_context_t ctx)
+{
+ vnode_t rvp, snapdvp;
+ int error;
+ struct nameidata namend;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
+ OP_UNLINK, ctx);
+ if (error)
+ goto out;
+
+ error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
+ VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
+
+ vnode_put(namend.ni_vp);
+ nameidone(&namend);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+out:
+ return (error);
+}
+
+/*
+ * Revert a filesystem to a snapshot
+ *
+ * Marks the filesystem to revert to the given snapshot on next mount.
+ */
+static int
+snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
+ vfs_context_t ctx)
+{
+ int error;
+ vnode_t rvp;
+ mount_t mp;
+ struct fs_snapshot_revert_args revert_data;
+ struct componentname cnp;
+ caddr_t name_buf;
+ size_t name_len;
+
+ error = vnode_getfromfd(ctx, dirfd, &rvp);
+ if (error) {
+ return (error);
+ }
+ mp = vnode_mount(rvp);
+
+ MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ vnode_put(rvp);
+ return (error);
+ }
+
+#if CONFIG_MACF
+ error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ vnode_put(rvp);
+ return (error);
+ }
+#endif
+
+ /*
+ * Grab mount_iterref so that we can release the vnode,
+ * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
+ */
+ error = mount_iterref (mp, 0);
+ vnode_put(rvp);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ return (error);
+ }
+
+ memset(&cnp, 0, sizeof(cnp));
+ cnp.cn_pnbuf = (char *)name_buf;
+ cnp.cn_nameiop = LOOKUP;
+ cnp.cn_flags = ISLASTCN | HASBUF;
+ cnp.cn_pnlen = MAXPATHLEN;
+ cnp.cn_nameptr = cnp.cn_pnbuf;
+ cnp.cn_namelen = (int)name_len;
+ revert_data.sr_cnp = &cnp;
+
+ error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
+ mount_iterdrop(mp);
+ FREE(name_buf, M_TEMP);
+
+ if (error) {
+ /* If there was any error, try again using VNOP_IOCTL */
+
+ vnode_t snapdvp;
+ struct nameidata namend;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
+ OP_LOOKUP, ctx);
+ if (error) {
+ return (error);
+ }
+
+
+#ifndef APFSIOC_REVERT_TO_SNAPSHOT
+#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
+#endif
+
+#ifndef APFS_REVERT_TO_SNAPSHOT
+#define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
+#endif
+
+ error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
+ 0, ctx);
+
+ vnode_put(namend.ni_vp);
+ nameidone(&namend);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ }
+
+ return (error);
+}
+
+/*
+ * Rename a filesystem snapshot.
+ *
+ * Get the vnode for the unnamed snapshot directory and the snapshot and
+ * rename the snapshot. This is a very specialised (and simple) case of
+ * rename(2) (which has to deal with a lot more complications). It differs
+ * slightly from rename(2) in that EEXIST is returned if the new name exists.
+ */
+static int
+snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
+ __unused uint32_t flags, vfs_context_t ctx)
+{
+ vnode_t rvp, snapdvp;
+ int error, i;
+ caddr_t newname_buf;
+ size_t name_len;
+ vnode_t fvp;
+ struct nameidata *fromnd, *tond;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata from_node;
+ struct nameidata to_node;
+ } * __rename_data;
+
+ MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ fromnd = &__rename_data->from_node;
+ tond = &__rename_data->to_node;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
+ OP_UNLINK, ctx);
+ if (error)
+ goto out;
+ fvp = fromnd->ni_vp;
+
+ MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
+ if (error)
+ goto out1;
+
+ /*
+	 * Some sanity checks: the new name can't be empty, "." or "..", or
+	 * contain slashes.
+	 * (the length returned by copyinstr includes the terminating NUL)
+	 *
+	 * The FS rename VNOP is supposed to handle this, but we catch it
+	 * here ourselves as well.
+ */
+ if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
+ (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
+ error = EINVAL;
+ goto out1;
+ }
+ for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
+ if (i < (int)name_len) {
+ error = EINVAL;
+ goto out1;
+ }
+
+#if CONFIG_MACF
+ error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
+ newname_buf);
+ if (error)
+ goto out1;
+#endif
+
+ NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
+ UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
+ tond->ni_dvp = snapdvp;
+
+ error = namei(tond);
+ if (error) {
+ goto out2;
+ } else if (tond->ni_vp) {
+ /*
+ * snapshot rename behaves differently than rename(2) - if the
+ * new name exists, EEXIST is returned.
+ */
+ vnode_put(tond->ni_vp);
+ error = EEXIST;
+ goto out2;
+ }
+
+ error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
+ &tond->ni_cnd, ctx);
+
+out2:
+ nameidone(tond);
+out1:
+ FREE(newname_buf, M_TEMP);
+ vnode_put(fvp);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ nameidone(fromnd);
+out:
+ FREE(__rename_data, M_TEMP);
+ return (error);
+}
+
+/*
+ * Mount a filesystem snapshot.
+ *
+ * Get the vnode for the unnamed snapshot directory and the snapshot and
+ * mount the snapshot.
+ */
+static int
+snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
+ __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
+{
+ vnode_t rvp, snapdvp, snapvp, vp, pvp;
+ int error;
+ struct nameidata *snapndp, *dirndp;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata snapnd;
+ struct nameidata dirnd;
+ } * __snapshot_mount_data;
+
+ MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
+ M_TEMP, M_WAITOK);
+ snapndp = &__snapshot_mount_data->snapnd;
+ dirndp = &__snapshot_mount_data->dirnd;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
+ OP_LOOKUP, ctx);
+ if (error)
+ goto out;
+
+ snapvp = snapndp->ni_vp;
+ if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
+ error = EIO;
+ goto out1;
+ }
+
+ /* Get the vnode to be covered */
+ NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
+ UIO_USERSPACE, directory, ctx);
+ error = namei(dirndp);
+ if (error)
+ goto out1;
+
+ vp = dirndp->ni_vp;
+ pvp = dirndp->ni_dvp;
+
+ if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
+ error = EINVAL;
+ } else {
+ mount_t mp = vnode_mount(rvp);
+ struct fs_snapshot_mount_args smnt_data;
+
+ smnt_data.sm_mp = mp;
+ smnt_data.sm_cnp = &snapndp->ni_cnd;
+ error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
+ &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
+ KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+ }
+
+ vnode_put(vp);
+ vnode_put(pvp);
+ nameidone(dirndp);
+out1:
+ vnode_put(snapvp);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ nameidone(snapndp);
+out:
+ FREE(__snapshot_mount_data, M_TEMP);
+ return (error);
+}
+
+/*
+ * Root from a snapshot of the filesystem
+ *
+ * Marks the filesystem to root from the given snapshot on next boot.
+ */
+static int
+snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
+ vfs_context_t ctx)
+{
+ int error;
+ vnode_t rvp;
+ mount_t mp;
+ struct fs_snapshot_root_args root_data;
+ struct componentname cnp;
+ caddr_t name_buf;
+ size_t name_len;
+
+ error = vnode_getfromfd(ctx, dirfd, &rvp);
+ if (error) {
+ return (error);
+ }
+ mp = vnode_mount(rvp);
+
+ MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ vnode_put(rvp);
+ return (error);
+ }
+
+ // XXX MAC checks ?
+
+ /*
+ * Grab mount_iterref so that we can release the vnode,
+ * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
+ */
+ error = mount_iterref (mp, 0);
+ vnode_put(rvp);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ return (error);
+ }
+
+ memset(&cnp, 0, sizeof(cnp));
+ cnp.cn_pnbuf = (char *)name_buf;
+ cnp.cn_nameiop = LOOKUP;
+ cnp.cn_flags = ISLASTCN | HASBUF;
+ cnp.cn_pnlen = MAXPATHLEN;
+ cnp.cn_nameptr = cnp.cn_pnbuf;
+ cnp.cn_namelen = (int)name_len;
+ root_data.sr_cnp = &cnp;
+
+ error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
+
+ mount_iterdrop(mp);
+ FREE(name_buf, M_TEMP);
+
+ return (error);
+}
+
+/*
+ * FS snapshot operations dispatcher
+ */
+int
+fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
+ __unused int32_t *retval)
+{
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ AUDIT_ARG(fd, uap->dirfd);
+ AUDIT_ARG(value32, uap->op);
+
+ error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
+ if (error)
+ return (error);
+
+ switch (uap->op) {
+ case SNAPSHOT_OP_CREATE:
+ error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
+ break;
+ case SNAPSHOT_OP_DELETE:
+ error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
+ break;
+ case SNAPSHOT_OP_RENAME:
+ error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
+ uap->flags, ctx);
+ break;
+ case SNAPSHOT_OP_MOUNT:
+ error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
+ uap->data, uap->flags, ctx);
+ break;
+ case SNAPSHOT_OP_REVERT:
+ error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
+ break;
+ case SNAPSHOT_OP_ROOT:
+ error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
+ break;
+ default:
+ error = ENOSYS;
+ }
+
+ return (error);
+}
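+
+/*
+ * Illustrative userland sketch: assumes the thin wrappers declared in
+ * <sys/snapshot.h> (fs_snapshot_create() and friends), which funnel into
+ * the dispatcher above; the PRIV_VFS_SNAPSHOT check must pass and dirfd
+ * must reference the root of the volume.
+ *
+ *	int dirfd = open("/Volumes/Data", O_RDONLY);
+ *	if (fs_snapshot_create(dirfd, "backup.1", 0) != 0)
+ *		err(1, "fs_snapshot_create");
+ *	if (fs_snapshot_rename(dirfd, "backup.1", "backup.old", 0) != 0)
+ *		err(1, "fs_snapshot_rename");
+ *	if (fs_snapshot_delete(dirfd, "backup.old", 0) != 0)
+ *		err(1, "fs_snapshot_delete");
+ */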