/*
- * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* External virtual filesystem routines
*/
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc_internal.h>
#include <string.h>
#include <machine/spl.h>
-
+#include <machine/machine_routines.h>
#include <kern/assert.h>
#include <mach/kern_return.h>
void
vnode_iterate_setup(mount_t mp) /* flag mp as having a vnode iteration in progress */
{
- while (mp->mnt_lflag & MNT_LITER) {
- mp->mnt_lflag |= MNT_LITERWAIT;
- msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", NULL);
- }
-
mp->mnt_lflag |= MNT_LITER; /* the removed MNT_LITERWAIT sleep loop is gone: callers in this file now take mount_iterate_lock(mp) before mount_lock(mp) and this call */
-
}
int /* NOTE(review): shown as returning int but no return statement is visible in this hunk -- confirm against the full file */
vnode_iterate_clear(mount_t mp)
{
mp->mnt_lflag &= ~MNT_LITER; /* iteration finished: drop the in-progress flag; the removed lines also woke MNT_LITERWAIT sleepers */
- if (mp->mnt_lflag & MNT_LITERWAIT) {
- mp->mnt_lflag &= ~MNT_LITERWAIT;
- wakeup(mp);
- }
}
int vid, retval;
int ret = 0;
+ /*
+ * The mount iterate mutex is held for the duration of the iteration.
+ * This can be done by a state flag on the mount structure but we can
+ * run into priority inversion issues sometimes.
+ * Using a mutex allows us to benefit from the priority donation
+ * mechanisms in the kernel for locks. This mutex should never be
+ * acquired in spin mode and it should be acquired before attempting to
+ * acquire the mount lock.
+ */
+ mount_iterate_lock(mp);
+
mount_lock(mp);
vnode_iterate_setup(mp);
- /* it is returns 0 then there is nothing to do */
+ /* If it returns 0 then there is nothing to do */
retval = vnode_iterate_prepare(mp);
if (retval == 0) {
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return(ret);
}
(void)vnode_iterate_reloadq(mp);
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return (ret);
}
lck_mtx_unlock(&mp->mnt_renamelock);
}
+void
+mount_iterate_lock(mount_t mp) /* acquire the mount's iteration mutex, mp->mnt_iter_lock */
+{
+ lck_mtx_lock(&mp->mnt_iter_lock); /* callers in this file take this before mount_lock(mp) */
+}
+
+void
+mount_iterate_unlock(mount_t mp) /* release the mount's iteration mutex, mp->mnt_iter_lock */
+{
+ lck_mtx_unlock(&mp->mnt_iter_lock); /* callers in this file do this after mount_unlock(mp) */
+}
+
void
mount_lock(mount_t mp)
{
bdevvp_rootvp = rootvp;
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
- if (vfsp->vfc_mountroot == NULL)
+ if (vfsp->vfc_mountroot == NULL
+ && !ISSET(vfsp->vfc_vfsflags, VFC_VFSCANMOUNTROOT)) {
continue;
+ }
mp = vfs_rootmountalloc_internal(vfsp, "root_device");
mp->mnt_devvp = rootvp;
- if ((error = (*vfsp->vfc_mountroot)(mp, rootvp, ctx)) == 0) {
+ if (vfsp->vfc_mountroot)
+ error = (*vfsp->vfc_mountroot)(mp, rootvp, ctx);
+ else
+ error = VFS_MOUNT(mp, rootvp, 0, ctx);
+
+ if (!error) {
if ( bdevvp_rootvp != rootvp ) {
/*
* rootvp changed...
*/
vfs_init_io_attributes(rootvp, mp);
- if ((mp->mnt_ioflags & MNT_IOFLAGS_FUSION_DRIVE) &&
- (mp->mnt_ioflags & MNT_IOFLAGS_IOSCHED_SUPPORTED)) {
- /*
- * only for CF
- */
+ if (mp->mnt_ioflags & MNT_IOFLAGS_FUSION_DRIVE) {
root_is_CF_drive = TRUE;
}
+
/*
* Shadow the VFC_VFSNATIVEXATTR flag to MNTK_EXTENDED_ATTRS.
*/
(vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
}
+
+ if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
+ (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
+ mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
+ }
}
/*
fsid_t tfsid;
int mtype;
- mount_t nmp;
mount_list_lock();
tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen);
tfsid.val[1] = mtype;
- TAILQ_FOREACH(nmp, &mountlist, mnt_list) {
- while (vfs_getvfs_locked(&tfsid)) {
- if (++mntid_gen == 0)
- mntid_gen++;
- tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen);
- }
+ while (vfs_getvfs_locked(&tfsid)) {
+ if (++mntid_gen == 0)
+ mntid_gen++;
+ tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen);
}
+
mp->mnt_vfsstat.f_fsid.val[0] = tfsid.val[0];
mp->mnt_vfsstat.f_fsid.val[1] = tfsid.val[1];
mount_list_unlock();
int retval;
unsigned int vid;
+ /*
+ * See comments in vnode_iterate() for the rationale for this lock
+ */
+ mount_iterate_lock(mp);
+
mount_lock(mp);
vnode_iterate_setup(mp);
/*
if (vnode_umount_preflight(mp, skipvp, flags)) {
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return(EBUSY);
}
}
loop:
- /* it is returns 0 then there is nothing to do */
+ /* If it returns 0 then there is nothing to do */
retval = vnode_iterate_prepare(mp);
if (retval == 0) {
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return(retval);
}
}
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
if (busy && ((flags & FORCECLOSE)==0))
return (EBUSY);
int count;
int vid;
+ if (!vnode_isspec(vp)) {
+ return (vp->v_usecount - vp->v_kusecount);
+ }
+
loop:
if (!vnode_isaliased(vp))
return (vp->v_specinfo->si_opencount);
}
-__private_extern__ int
-is_package_name(const char *name, int len)
+int is_package_name(const char *name, int len)
{
int i, extlen;
const char *ptr, *name_ext;
case VFS_CTL_DISC:
case VFS_CTL_SERVERINFO:
return 1;
- break;
default:
break;
case AFPFS_VFS_CTL_NETCHANGE:
case AFPFS_VFS_CTL_VOLCHANGE:
return 1;
- break;
}
}
off_t readsegsize = 0;
off_t writesegsize = 0;
off_t alignment = 0;
+ u_int32_t minsaturationbytecount = 0;
u_int32_t ioqueue_depth = 0;
u_int32_t blksize;
u_int64_t temp;
if (features & DK_FEATURE_FORCE_UNIT_ACCESS)
mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED;
+
+ if (VNOP_IOCTL(devvp, DKIOCGETIOMINSATURATIONBYTECOUNT, (caddr_t)&minsaturationbytecount, 0, ctx) == 0) {
+ mp->mnt_minsaturationbytecount = minsaturationbytecount;
+ } else {
+ mp->mnt_minsaturationbytecount = 0;
+ }
if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, ctx) == 0)
cs_present = TRUE;
sfs.f_ffree = (user64_long_t)sp->f_ffree;
sfs.f_fsid = sp->f_fsid;
sfs.f_owner = sp->f_owner;
-
+#ifdef NFSCLIENT
if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
- } else {
+ } else
+#endif
+ {
strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
}
strlcpy(sfs.f_mntonname, sp->f_mntonname, MNAMELEN);
sfs.f_ffree = (user32_long_t)sp->f_ffree;
sfs.f_fsid = sp->f_fsid;
sfs.f_owner = sp->f_owner;
-
+
+#ifdef NFSCLIENT
+ /*
+ * Use the same NFSCLIENT guard as the 64-bit copy-out of this logic
+ * earlier in the file; with a different macro name the typename
+ * override would be compiled out of the 32-bit path only.
+ */
if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
- } else {
+ } else
+#endif
+ {
strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
}
strlcpy(sfs.f_mntonname, sp->f_mntonname, MNAMELEN);
static int filt_fsattach(struct knote *kn);
static void filt_fsdetach(struct knote *kn);
static int filt_fsevent(struct knote *kn, long hint);
+static int filt_fstouch(struct knote *kn, struct kevent_internal_s *kev);
+static int filt_fsprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
struct filterops fs_filtops = {
.f_attach = filt_fsattach,
.f_detach = filt_fsdetach,
.f_event = filt_fsevent,
+ .f_touch = filt_fstouch,
+ .f_process = filt_fsprocess,
};
static int
filt_fsattach(struct knote *kn)
{
-
lck_mtx_lock(fs_klist_lock);
- kn->kn_flags |= EV_CLEAR;
KNOTE_ATTACH(&fs_klist, kn);
lck_mtx_unlock(fs_klist_lock);
+
+ /*
+ * filter only sees future events,
+ * so it can't be fired already.
+ */
return (0);
}
return (kn->kn_fflags != 0);
}
+/*
+ * filt_fstouch - apply an updated registration (kev) to an existing
+ * filesystem-event knote.
+ *
+ * Runs entirely under fs_klist_lock.  Returns non-zero when the knote
+ * still has event bits recorded in kn_fflags, zero otherwise.
+ */
+static int
+filt_fstouch(struct knote *kn, struct kevent_internal_s *kev)
+{
+	int pending;
+
+	lck_mtx_lock(fs_klist_lock);
+
+	/* Adopt the caller's new interest mask. */
+	kn->kn_sfflags = kev->fflags;
+
+	/* udata is replaced unless the knote is flagged KN_UDATA_SPECIFIC. */
+	if (!(kn->kn_status & KN_UDATA_SPECIFIC)) {
+		kn->kn_udata = kev->udata;
+	}
+
+	/*
+	 * The event filter above records bits even if nobody is looking
+	 * for them.  Those bits are preserved even if the new mask is more
+	 * selective than before: for compatibility with previous
+	 * implementations kn_fflags is intentionally NOT narrowed to
+	 * kn_sfflags here.
+	 */
+	pending = (kn->kn_fflags != 0);
+
+	lck_mtx_unlock(fs_klist_lock);
+
+	return (pending);
+}
+
+/*
+ * filt_fsprocess - deliver a pending filesystem event, if any.
+ *
+ * Under fs_klist_lock: when kn_fflags is non-zero the knote's kevent is
+ * copied out to *kev, after which EV_CLEAR is set on the knote and its
+ * kn_fflags / kn_data are reset.  `data' is unused.
+ *
+ * Returns non-zero when an event was copied out, zero otherwise.
+ */
+static int
+filt_fsprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
+{
+#pragma unused(data)
+	int fired = 0;
+
+	lck_mtx_lock(fs_klist_lock);
+	if (kn->kn_fflags != 0) {
+		fired = 1;
+		/* snapshot first, then reset the knote for the next event */
+		*kev = kn->kn_kevent;
+		kn->kn_flags |= EV_CLEAR; /* automatic */
+		kn->kn_fflags = 0;
+		kn->kn_data = 0;
+	}
+	lck_mtx_unlock(fs_klist_lock);
+
+	return (fired);
+}
+
static int
sysctl_vfs_noremotehang(__unused struct sysctl_oid *oidp,
__unused void *arg1, __unused int arg2, struct sysctl_req *req)
panic("new_vnode(%p): free vnode still referenced", vp);
if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0))
panic("new_vnode(%p): vnode seems to be on mount list", vp);
- if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren))
+ if ( !LIST_EMPTY(&vp->v_nclinks) || !TAILQ_EMPTY(&vp->v_ncchildren))
panic("new_vnode(%p): vnode still hooked into the name cache", vp);
} else {
vnode_unlock(vp);
return (vp);
}
-
-
+__attribute__((noreturn))
static void
async_work_continue(void)
{
VLISTNONE(vp); /* avoid double queue removal */
lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr);
+ TAILQ_INIT(&vp->v_ncchildren);
+
klist_init(&vp->v_knotes);
nanouptime(&ts);
vp->v_id = ts.tv_nsec;
return(vnode_isinuse_locked(vp, refcnt, 0));
}
+/*
+ * Accessor: hand back the vnode's v_usecount field as-is.
+ * No locking is performed by this function.
+ */
+int
+vnode_usecount(vnode_t vp)
+{
+	return (vp->v_usecount);
+}
+
+/*
+ * Accessor: hand back the vnode's v_iocount field as-is.
+ * No locking is performed by this function.
+ */
+int
+vnode_iocount(vnode_t vp)
+{
+	return (vp->v_iocount);
+}
static int
vnode_isinuse_locked(vnode_t vp, int refcnt, int locked)
*/
vp->v_flag |= VRAGE;
}
+
+#if CONFIG_SECLUDED_MEMORY
+ switch (secluded_for_filecache) {
+ case 0:
+ /*
+ * secluded_for_filecache == 0:
+ * + no file contents in secluded pool
+ */
+ break;
+ case 1:
+ /*
+ * secluded_for_filecache == 1:
+ * + no files from /
+ * + files from /Applications/ are OK
+ * + files from /Applications/Camera are not OK
+ * + no files that are open for write
+ */
+ if (vnode_vtype(vp) == VREG &&
+ vnode_mount(vp) != NULL &&
+ (! (vfs_flags(vnode_mount(vp)) & MNT_ROOTFS))) {
+ /* not from root filesystem: eligible for secluded pages */
+ memory_object_mark_eligible_for_secluded(
+ ubc_getobject(vp, UBC_FLAGS_NONE),
+ TRUE);
+ }
+ break;
+ case 2:
+ /*
+ * secluded_for_filecache == 2:
+ * + all read-only files OK, except:
+ * + dyld_shared_cache_arm64*
+ * + Camera
+ * + mediaserverd
+ */
+ if (vnode_vtype(vp) == VREG) {
+ memory_object_mark_eligible_for_secluded(
+ ubc_getobject(vp, UBC_FLAGS_NONE),
+ TRUE);
+ }
+ break;
+ default:
+ break;
+ }
+#endif /* CONFIG_SECLUDED_MEMORY */
+
return (0);
error_out:
VFSATTR_WANTED(&va, f_ffree);
VFSATTR_WANTED(&va, f_bsize);
VFSATTR_WANTED(&va, f_fssubtype);
+
+ if ((error = vfs_getattr(mp, &va, ctx)) != 0) {
+ KAUTH_DEBUG("STAT - filesystem returned error %d", error);
+ return(error);
+ }
#if CONFIG_MACF
if (eventtype == VFS_USER_EVENT) {
error = mac_mount_check_getattr(ctx, mp, &va);
return (error);
}
#endif
-
- if ((error = vfs_getattr(mp, &va, ctx)) != 0) {
- KAUTH_DEBUG("STAT - filesystem returned error %d", error);
- return(error);
- }
-
/*
* Unpack into the per-mount structure.
*
boolean_t batched;
struct componentname *cnp;
uint32_t defaulted;
- uint32_t dfflags; // Directory file flags
cnp = &ndp->ni_cnd;
error = 0;
panic("Mode for open, but not trying to open...");
}
- /*
- * Handle inheritance of restricted flag
- */
- error = vnode_flags(dvp, &dfflags, ctx);
- if (error)
- return error;
- if (dfflags & SF_RESTRICTED)
- VATTR_SET(vap, va_flags, SF_RESTRICTED);
/*
* Create the requested node.
static kauth_scope_t vnode_scope;
static int vnode_authorize_callback(kauth_cred_t credential, void *idata, kauth_action_t action,
uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
-static int vnode_authorize_callback_int(__unused kauth_cred_t credential, __unused void *idata, kauth_action_t action,
- uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
+static int vnode_authorize_callback_int(kauth_action_t action, vfs_context_t ctx,
+ vnode_t vp, vnode_t dvp, int *errorp);
typedef struct _vnode_authorize_context {
vnode_t vp;
#define _VAC_IN_GROUP (1<<1)
#define _VAC_IS_DIR_OWNER (1<<2)
#define _VAC_IN_DIR_GROUP (1<<3)
+#define _VAC_NO_VNODE_POINTERS (1<<4)
} *vauth_ctx;
void
* However, some file systems may have limited support.
*/
if ((vp->v_type == VDIR) &&
- !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
+ !(vp->v_mount->mnt_kern_flag & MNTK_DIR_HARDLINKS)) {
return (EPERM); /* POSIX */
}
return (vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx));
}
-int
+int
vn_authorize_rename(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
- struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
- vfs_context_t ctx, void *reserved)
+ struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
+ vfs_context_t ctx, void *reserved)
+{
+ return vn_authorize_renamex(fdvp, fvp, fcnp, tdvp, tvp, tcnp, ctx, 0, reserved); /* flags == 0: ordinary rename, VFS_RENAME_SWAP not set */
+}
+
+int
+vn_authorize_renamex(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
+ struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
+ vfs_context_t ctx, vfs_rename_flags_t flags, void *reserved)
{
int error = 0;
int moving = 0;
+ bool swap = flags & VFS_RENAME_SWAP;
if (reserved != NULL) {
panic("Passed something other than NULL as reserved field!");
error = mac_vnode_check_rename(ctx, fdvp, fvp, fcnp, tdvp, tvp, tcnp);
if (error)
goto out;
+ if (swap) {
+ error = mac_vnode_check_rename(ctx, tdvp, tvp, tcnp, fdvp, fvp, fcnp);
+ if (error)
+ goto out;
+ }
#endif
/***** </MACF> *****/
/***** <MiscChecks> *****/
if (tvp != NULL) {
- if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
- error = ENOTDIR;
- goto out;
- } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
- error = EISDIR;
- goto out;
+ if (!swap) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
}
+ } else if (swap) {
+ /*
+ * Caller should have already checked this and returned
+ * ENOENT. If we send back ENOENT here, caller will retry
+ * which isn't what we want so we send back EINVAL here
+ * instead.
+ */
+ error = EINVAL;
+ goto out;
}
if (fvp == tdvp) {
error = EINVAL;
goto out;
}
+
+ if (swap && fdvp->v_parent == tvp) {
+ error = EINVAL;
+ goto out;
+ }
/***** </MiscChecks> *****/
/***** <Kauth> *****/
- error = 0;
- if ((tvp != NULL) && vnode_isdir(tvp)) {
- if (tvp != fdvp)
- moving = 1;
- } else if (tdvp != fdvp) {
- moving = 1;
- }
-
+ if (swap) {
+ kauth_action_t f = 0, t = 0;
- /*
- * must have delete rights to remove the old name even in
- * the simple case of fdvp == tdvp.
- *
- * If fvp is a directory, and we are changing it's parent,
- * then we also need rights to rewrite its ".." entry as well.
- */
- if (vnode_isdir(fvp)) {
- if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
- goto out;
- } else {
- if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
+ /*
+ * Directories changing parents need ...ADD_SUBDIR... to
+ * permit changing ".."
+ */
+ if (fdvp != tdvp) {
+ if (vnode_isdir(fvp))
+ f = KAUTH_VNODE_ADD_SUBDIRECTORY;
+ if (vnode_isdir(tvp))
+ t = KAUTH_VNODE_ADD_SUBDIRECTORY;
+ }
+ error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | f, ctx);
+ if (error)
goto out;
- }
- if (moving) {
- /* moving into tdvp or tvp, must have rights to add */
- if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
- NULL,
- vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
- ctx)) != 0) {
+ error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE | t, ctx);
+ if (error)
goto out;
+ f = vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE;
+ t = vnode_isdir(tvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE;
+ if (fdvp == tdvp)
+ error = vnode_authorize(fdvp, NULL, f | t, ctx);
+ else {
+ error = vnode_authorize(fdvp, NULL, t, ctx);
+ if (error)
+ goto out;
+ error = vnode_authorize(tdvp, NULL, f, ctx);
}
+ if (error)
+ goto out;
} else {
- /* node staying in same directory, must be allowed to add new name */
- if ((error = vnode_authorize(fdvp, NULL,
- vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ error = 0;
+ if ((tvp != NULL) && vnode_isdir(tvp)) {
+ if (tvp != fdvp)
+ moving = 1;
+ } else if (tdvp != fdvp) {
+ moving = 1;
+ }
+
+ /*
+ * must have delete rights to remove the old name even in
+ * the simple case of fdvp == tdvp.
+ *
+ * If fvp is a directory, and we are changing its parent,
+ * then we also need rights to rewrite its ".." entry as well.
+ */
+ if (vnode_isdir(fvp)) {
+ if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
+ goto out;
+ } else {
+ if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
+ goto out;
+ }
+ if (moving) {
+ /* moving into tdvp or tvp, must have rights to add */
+ if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
+ NULL,
+ vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
+ ctx)) != 0) {
+ goto out;
+ }
+ } else {
+ /* node staying in same directory, must be allowed to add new name */
+ if ((error = vnode_authorize(fdvp, NULL,
+ vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out;
+ }
+ /* overwriting tvp */
+ if ((tvp != NULL) && !vnode_isdir(tvp) &&
+ ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
goto out;
- }
- /* overwriting tvp */
- if ((tvp != NULL) && !vnode_isdir(tvp) &&
- ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
- goto out;
+ }
}
/***** </Kauth> *****/
return vnode_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx);
}
+/*
+ * Authorizer for directory cloning. This does not use vnodes but instead
+ * uses prefilled vnode attributes from the filesystem.
+ *
+ * The same function is called to set up the attributes required, perform the
+ * authorization and cleanup (if required); vattr_op selects the phase:
+ *
+ *	OP_VATTR_SETUP		initialise vap (and dvap, when supplied) and
+ *				mark the attributes the authorization phase
+ *				reads as wanted.  Always returns 0.
+ *	OP_VATTR_CLEANUP	nothing to do for now.  Always returns 0.
+ *	any other value		authorize `action' against vap through
+ *				vnode_attr_authorize(), then fill in the
+ *				uid, gid and SF_RESTRICTED state of the new
+ *				object in vap.
+ *
+ * Parameters:
+ *	vap		attributes of the source object; updated in place
+ *	action		kauth action passed to vnode_attr_authorize()
+ *	dvap		attributes of the destination directory; may be NULL,
+ *			in which case nothing is inherited from it
+ *	sdvp		unused
+ *	mp		mount the clone is created on
+ *	vattr_op	phase selector, see above
+ *	ctx		caller's VFS context
+ *	reserved	unused
+ *
+ * Returns 0 on success, or the error from vnode_attr_authorize().
+ */
+int
+vnode_attr_authorize_dir_clone(struct vnode_attr *vap, kauth_action_t action,
+    struct vnode_attr *dvap, __unused vnode_t sdvp, mount_t mp,
+    dir_clone_authorizer_op_t vattr_op, vfs_context_t ctx,
+    __unused void *reserved)
+{
+	int error;
+	int is_suser = vfs_context_issuser(ctx);
+	int dir_flags_active;
+
+	if (vattr_op == OP_VATTR_SETUP) {
+		VATTR_INIT(vap);
+
+		/*
+		 * When ACL inheritance is implemented, both vap->va_acl and
+		 * dvap->va_acl will be required (even as superuser).
+		 */
+		VATTR_WANTED(vap, va_type);
+		VATTR_WANTED(vap, va_mode);
+		VATTR_WANTED(vap, va_flags);
+		VATTR_WANTED(vap, va_uid);
+		VATTR_WANTED(vap, va_gid);
+		if (dvap) {
+			VATTR_INIT(dvap);
+			VATTR_WANTED(dvap, va_flags);
+		}
+
+		if (!is_suser) {
+			/*
+			 * If not superuser, we have to evaluate ACLs and
+			 * need the target directory gid to set the initial
+			 * gid of the new object.
+			 */
+			VATTR_WANTED(vap, va_acl);
+			if (dvap)
+				VATTR_WANTED(dvap, va_gid);
+		}
+
+		return (0);
+	} else if (vattr_op == OP_VATTR_CLEANUP) {
+		return (0); /* Nothing to do for now */
+	}
+
+	/* dvap isn't used for authorization */
+	error = vnode_attr_authorize(vap, NULL, mp, action, ctx);
+
+	if (error)
+		return (error);
+
+	/*
+	 * vn_attribute_prepare should be able to accept attributes as well as
+	 * vnodes but for now we do this inline.
+	 */
+	if (!is_suser) {
+		/*
+		 * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit
+		 * owner is set, that owner takes ownership of all new files.
+		 */
+		if ((mp->mnt_flag & MNT_IGNORE_OWNERSHIP) &&
+		    (mp->mnt_fsowner != KAUTH_UID_NONE)) {
+			VATTR_SET(vap, va_uid, mp->mnt_fsowner);
+		} else {
+			/* default owner is current user */
+			VATTR_SET(vap, va_uid,
+			    kauth_cred_getuid(vfs_context_ucred(ctx)));
+		}
+
+		if ((mp->mnt_flag & MNT_IGNORE_OWNERSHIP) &&
+		    (mp->mnt_fsgroup != KAUTH_GID_NONE)) {
+			VATTR_SET(vap, va_gid, mp->mnt_fsgroup);
+		} else {
+			/*
+			 * default group comes from parent object,
+			 * fallback to current user.  The setup phase accepts
+			 * a NULL dvap, so it must not be dereferenced
+			 * unchecked here.
+			 */
+			if ((dvap != NULL) && VATTR_IS_SUPPORTED(dvap, va_gid)) {
+				VATTR_SET(vap, va_gid, dvap->va_gid);
+			} else {
+				VATTR_SET(vap, va_gid,
+				    kauth_cred_getgid(vfs_context_ucred(ctx)));
+			}
+		}
+	}
+
+	/* Inherit SF_RESTRICTED bit from destination directory only */
+	dir_flags_active = (dvap != NULL) && VATTR_IS_ACTIVE(dvap, va_flags);
+	if (VATTR_IS_ACTIVE(vap, va_flags)) {
+		VATTR_SET(vap, va_flags,
+		    ((vap->va_flags & ~SF_RESTRICTED))); /* Turn off from source */
+		if (dir_flags_active)
+			VATTR_SET(vap, va_flags,
+			    vap->va_flags | (dvap->va_flags & SF_RESTRICTED));
+	} else if (dir_flags_active) {
+		VATTR_SET(vap, va_flags, (dvap->va_flags & SF_RESTRICTED));
+	}
+
+	return (0);
+}
+
+
/*
* Authorize an operation on a vnode.
*
* - Neither the node nor the directory are immutable.
* - The user is not the superuser.
*
- * Deletion is not permitted if the directory is sticky and the caller is
- * not owner of the node or directory.
+ * The precedence of factors for authorizing or denying delete for a credential
*
- * If either the node grants DELETE, or the directory grants DELETE_CHILD,
- * the node may be deleted. If neither denies the permission, and the
- * caller has Posix write access to the directory, then the node may be
- * deleted.
+ * 1) Explicit ACE on the node. (allow or deny DELETE)
+ * 2) Explicit ACE on the directory (allow or deny DELETE_CHILD).
+ *
+ * If there are conflicting ACEs on the node and the directory, the node
+ * ACE wins.
+ *
+ * 3) Sticky bit on the directory.
+ * Deletion is not permitted if the directory is sticky and the caller is
+ * not owner of the node or directory. The sticky bit rules are like a deny
+ * delete ACE except lower in priority than ACLs either allowing or denying
+ * delete.
+ *
+ * 4) POSIX permissions on the directory.
*
* As an optimization, we cache whether or not delete child is permitted
- * on directories without the sticky bit set.
+ * on directories. This enables us to skip directory ACL and POSIX checks
+ * as we already have the result from those checks. However, we always check the
+ * node ACL and, if the directory has the sticky bit set, we always check its
+ * ACL (even for a directory with an authorized delete child). Furthermore,
+ * caching the delete child authorization is independent of the sticky bit
+ * being set as it is only applicable in determining whether the node can be
+ * deleted or not.
*/
-int
-vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child);
-/*static*/ int
+static int
vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
{
struct vnode_attr *vap = vcp->vap;
struct vnode_attr *dvap = vcp->dvap;
kauth_cred_t cred = vcp->ctx->vc_ucred;
struct kauth_acl_eval eval;
- int error, delete_denied, delete_child_denied, ismember;
+ int error, ismember;
- /* check the ACL on the directory */
- delete_child_denied = 0;
- if (!cached_delete_child && VATTR_IS_NOT(dvap, va_acl, NULL)) {
- eval.ae_requested = KAUTH_VNODE_DELETE_CHILD;
- eval.ae_acl = &dvap->va_acl->acl_ace[0];
- eval.ae_count = dvap->va_acl->acl_entrycount;
+ /* Check the ACL on the node first */
+ if (VATTR_IS_NOT(vap, va_acl, NULL)) {
+ eval.ae_requested = KAUTH_VNODE_DELETE;
+ eval.ae_acl = &vap->va_acl->acl_ace[0];
+ eval.ae_count = vap->va_acl->acl_entrycount;
eval.ae_options = 0;
- if (vauth_dir_owner(vcp))
+ if (vauth_file_owner(vcp))
eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
/*
* We use ENOENT as a marker to indicate we could not get
* have the ACL evaluation answer. Previously, we would
* always deny the operation at this point.
*/
- if ((error = vauth_dir_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
- return(error);
+ if ((error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
+ return (error);
if (error == ENOENT)
eval.ae_options |= KAUTH_AEVAL_IN_GROUP_UNKNOWN;
else if (ismember)
eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;
- /*
- * If there is no entry, we are going to defer to other
- * authorization mechanisms.
- */
- error = kauth_acl_evaluate(cred, &eval);
-
- if (error != 0) {
+ if ((error = kauth_acl_evaluate(cred, &eval)) != 0) {
KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error);
- return(error);
+ return (error);
}
+
switch(eval.ae_result) {
case KAUTH_RESULT_DENY:
- delete_child_denied = 1;
- break;
- /* FALLSTHROUGH */
- case KAUTH_RESULT_ALLOW:
- KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp);
- return(0);
+ KAUTH_DEBUG("%p DENIED - denied by ACL", vcp->vp);
+ return (EACCES);
+ case KAUTH_RESULT_ALLOW:
+ KAUTH_DEBUG("%p ALLOWED - granted by ACL", vcp->vp);
+ return (0);
case KAUTH_RESULT_DEFER:
default:
- /* Effectively the same as !delete_child_denied */
- KAUTH_DEBUG("%p DEFERRED - directory ACL", vcp->vp);
+ /* Defer to directory */
+ KAUTH_DEBUG("%p DEFERRED - by file ACL", vcp->vp);
break;
}
}
- /* check the ACL on the node */
- delete_denied = 0;
- if (VATTR_IS_NOT(vap, va_acl, NULL)) {
- eval.ae_requested = KAUTH_VNODE_DELETE;
- eval.ae_acl = &vap->va_acl->acl_ace[0];
- eval.ae_count = vap->va_acl->acl_entrycount;
+ /*
+ * Without a sticky bit, a previously authorized delete child is
+ * sufficient to authorize this delete.
+ *
+ * If the sticky bit is set, a directory ACL which allows delete child
+ * overrides a (potential) sticky bit deny. The authorized delete child
+ * cannot tell us if it was authorized because of an explicit delete
+ * child allow ACE or because of POSIX permissions so we have to check
+ * the directory ACL every time if the directory has a sticky bit.
+ */
+ if (!(dvap->va_mode & S_ISTXT) && cached_delete_child) {
+ KAUTH_DEBUG("%p ALLOWED - granted by directory ACL or POSIX permissions and no sticky bit on directory", vcp->vp);
+ return (0);
+ }
+
+ /* check the ACL on the directory */
+ if (VATTR_IS_NOT(dvap, va_acl, NULL)) {
+ eval.ae_requested = KAUTH_VNODE_DELETE_CHILD;
+ eval.ae_acl = &dvap->va_acl->acl_ace[0];
+ eval.ae_count = dvap->va_acl->acl_entrycount;
eval.ae_options = 0;
- if (vauth_file_owner(vcp))
+ if (vauth_dir_owner(vcp))
eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
/*
* We use ENOENT as a marker to indicate we could not get
* have the ACL evaluation answer. Previously, we would
* always deny the operation at this point.
*/
- if ((error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
+ if ((error = vauth_dir_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
return(error);
if (error == ENOENT)
eval.ae_options |= KAUTH_AEVAL_IN_GROUP_UNKNOWN;
eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;
- if ((error = kauth_acl_evaluate(cred, &eval)) != 0) {
+ /*
+ * If there is no entry, we are going to defer to other
+ * authorization mechanisms.
+ */
+ error = kauth_acl_evaluate(cred, &eval);
+
+ if (error != 0) {
KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error);
- return(error);
+ return (error);
}
-
switch(eval.ae_result) {
case KAUTH_RESULT_DENY:
- delete_denied = 1;
- break;
+ KAUTH_DEBUG("%p DENIED - denied by directory ACL", vcp->vp);
+ return (EACCES);
case KAUTH_RESULT_ALLOW:
- KAUTH_DEBUG("%p ALLOWED - granted by file ACL", vcp->vp);
- return(0);
+ KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp);
+ if (!cached_delete_child && vcp->dvp) {
+ vnode_cache_authorized_action(vcp->dvp,
+ vcp->ctx, KAUTH_VNODE_DELETE_CHILD);
+ }
+ return (0);
case KAUTH_RESULT_DEFER:
default:
- /* Effectively the same as !delete_child_denied */
- KAUTH_DEBUG("%p DEFERRED%s - by file ACL", vcp->vp, delete_denied ? "(DENY)" : "");
+ /* Deferred by directory ACL */
+ KAUTH_DEBUG("%p DEFERRED - directory ACL", vcp->vp);
break;
}
}
- /* if denied by ACL on directory or node, return denial */
- if (delete_denied || delete_child_denied) {
- KAUTH_DEBUG("%p DENIED - denied by ACL", vcp->vp);
- return(EACCES);
+ /*
+ * From this point, we can't explicitly allow and if we reach the end
+ * of the function without a denial, then the delete is authorized.
+ */
+ if (!cached_delete_child) {
+ if (vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */) != 0) {
+ KAUTH_DEBUG("%p DENIED - denied by posix permisssions", vcp->vp);
+ return (EACCES);
+ }
+ /*
+ * Cache the authorized action on the vnode if allowed by the
+ * directory ACL or POSIX permissions. It is correct to cache
+ * this action even if sticky bit would deny deleting the node.
+ */
+ if (vcp->dvp) {
+ vnode_cache_authorized_action(vcp->dvp, vcp->ctx,
+ KAUTH_VNODE_DELETE_CHILD);
+ }
}
/* enforce sticky bit behaviour */
if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
KAUTH_DEBUG("%p DENIED - sticky bit rules (user %d file %d dir %d)",
vcp->vp, cred->cr_posix.cr_uid, vap->va_uid, dvap->va_uid);
- return(EACCES);
- }
-
- /* check the directory */
- if (!cached_delete_child && (error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
- KAUTH_DEBUG("%p DENIED - denied by posix permisssions", vcp->vp);
- return(error);
+ return (EACCES);
}
/* not denied, must be OK */
- return(0);
+ return (0);
}
* Check for file immutability.
*/
static int
-vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore)
+vnode_authorize_checkimmutable(mount_t mp, struct vnode_attr *vap, int rights, int ignore)
{
- mount_t mp;
int error;
int append;
*
* Sockets, fifos and devices require special handling.
*/
- switch(vp->v_type) {
+ switch(vap->va_type) {
case VSOCK:
case VFIFO:
case VBLK:
if (rights & KAUTH_VNODE_WRITE_RIGHTS) {
/* check per-filesystem options if possible */
- mp = vp->v_mount;
if (mp != NULL) {
/* check for no-EA filesystems */
* allowable for a UF_APPEND file.
*/
append = 0;
- if (vp->v_type == VDIR) {
+ if (vap->va_type == VDIR) {
if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) == rights)
append = 1;
} else {
static int
-vnode_authorize_callback(kauth_cred_t cred, void *idata, kauth_action_t action,
- uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
+vnode_authorize_callback(__unused kauth_cred_t cred, __unused void *idata,
+ kauth_action_t action, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
+ uintptr_t arg3)
{
vfs_context_t ctx;
vnode_t cvp = NULLVP;
goto out;
}
defer:
- result = vnode_authorize_callback_int(cred, idata, action, arg0, arg1, arg2, arg3);
+ result = vnode_authorize_callback_int(action, ctx, vp, dvp, (int *)arg3);
if (result == KAUTH_RESULT_ALLOW && cvp != NULLVP) {
KAUTH_DEBUG("%p - caching action = %x", cvp, action);
return result;
}
+/*
+ * Core of vnode authorization, driven purely by the attributes held in
+ * the authorization context (vcp->vap for the node, vcp->dvap for its
+ * parent directory) rather than by vnode pointers.
+ *
+ * Parameters:
+ *	vcp		authorization context
+ *	mp		mount, passed to the immutability checks
+ *	rights		rights being requested
+ *	is_suser	non-zero when the caller is the superuser
+ *	found_deny	out: passed to vnode_authorize_simple(); forced to
+ *			TRUE on the superuser path
+ *	noimmutable	passed to the node's immutability check as its
+ *			`ignore' argument
+ *	parent_authorized_for_delete_child
+ *			non-zero when DELETE_CHILD on the parent is already
+ *			known to be authorized; skips the parent
+ *			immutability check
+ *
+ * Returns 0 when the request is authorized, otherwise an errno value.
+ */
+static int
+vnode_attr_authorize_internal(vauth_ctx vcp, mount_t mp,
+    kauth_ace_rights_t rights, int is_suser, boolean_t *found_deny,
+    int noimmutable, int parent_authorized_for_delete_child)
+{
+	int result;
+
+	/*
+	 * Check for immutability.
+	 *
+	 * In the deletion case, parent directory immutability vetoes specific
+	 * file rights.
+	 */
+	if ((result = vnode_authorize_checkimmutable(mp, vcp->vap, rights,
+	    noimmutable)) != 0)
+		goto out;
+
+	if ((rights & KAUTH_VNODE_DELETE) &&
+	    !parent_authorized_for_delete_child) {
+		result = vnode_authorize_checkimmutable(mp, vcp->dvap,
+		    KAUTH_VNODE_DELETE_CHILD, 0);
+		if (result)
+			goto out;
+	}
+
+	/*
+	 * Clear rights that have been authorized by reaching this point, bail if nothing left to
+	 * check.
+	 */
+	rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE);
+	if (rights == 0)
+		goto out;
+
+	/*
+	 * If we're not the superuser, authorize based on file properties;
+	 * note that even if parent_authorized_for_delete_child is TRUE, we
+	 * need to check on the node itself.
+	 */
+	if (!is_suser) {
+		/* process delete rights */
+		if ((rights & KAUTH_VNODE_DELETE) &&
+		    ((result = vnode_authorize_delete(vcp, parent_authorized_for_delete_child)) != 0))
+			goto out;
+
+		/* process remaining rights */
+		if ((rights & ~KAUTH_VNODE_DELETE) &&
+		    (result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE, found_deny)) != 0)
+			goto out;
+	} else {
+		/*
+		 * Execute is only granted to root if one of the x bits is set. This check only
+		 * makes sense if the posix mode bits are actually supported.
+		 */
+		if ((rights & KAUTH_VNODE_EXECUTE) &&
+		    (vcp->vap->va_type == VREG) &&
+		    VATTR_IS_SUPPORTED(vcp->vap, va_mode) &&
+		    !(vcp->vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+			result = EPERM;
+			/*
+			 * Log through the context: this function has no
+			 * local vp / va, only vcp.
+			 */
+			KAUTH_DEBUG("%p DENIED - root execute requires at least one x bit in 0x%x", vcp->vp, vcp->vap->va_mode);
+			goto out;
+		}
+
+		/* Assume that there were DENYs so we don't wrongly cache KAUTH_VNODE_SEARCHBYANYONE */
+		*found_deny = TRUE;
+
+		KAUTH_DEBUG("%p ALLOWED - caller is superuser", vcp->vp);
+	}
+out:
+	return (result);
+}
static int
-vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action,
- uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
+vnode_authorize_callback_int(kauth_action_t action, vfs_context_t ctx,
+ vnode_t vp, vnode_t dvp, int *errorp)
{
struct _vnode_authorize_context auth_context;
vauth_ctx vcp;
- vfs_context_t ctx;
- vnode_t vp, dvp;
kauth_cred_t cred;
kauth_ace_rights_t rights;
struct vnode_attr va, dva;
int result;
- int *errorp;
int noimmutable;
boolean_t parent_authorized_for_delete_child = FALSE;
boolean_t found_deny = FALSE;
boolean_t parent_ref= FALSE;
+ boolean_t is_suser = FALSE;
vcp = &auth_context;
- ctx = vcp->ctx = (vfs_context_t)arg0;
- vp = vcp->vp = (vnode_t)arg1;
- dvp = vcp->dvp = (vnode_t)arg2;
- errorp = (int *)arg3;
+ vcp->ctx = ctx;
+ vcp->vp = vp;
+ vcp->dvp = dvp;
/*
* Note that we authorize against the context, not the passed cred
* (the same thing anyway)
if (vnode_cache_is_authorized(dvp, ctx, KAUTH_VNODE_DELETE_CHILD) == TRUE)
parent_authorized_for_delete_child = TRUE;
} else {
- dvp = NULL;
+ vcp->dvp = NULLVP;
+ vcp->dvap = NULL;
}
/*
goto out;
/*
- * Get vnode attributes and extended security information for the vnode
- * and directory if required.
- */
- VATTR_WANTED(&va, va_mode);
- VATTR_WANTED(&va, va_uid);
- VATTR_WANTED(&va, va_gid);
- VATTR_WANTED(&va, va_flags);
- VATTR_WANTED(&va, va_acl);
- if ((result = vnode_getattr(vp, &va, ctx)) != 0) {
- KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result);
- goto out;
- }
- if (dvp) {
- VATTR_WANTED(&dva, va_mode);
- VATTR_WANTED(&dva, va_uid);
- VATTR_WANTED(&dva, va_gid);
- VATTR_WANTED(&dva, va_flags);
- VATTR_WANTED(&dva, va_acl);
- if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) {
- KAUTH_DEBUG("%p ERROR - failed to get directory vnode attributes - %d", vp, result);
- goto out;
- }
- }
-
- /*
- * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes
- * *_EXTATTRIBUTES.
+ * If the vnode is a namedstream (extended attribute) data vnode (eg.
+ * a resource fork), *_DATA becomes *_EXTATTRIBUTES.
*/
if (vnode_isnamedstream(vp)) {
if (rights & KAUTH_VNODE_READ_DATA) {
rights &= ~KAUTH_VNODE_WRITE_DATA;
rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES;
}
+
+ /*
+ * Point 'vp' to the namedstream's parent for ACL checking
+ */
+ if ((vp->v_parent != NULL) &&
+ (vget_internal(vp->v_parent, 0, VNODE_NODEAD | VNODE_DRAINO) == 0)) {
+ parent_ref = TRUE;
+ vcp->vp = vp = vp->v_parent;
+ }
+ }
+
+ if (vfs_context_issuser(ctx)) {
+ /*
+ * if we're not asking for execute permissions or modifications,
+ * then we're done, this action is authorized.
+ */
+ if (!(rights & (KAUTH_VNODE_EXECUTE | KAUTH_VNODE_WRITE_RIGHTS)))
+ goto success;
+
+ is_suser = TRUE;
}
/*
- * Point 'vp' to the resource fork's parent for ACL checking
+ * Get vnode attributes and extended security information for the vnode
+ * and directory if required.
+ *
+ * If we're root we only want mode bits and flags for checking
+ * execute and immutability.
*/
- if (vnode_isnamedstream(vp) &&
- (vp->v_parent != NULL) &&
- (vget_internal(vp->v_parent, 0, VNODE_NODEAD | VNODE_DRAINO) == 0)) {
- parent_ref = TRUE;
- vcp->vp = vp = vp->v_parent;
- if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL))
- kauth_acl_free(va.va_acl);
- VATTR_INIT(&va);
- VATTR_WANTED(&va, va_mode);
+ VATTR_WANTED(&va, va_mode);
+ VATTR_WANTED(&va, va_flags);
+ if (!is_suser) {
VATTR_WANTED(&va, va_uid);
VATTR_WANTED(&va, va_gid);
- VATTR_WANTED(&va, va_flags);
VATTR_WANTED(&va, va_acl);
- if ((result = vnode_getattr(vp, &va, ctx)) != 0)
- goto out;
}
-
- /*
- * Check for immutability.
- *
- * In the deletion case, parent directory immutability vetoes specific
- * file rights.
- */
- if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0)
- goto out;
- if ((rights & KAUTH_VNODE_DELETE) &&
- parent_authorized_for_delete_child == FALSE &&
- ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0))
- goto out;
-
- /*
- * Clear rights that have been authorized by reaching this point, bail if nothing left to
- * check.
- */
- rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE);
- if (rights == 0)
+ if ((result = vnode_getattr(vp, &va, ctx)) != 0) {
+ KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result);
goto out;
+ }
+ VATTR_WANTED(&va, va_type);
+ VATTR_RETURN(&va, va_type, vnode_vtype(vp));
- /*
- * If we're not the superuser, authorize based on file properties;
- * note that even if parent_authorized_for_delete_child is TRUE, we
- * need to check on the node itself.
- */
- if (!vfs_context_issuser(ctx)) {
- /* process delete rights */
- if ((rights & KAUTH_VNODE_DELETE) &&
- ((result = vnode_authorize_delete(vcp, parent_authorized_for_delete_child)) != 0))
- goto out;
-
- /* process remaining rights */
- if ((rights & ~KAUTH_VNODE_DELETE) &&
- (result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE, &found_deny)) != 0)
- goto out;
- } else {
-
- /*
- * Execute is only granted to root if one of the x bits is set. This check only
- * makes sense if the posix mode bits are actually supported.
- */
- if ((rights & KAUTH_VNODE_EXECUTE) &&
- (vp->v_type == VREG) &&
- VATTR_IS_SUPPORTED(&va, va_mode) &&
- !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
- result = EPERM;
- KAUTH_DEBUG("%p DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode);
+ if (vcp->dvp) {
+ VATTR_WANTED(&dva, va_mode);
+ VATTR_WANTED(&dva, va_flags);
+ if (!is_suser) {
+ VATTR_WANTED(&dva, va_uid);
+ VATTR_WANTED(&dva, va_gid);
+ VATTR_WANTED(&dva, va_acl);
+ }
+ if ((result = vnode_getattr(vcp->dvp, &dva, ctx)) != 0) {
+ KAUTH_DEBUG("%p ERROR - failed to get directory vnode attributes - %d", vp, result);
goto out;
}
-
- KAUTH_DEBUG("%p ALLOWED - caller is superuser", vp);
+ VATTR_WANTED(&dva, va_type);
+ VATTR_RETURN(&dva, va_type, vnode_vtype(vcp->dvp));
}
+
+ result = vnode_attr_authorize_internal(vcp, vp->v_mount, rights, is_suser,
+ &found_deny, noimmutable, parent_authorized_for_delete_child);
out:
if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL))
kauth_acl_free(va.va_acl);
* deny execute, we can synthesize a global right that allows anyone to
* traverse this directory during a pathname lookup without having to
* match the credential associated with this cache of rights.
+ *
+ * Note that we can correctly cache KAUTH_VNODE_SEARCHBYANYONE
+ * only if we actually check ACLs which we don't for root. As
+ * a workaround, the lookup fast path checks for root.
*/
if (!VATTR_IS_SUPPORTED(&va, va_mode) ||
((va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) ==
vnode_cache_authorized_action(vp, ctx, KAUTH_VNODE_SEARCHBYANYONE);
}
}
- if ((rights & KAUTH_VNODE_DELETE) && parent_authorized_for_delete_child == FALSE) {
- /*
- * parent was successfully and newly authorized for content deletions
- * add it to the cache, but only if it doesn't have the sticky
- * bit set on it. This same check is done earlier guarding
- * fetching of dva, and if we jumped to out without having done
- * this, we will have returned already because of a non-zero
- * 'result' value.
- */
- if (VATTR_IS_SUPPORTED(&dva, va_mode) &&
- !(dva.va_mode & (S_ISVTX))) {
- /* OK to cache delete rights */
- KAUTH_DEBUG("%p - caching DELETE_CHILD rights", dvp);
- vnode_cache_authorized_action(dvp, ctx, KAUTH_VNODE_DELETE_CHILD);
- }
- }
+success:
if (parent_ref)
vnode_put(vp);
+
/*
* Note that this implies that we will allow requests for no rights, as well as
* for rights that we do not recognise. There should be none of these.
return(KAUTH_RESULT_ALLOW);
}
+/*
+ * vnode_attr_authorize_init
+ *
+ * Prepare the attribute request structures that a caller will later hand to
+ * vnode_attr_authorize().
+ *
+ * Parameters:	vap	attributes of the object being authorized; always
+ *			reset with VATTR_INIT and marked as wanting
+ *			va_type, va_mode and va_flags.
+ *		dvap	attributes of the parent directory, or NULL.  When
+ *			non-NULL it is always reset with VATTR_INIT, but
+ *			fields are only marked wanted if 'action' includes
+ *			KAUTH_VNODE_DELETE.
+ *		action	the kauth vnode action that will be authorized.
+ *		ctx	context whose superuser status decides whether
+ *			ownership/ACL attributes are requested as well.
+ *
+ * Returns:	0	success
+ *		EINVAL	KAUTH_VNODE_DELETE was requested without a dvap
+ *			(note that vap has already been initialized by then).
+ */
+int
+vnode_attr_authorize_init(struct vnode_attr *vap, struct vnode_attr *dvap,
+ kauth_action_t action, vfs_context_t ctx)
+{
+ VATTR_INIT(vap);
+ VATTR_WANTED(vap, va_type);
+ VATTR_WANTED(vap, va_mode);
+ VATTR_WANTED(vap, va_flags);
+ if (dvap) {
+ VATTR_INIT(dvap);
+ /* parent attributes are only requested for delete requests */
+ if (action & KAUTH_VNODE_DELETE) {
+ VATTR_WANTED(dvap, va_type);
+ VATTR_WANTED(dvap, va_mode);
+ VATTR_WANTED(dvap, va_flags);
+ }
+ } else if (action & KAUTH_VNODE_DELETE) {
+ /* a delete request needs somewhere to put the parent's attributes */
+ return (EINVAL);
+ }
+
+ /*
+ * Callers that are not the superuser additionally request owner,
+ * group and ACL; for the superuser only the type/mode/flags requested
+ * above are asked for.
+ */
+ if (!vfs_context_issuser(ctx)) {
+ VATTR_WANTED(vap, va_uid);
+ VATTR_WANTED(vap, va_gid);
+ VATTR_WANTED(vap, va_acl);
+ if (dvap && (action & KAUTH_VNODE_DELETE)) {
+ VATTR_WANTED(dvap, va_uid);
+ VATTR_WANTED(dvap, va_gid);
+ VATTR_WANTED(dvap, va_acl);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * vnode_attr_authorize
+ *
+ * Authorize 'action' using only pre-fetched attributes; no vnode is
+ * referenced here (the authorization context is built with vp and dvp set
+ * to NULLVP).
+ *
+ * Parameters:	vap	attributes of the object being authorized.
+ *		dvap	attributes of the parent directory; stored in the
+ *			authorization context as-is.
+ *		mp	mount used for the read-only, noexec and extended
+ *			security checks; each of those checks is skipped
+ *			when mp is NULL.
+ *		action	requested kauth vnode action bits.
+ *		ctx	context supplying the caller's identity.
+ *
+ * Returns:	0	authorized
+ *		EROFS	write rights requested on a read-only mount
+ *		EACCES	execute of a regular file on a noexec mount, or
+ *			EPERM from vnode_attr_authorize_internal() (remapped)
+ *		other	any other error from vnode_attr_authorize_internal()
+ *
+ * Notes:	For a non-superuser caller this panics if va_uid or va_gid
+ *		is not marked supported in vap, or if the mount has extended
+ *		security and va_acl is not marked supported.
+ */
+int
+vnode_attr_authorize(struct vnode_attr *vap, struct vnode_attr *dvap, mount_t mp,
+ kauth_action_t action, vfs_context_t ctx)
+{
+ struct _vnode_authorize_context auth_context;
+ vauth_ctx vcp;
+ kauth_ace_rights_t rights;
+ int noimmutable;
+ boolean_t found_deny;
+ boolean_t is_suser = FALSE;
+ int result = 0;
+
+ vcp = &auth_context;
+ vcp->ctx = ctx;
+ vcp->vp = NULLVP;
+ vcp->vap = vap;
+ vcp->dvp = NULLVP;
+ vcp->dvap = dvap;
+ vcp->flags = vcp->flags_valid = 0;
+
+ /*
+ * Split the modifier bits off the action: NOIMMUTABLE becomes a
+ * separate flag, and neither it nor ACCESS is treated as a right.
+ */
+ noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0;
+ rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE);
+
+ /*
+ * Check for read-only filesystems.
+ */
+ if ((rights & KAUTH_VNODE_WRITE_RIGHTS) &&
+ mp && (mp->mnt_flag & MNT_RDONLY) &&
+ ((vap->va_type == VREG) || (vap->va_type == VDIR) ||
+ (vap->va_type == VLNK) || (rights & KAUTH_VNODE_DELETE) ||
+ (rights & KAUTH_VNODE_DELETE_CHILD))) {
+ result = EROFS;
+ goto out;
+ }
+
+ /*
+ * Check for noexec filesystems.
+ */
+ if ((rights & KAUTH_VNODE_EXECUTE) &&
+ (vap->va_type == VREG) && mp && (mp->mnt_flag & MNT_NOEXEC)) {
+ result = EACCES;
+ goto out;
+ }
+
+ if (vfs_context_issuser(ctx)) {
+ /*
+ * if we're not asking for execute permissions or modifications,
+ * then we're done, this action is authorized.
+ */
+ if (!(rights & (KAUTH_VNODE_EXECUTE | KAUTH_VNODE_WRITE_RIGHTS)))
+ goto out;
+ is_suser = TRUE;
+ } else {
+ /*
+ * The non-superuser path needs owner, group and (with extended
+ * security) ACL data; missing attributes are treated as a
+ * caller bug rather than a runtime error.
+ */
+ if (!VATTR_IS_SUPPORTED(vap, va_uid) ||
+ !VATTR_IS_SUPPORTED(vap, va_gid) ||
+ (mp && vfs_extendedsecurity(mp) && !VATTR_IS_SUPPORTED(vap, va_acl))) {
+ panic("vnode attrs not complete for vnode_attr_authorize\n");
+ }
+ }
+
+ /*
+ * found_deny is only handed over as an out-parameter and is not
+ * consulted afterwards; FALSE is passed for
+ * parent_authorized_for_delete_child.
+ */
+ result = vnode_attr_authorize_internal(vcp, mp, rights, is_suser,
+ &found_deny, noimmutable, FALSE);
+
+ /* report EPERM from the internal check as EACCES */
+ if (result == EPERM)
+ result = EACCES;
+out:
+ return (result);
+}
+
+
int
vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx)
{
vnode_authattr_new_internal(vnode_t dvp, struct vnode_attr *vap, int noauth, uint32_t *defaulted_fieldsp, vfs_context_t ctx)
{
int error;
- int has_priv_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode;
+ int has_priv_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode, inherit_restricted;
kauth_cred_t cred;
guid_t changer;
mount_t dmp;
+ struct vnode_attr dva;
error = 0;
defaulted_owner = defaulted_group = defaulted_mode = 0;
+ inherit_restricted = 0;
+
/*
* Require that the filesystem support extended security to apply any.
*/
}
}
+ /*
+ * We need the dvp's va_flags and *may* need the gid of the directory,
+ * we ask for both here.
+ */
+ VATTR_INIT(&dva);
+ VATTR_WANTED(&dva, va_gid);
+ VATTR_WANTED(&dva, va_flags);
+ if ((error = vnode_getattr(dvp, &dva, ctx)) != 0)
+ goto out;
+
/*
* If the filesystem is mounted IGNORE_OWNERSHIP and an explicit grouo is set, that
* group takes ownership of all new files.
} else {
if (!VATTR_IS_ACTIVE(vap, va_gid)) {
/* default group comes from parent object, fallback to current user */
- struct vnode_attr dva;
- VATTR_INIT(&dva);
- VATTR_WANTED(&dva, va_gid);
- if ((error = vnode_getattr(dvp, &dva, ctx)) != 0)
- goto out;
if (VATTR_IS_SUPPORTED(&dva, va_gid)) {
VATTR_SET(vap, va_gid, dva.va_gid);
} else {
if (!VATTR_IS_ACTIVE(vap, va_flags))
VATTR_SET(vap, va_flags, 0);
-
+
+ /* Determine if SF_RESTRICTED should be inherited from the parent
+ * directory. */
+ if (VATTR_IS_SUPPORTED(&dva, va_flags) &&
+ (dva.va_flags & SF_RESTRICTED)) {
+ inherit_restricted = 1;
+ }
+
/* default mode is everything, masked with current umask */
if (!VATTR_IS_ACTIVE(vap, va_mode)) {
VATTR_SET(vap, va_mode, ACCESSPERMS & ~vfs_context_proc(ctx)->p_fd->fd_cmask);
}
}
out:
+ if (inherit_restricted) {
+ /* Apply SF_RESTRICTED to the file if its parent directory was
+ * restricted. This is done at the end so that root is not
+ * required if this flag is only set due to inheritance. */
+ VATTR_SET(vap, va_flags, (vap->va_flags | SF_RESTRICTED));
+ }
if (defaulted_fieldsp) {
if (defaulted_mode) {
*defaulted_fieldsp |= VATTR_PREPARE_DEFAULTED_MODE;
* If the size is being set, make sure it's not a directory.
*/
if (VATTR_IS_ACTIVE(vap, va_data_size)) {
- /* size is meaningless on a directory, don't permit this */
- if (vnode_isdir(vp)) {
- KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory");
- error = EISDIR;
+ /* size is only meaningful on regular files, don't permit otherwise */
+ if (!vnode_isreg(vp)) {
+ KAUTH_DEBUG("ATTR - ERROR: size change requested on non-file");
+ error = vnode_isdir(vp) ? EISDIR : EINVAL;
goto out;
}
}
required_action |= KAUTH_VNODE_WRITE_SECURITY;
}
- /* clear set-uid and set-gid bits as required by Posix */
- if (VATTR_IS_ACTIVE(vap, va_mode)) {
- newmode = vap->va_mode;
- } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) {
- newmode = ova.va_mode;
- } else {
- KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits");
- newmode = 0;
- }
- if (newmode & (S_ISUID | S_ISGID)) {
- VATTR_SET(vap, va_mode, newmode & ~(S_ISUID | S_ISGID));
- KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode, vap->va_mode);
+ }
+
+ /*
+ * clear set-uid and set-gid bits. POSIX only requires this for
+ * non-privileged processes but we do it even for root.
+ */
+ if (VATTR_IS_ACTIVE(vap, va_mode)) {
+ newmode = vap->va_mode;
+ } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) {
+ newmode = ova.va_mode;
+ } else {
+ KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits");
+ newmode = 0;
+ }
+
+ /* chown always clears setuid/gid bits. An exception is made for
+ * setattrlist executed by a root process to set <uid, gid, mode> on a file:
+ * setattrlist is allowed to set the new mode on the file and change (chown)
+ * uid/gid.
+ */
+ if (newmode & (S_ISUID | S_ISGID)) {
+ if (!VATTR_IS_ACTIVE(vap, va_mode) || !has_priv_suser) {
+ KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o",
+ newmode, newmode & ~(S_ISUID | S_ISGID));
+ newmode &= ~(S_ISUID | S_ISGID);
}
+ VATTR_SET(vap, va_mode, newmode);
}
}
}
void panic_print_vnodes(void);
+
/* define PANIC_PRINTS_VNODES only if investigation is required. */
#ifdef PANIC_PRINTS_VNODES
* iterate all vnodelist items in all mounts (mntlist) -> mnt_vnodelist
*/
TAILQ_FOREACH(mnt, &mountlist, mnt_list) {
+
+ if (!ml_validate_nofault((vm_offset_t)mnt, sizeof(mount_t))) {
+ kdb_printf("Unable to iterate the mount list %p - encountered an invalid mount pointer %p \n",
+ &mountlist, mnt);
+ break;
+ }
+
TAILQ_FOREACH(vp, &mnt->mnt_vnodelist, v_mntvnodes) {
+
+ if (!ml_validate_nofault((vm_offset_t)vp, sizeof(vnode_t))) {
+ kdb_printf("Unable to iterate the vnode list %p - encountered an invalid vnode pointer %p \n",
+ &mnt->mnt_vnodelist, vp);
+ break;
+ }
+
if (++nvnodes > SANE_VNODE_PRINT_LIMIT)
return;
type = __vtype(vp->v_type);
}
#endif /* CONFIG_TRIGGERS */
+
+/*
+ * Return the value of VM_KERNEL_ADDRPERM() applied to the vnode pointer.
+ * NOTE(review): presumably this yields a permuted address that is safe to
+ * expose in kdebug trace records - confirm against the macro definition.
+ */
+vm_offset_t kdebug_vnode(vnode_t vp)
+{
+ return VM_KERNEL_ADDRPERM(vp);
+}
+
+/*
+ * kern.flush_cache_on_write: read/write integer tunable, 0 by default.
+ * It is consulted by vnode_should_flush_after_write().
+ */
+static int flush_cache_on_write = 0;
+SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0,
+ "always flush the drive cache on writes to uncached files");
+
+/*
+ * Returns non-zero when the flush_cache_on_write tunable is set and the
+ * write is uncached, i.e. either IO_NOCACHE is set in 'ioflag' or
+ * vnode_isnocache(vp) is true.  Returns 0 otherwise.
+ */
+int vnode_should_flush_after_write(vnode_t vp, int ioflag)
+{
+ return (flush_cache_on_write
+ && (ISSET(ioflag, IO_NOCACHE) || vnode_isnocache(vp)));
+}
+
+/*
+ * sysctl for use by disk I/O tracing tools to get the list of existing
+ * vnodes' paths
+ */
+
+/*
+ * State shared across the vnode iteration callbacks:
+ *	count	number of paths emitted since the last yield
+ *	path	scratch buffer for one path; declared as an array of long
+ *		and cast to char * when handed to vn_getpath()
+ */
+struct vnode_trace_paths_context {
+ uint64_t count;
+ long path[MAXPATHLEN / sizeof (long) + 1]; /* + 1 in case sizeof (long) does not divide MAXPATHLEN */
+};
+
+/*
+ * Per-vnode callback handed to vnode_iterate() by vfs_trace_paths_callback():
+ * looks up the vnode's path and, if that succeeds, emits it through
+ * kdebug_lookup_gen_events().  'arg' is a struct vnode_trace_paths_context.
+ * A vnode whose path lookup fails is silently skipped.  Always returns
+ * VNODE_RETURNED.
+ */
+static int vnode_trace_path_callback(struct vnode *vp, void *arg) {
+ int len, rv;
+ struct vnode_trace_paths_context *ctx;
+
+ ctx = arg;
+
+ len = sizeof (ctx->path);
+ rv = vn_getpath(vp, (char *)ctx->path, &len);
+ /* vn_getpath() NUL-terminates, and len includes the NUL */
+
+ if (!rv) {
+ kdebug_lookup_gen_events(ctx->path, len, vp, TRUE);
+
+ /* yield after every 1000 emitted paths, then restart the count */
+ if (++(ctx->count) == 1000) {
+ thread_yield_to_preemption();
+ ctx->count = 0;
+ }
+ }
+
+ return VNODE_RETURNED;
+}
+
+/*
+ * Per-mount callback handed to vfs_iterate(): for mounts flagged MNT_LOCAL,
+ * walks all of the mount's vnodes with vnode_trace_path_callback, passing
+ * 'arg' through unchanged.  Other mounts are skipped.  The result of
+ * vnode_iterate() is ignored; always returns VFS_RETURNED.
+ */
+static int vfs_trace_paths_callback(mount_t mp, void *arg) {
+ if (mp->mnt_flag & MNT_LOCAL)
+ vnode_iterate(mp, VNODE_ITERATE_ALL, vnode_trace_path_callback, arg);
+
+ return VFS_RETURNED;
+}
+
+/*
+ * Handler for the trace_paths sysctl: emits the path of every vnode on
+ * every local mount as kdebug lookup events.  No data is copied back
+ * through 'req'.
+ *
+ * Returns:	0	iteration was started (the result of vfs_iterate()
+ *			is not checked)
+ *		EPERM	caller's credential is not superuser
+ *		EINVAL	kdebug is disabled or the VFS_LOOKUP debugid is not
+ *			enabled
+ */
+static int sysctl_vfs_trace_paths SYSCTL_HANDLER_ARGS {
+ struct vnode_trace_paths_context ctx;
+
+ /* none of the handler arguments are used */
+ (void)oidp;
+ (void)arg1;
+ (void)arg2;
+ (void)req;
+
+ if (!kauth_cred_issuser(kauth_cred_get()))
+ return EPERM;
+
+ if (!kdebug_enable || !kdebug_debugid_enabled(VFS_LOOKUP))
+ return EINVAL;
+
+ /* start with a zero count and an empty path buffer */
+ bzero(&ctx, sizeof (struct vnode_trace_paths_context));
+
+ vfs_iterate(0, vfs_trace_paths_callback, &ctx);
+
+ return 0;
+}
+
+/* Register sysctl_vfs_trace_paths as the trace_paths OID under _vfs_generic. */
+SYSCTL_PROC(_vfs_generic, OID_AUTO, trace_paths, CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED, NULL, 0, &sysctl_vfs_trace_paths, "-", "trace_paths");