/*
- * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <miscfs/fifofs/fifo.h>
#include <string.h>
-#include <machine/spl.h>
#include <machine/machine_routines.h>
#include <kern/assert.h>
#include <kern/kalloc.h> /* kalloc()/kfree() */
#include <kern/clock.h> /* delay_for_interval() */
#include <libkern/OSAtomic.h> /* OSAddAtomic() */
+#if !CONFIG_EMBEDDED
#include <console/video_console.h>
+#endif
#ifdef JOE_DEBUG
#include <libkern/OSDebug.h>
#include <security/mac_framework.h>
#endif
+#include <vfs/vfs_disk_conditioner.h>
+#include <libkern/section_keywords.h>
+
extern lck_grp_t *vnode_lck_grp;
extern lck_attr_t *vnode_lck_attr;
/* XXX next protptype should be from <nfs/nfs.h> */
extern int nfs_vinvalbuf(vnode_t, int, vfs_context_t, int);
+extern int paniclog_append_noflush(const char *format, ...);
+
/* XXX next prototytype should be from libsa/stdlib.h> but conflicts libkern */
__private_extern__ void qsort(
void * array,
size_t member_size,
int (*)(const void *, const void *));
-extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
__private_extern__ void vntblinit(void);
-__private_extern__ kern_return_t reset_vmobjectcache(unsigned int val1,
- unsigned int val2);
__private_extern__ int unlink1(vfs_context_t, vnode_t, user_addr_t,
enum uio_seg, int);
ragevnodes--; \
} while(0)
-
-/*
- * vnodetarget hasn't been used in a long time, but
- * it was exported for some reason... I'm leaving in
- * place for now... it should be deprecated out of the
- * exports and removed eventually.
- */
-u_int32_t vnodetarget; /* target for vnreclaim() */
-#define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
-
-/*
- * We need quite a few vnodes on the free list to sustain the
- * rapid stat() the compilation process does, and still benefit from the name
- * cache. Having too few vnodes on the free list causes serious disk
- * thrashing as we cycle through them.
- */
-#define VNODE_FREE_MIN CONFIG_VNODE_FREE_MIN /* freelist should have at least this many */
-
-
static void async_work_continue(void);
/*
TAILQ_INIT(&vnode_async_work_list);
TAILQ_INIT(&mountlist);
- if (!vnodetarget)
- vnodetarget = VNODE_FREE_TARGET;
-
microuptime(&rage_tv);
rage_limit = desiredvnodes / 100;
if (rage_limit < RAGE_LIMIT_MIN)
rage_limit = RAGE_LIMIT_MIN;
- /*
- * Scale the vm_object_cache to accomodate the vnodes
- * we want to cache
- */
- (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
-
/*
* create worker threads
*/
thread_deallocate(thread);
}
-/* Reset the VM Object Cache with the values passed in */
-__private_extern__ kern_return_t
-reset_vmobjectcache(unsigned int val1, unsigned int val2)
-{
- vm_size_t oval = val1 - VNODE_FREE_MIN;
- vm_size_t nval;
-
- if (val1 == val2) {
- return KERN_SUCCESS;
- }
-
- if(val2 < VNODE_FREE_MIN)
- nval = 0;
- else
- nval = val2 - VNODE_FREE_MIN;
-
- return(adjust_vm_object_cache(oval, nval));
-}
-
-
/* the timeout is in 10 msecs */
int
vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, const char *msg) {
void
vnode_iterate_setup(mount_t mp)
{
- while (mp->mnt_lflag & MNT_LITER) {
- mp->mnt_lflag |= MNT_LITERWAIT;
- msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", NULL);
- }
-
mp->mnt_lflag |= MNT_LITER;
-
}
int
vnode_iterate_clear(mount_t mp)
{
mp->mnt_lflag &= ~MNT_LITER;
- if (mp->mnt_lflag & MNT_LITERWAIT) {
- mp->mnt_lflag &= ~MNT_LITERWAIT;
- wakeup(mp);
- }
}
+#if !CONFIG_EMBEDDED
#include <i386/panic_hooks.h>
static void vnode_iterate_panic_hook(panic_hook_t *hook_)
{
- extern int kdb_log(const char *fmt, ...);
struct vnode_iterate_panic_hook *hook = (struct vnode_iterate_panic_hook *)hook_;
panic_phys_range_t range;
uint64_t phys;
if (panic_phys_range_before(hook->mp, &phys, &range)) {
- kdb_log("mp = %p, phys = %p, prev (%p: %p-%p)\n",
+ paniclog_append_noflush("mp = %p, phys = %p, prev (%p: %p-%p)\n",
hook->mp, phys, range.type, range.phys_start,
range.phys_start + range.len);
} else {
- kdb_log("mp = %p, phys = %p, prev (!)\n", hook->mp, phys);
+ paniclog_append_noflush("mp = %p, phys = %p, prev (!)\n", hook->mp, phys);
}
if (panic_phys_range_before(hook->vp, &phys, &range)) {
- kdb_log("vp = %p, phys = %p, prev (%p: %p-%p)\n",
+ paniclog_append_noflush("vp = %p, phys = %p, prev (%p: %p-%p)\n",
hook->vp, phys, range.type, range.phys_start,
range.phys_start + range.len);
} else {
- kdb_log("vp = %p, phys = %p, prev (!)\n", hook->vp, phys);
+ paniclog_append_noflush("vp = %p, phys = %p, prev (!)\n", hook->vp, phys);
}
panic_dump_mem((void *)(((vm_offset_t)hook->mp -4096) & ~4095), 12288);
}
+#endif //CONFIG_EMBEDDED
int
vnode_iterate(mount_t mp, int flags, int (*callout)(struct vnode *, void *),
int vid, retval;
int ret = 0;
+ /*
+ * The mount iterate mutex is held for the duration of the iteration.
+ * This can be done by a state flag on the mount structure but we can
+ * run into priority inversion issues sometimes.
+ * Using a mutex allows us to benefit from the priority donation
+ * mechanisms in the kernel for locks. This mutex should never be
+ * acquired in spin mode and it should be acquired before attempting to
+ * acquire the mount lock.
+ */
+ mount_iterate_lock(mp);
+
mount_lock(mp);
vnode_iterate_setup(mp);
- /* it is returns 0 then there is nothing to do */
+ /* If it returns 0 then there is nothing to do */
retval = vnode_iterate_prepare(mp);
if (retval == 0) {
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return(ret);
}
+#if !CONFIG_EMBEDDED
struct vnode_iterate_panic_hook hook;
hook.mp = mp;
hook.vp = NULL;
panic_hook(&hook.hook, vnode_iterate_panic_hook);
+#endif
/* iterate over all the vnodes */
while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
vp = TAILQ_FIRST(&mp->mnt_workerqueue);
+#if !CONFIG_EMBEDDED
hook.vp = vp;
+#endif
TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes);
TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
vid = vp->v_id;
}
out:
+#if !CONFIG_EMBEDDED
panic_unhook(&hook.hook);
+#endif
(void)vnode_iterate_reloadq(mp);
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return (ret);
}
lck_mtx_unlock(&mp->mnt_renamelock);
}
+/*
+ * Acquire the per-mount iteration mutex (mp->mnt_iter_lock).
+ * Callers in this file take it before mount_lock(); see the comment in
+ * vnode_iterate() for the rationale.
+ */
+void
+mount_iterate_lock(mount_t mp)
+{
+ lck_mtx_lock(&mp->mnt_iter_lock);
+}
+
+/*
+ * Release the per-mount iteration mutex (mp->mnt_iter_lock) taken by
+ * mount_iterate_lock().
+ */
+void
+mount_iterate_unlock(mount_t mp)
+{
+ lck_mtx_unlock(&mp->mnt_iter_lock);
+}
+
void
mount_lock(mount_t mp)
{
return (ENOMEM);
}
+#define DBG_MOUNTROOT (FSDBG_CODE(DBG_MOUNT, 0))
/*
* Find an appropriate filesystem to use for the root. If a filesystem
mount_t mp;
vnode_t bdevvp_rootvp;
+ KDBG_RELEASE(DBG_MOUNTROOT | DBG_FUNC_START);
if (mountroot != NULL) {
/*
* used for netboot which follows a different set of rules
*/
error = (*mountroot)();
+
+ KDBG_RELEASE(DBG_MOUNTROOT | DBG_FUNC_END, error, 0);
return (error);
}
if ((error = bdevvp(rootdev, &rootvp))) {
printf("vfs_mountroot: can't setup bdevvp\n");
+
+ KDBG_RELEASE(DBG_MOUNTROOT | DBG_FUNC_END, error, 1);
return (error);
}
/*
mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
}
+#if !CONFIG_EMBEDDED
uint32_t speed;
- if (MNTK_VIRTUALDEV & mp->mnt_kern_flag) speed = 128;
- else if (MNTK_SSD & mp->mnt_kern_flag) speed = 7*256;
- else speed = 256;
+ if (MNTK_VIRTUALDEV & mp->mnt_kern_flag) speed = 128;
+ else if (disk_conditioner_mount_is_ssd(mp)) speed = 7*256;
+ else speed = 256;
vc_progress_setdiskspeed(speed);
+#endif
/*
* Probe root file system for additional features.
*/
vnode_put(rootvp);
#if CONFIG_MACF
- if ((vfs_flags(mp) & MNT_MULTILABEL) == 0)
+ if ((vfs_flags(mp) & MNT_MULTILABEL) == 0) {
+ KDBG_RELEASE(DBG_MOUNTROOT | DBG_FUNC_END, 0, 2);
return (0);
+ }
error = VFS_ROOT(mp, &vp, ctx);
if (error) {
goto fail;
}
#endif
+ KDBG_RELEASE(DBG_MOUNTROOT | DBG_FUNC_END, 0, 3);
return (0);
}
#if CONFIG_MACF
fail:
#endif
vfs_rootmountfailed(mp);
-
+
if (error != EINVAL)
printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
}
+ KDBG_RELEASE(DBG_MOUNTROOT | DBG_FUNC_END, error ? error : ENODEV, 4);
return (ENODEV);
}
return (0);
}
-
/*
* Check to see if the new vnode represents a special device
* for which we already have a vnode (either because of
int retval;
unsigned int vid;
+ /*
+ * See comments in vnode_iterate() for the rationale for this lock
+ */
+ mount_iterate_lock(mp);
+
mount_lock(mp);
vnode_iterate_setup(mp);
/*
if (vnode_umount_preflight(mp, skipvp, flags)) {
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return(EBUSY);
}
}
loop:
- /* it is returns 0 then there is nothing to do */
+ /* If it returns 0 then there is nothing to do */
retval = vnode_iterate_prepare(mp);
if (retval == 0) {
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
return(retval);
}
}
vnode_iterate_clear(mp);
mount_unlock(mp);
+ mount_iterate_unlock(mp);
if (busy && ((flags & FORCECLOSE)==0))
return (EBUSY);
return build_path(vp, pathbuf, *len, len, 0, vfs_context_current());
}
+/*
+ * vn_getpath_fsenter_with_parent will reenter the file system to find the path of the
+ * vnode. It requires that there are IO counts on both the vnode and the directory vnode.
+ *
+ * vn_getpath_fsenter is called by MAC hooks to authorize operations for everything except
+ * unlink, rmdir and rename. For these operations the MAC hook calls vn_getpath. This presents
+ * problems where if the path can not be found from the name cache, those operations can
+ * erroneously fail with EPERM even though the call should succeed. When removing or moving
+ * file system objects with operations such as unlink or rename, those operations need to
+ * take IO counts on the target and containing directory. Calling vn_getpath_fsenter from a
+ * MAC hook from these operations during forced unmount operations can lead to
+ * deadlock. This happens when the operation starts, IO counts are taken on the containing
+ * directories and targets. Before the MAC hook is called a forced unmount from another
+ * thread takes place and blocks on the on going operation's directory vnode in vdrain.
+ * After which, the MAC hook gets called and calls vn_getpath_fsenter. vn_getpath_fsenter
+ * is called with the understanding that there is an IO count on the target. If in
+ * build_path the directory vnode is no longer in the cache, then the parent object id via
+ * vnode_getattr from the target is obtained and used to call VFS_VGET to get the parent
+ * vnode. The file system's VFS_VGET then looks up by inode in its hash and tries to get
+ * an IO count. But VFS_VGET "sees" the directory vnode is in vdrain and can block
+ * depending on which version and how it calls the vnode_get family of interfaces.
+ *
+ * N.B. A reasonable interface to use is vnode_getwithvid. This interface was modified to
+ * call vnode_getiocount with VNODE_DRAINO, so it will happily get an IO count and not
+ * cause issues, but there is no guarantee that all or any file systems are doing that.
+ *
+ * vn_getpath_fsenter_with_parent can enter the file system safely since there is a known
+ * IO count on the directory vnode by calling build_path_with_parent.
+ */
+
+int
+vn_getpath_fsenter_with_parent(struct vnode *dvp, struct vnode *vp, char *pathbuf, int *len)
+{
+	/* *len carries the buffer size in and receives the resulting length. */
+	int buflen = *len;
+	vfs_context_t ctx = vfs_context_current();
+
+	/* Note the argument order: the target vnode first, then its parent. */
+	return build_path_with_parent(vp, dvp, pathbuf, buflen, len, 0, ctx);
+}
+
int
vn_getcdhash(struct vnode *vp, off_t offset, unsigned char *cdhash)
{
temp = MNT_DEFAULT_IOQUEUE_DEPTH;
mp->mnt_ioqueue_depth = temp;
- mp->mnt_ioscale = (mp->mnt_ioqueue_depth + (MNT_DEFAULT_IOQUEUE_DEPTH - 1)) / MNT_DEFAULT_IOQUEUE_DEPTH;
+ mp->mnt_ioscale = MNT_IOSCALE(mp->mnt_ioqueue_depth);
if (mp->mnt_ioscale > 1)
printf("ioqueue_depth = %d, ioscale = %d\n", (int)mp->mnt_ioqueue_depth, (int)mp->mnt_ioscale);
*/
if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA))
mp->mnt_ioflags |= MNT_IOFLAGS_FUSION_DRIVE;
+ } else {
+ /* Check for APFS Fusion */
+ dk_apfs_flavour_t flavour;
+ if ((VNOP_IOCTL(devvp, DKIOCGETAPFSFLAVOUR, (caddr_t)&flavour, 0, ctx) == 0) &&
+ (flavour == DK_APFS_FUSION)) {
+ mp->mnt_ioflags |= MNT_IOFLAGS_FUSION_DRIVE;
+ }
}
#if CONFIG_IOSCHED
if (space < req->oldlen)
return (ENOMEM);
- MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
+ MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK | M_ZERO);
if (fsidlst == NULL) {
return (ENOMEM);
}
sfs.f_owner = sp->f_owner;
#ifdef NFSCLIENT
if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
- strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+ strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
} else
#endif
{
sfs.f_fsid = sp->f_fsid;
sfs.f_owner = sp->f_owner;
-#ifdef NFS_CLIENT
+#ifdef NFSCLIENT
if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
- strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+ strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
} else
#endif
{
return (error);
}
-static int filt_fsattach(struct knote *kn);
+static int filt_fsattach(struct knote *kn, struct kevent_internal_s *kev);
static void filt_fsdetach(struct knote *kn);
static int filt_fsevent(struct knote *kn, long hint);
static int filt_fstouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_fsprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);
-struct filterops fs_filtops = {
- .f_attach = filt_fsattach,
- .f_detach = filt_fsdetach,
- .f_event = filt_fsevent,
+SECURITY_READ_ONLY_EARLY(struct filterops) fs_filtops = {
+ .f_attach = filt_fsattach,
+ .f_detach = filt_fsdetach,
+ .f_event = filt_fsevent,
.f_touch = filt_fstouch,
.f_process = filt_fsprocess,
};
static int
-filt_fsattach(struct knote *kn)
+filt_fsattach(struct knote *kn, __unused struct kevent_internal_s *kev)
{
lck_mtx_lock(fs_klist_lock);
KNOTE_ATTACH(&fs_klist, kn);
lck_mtx_lock(fs_klist_lock);
kn->kn_sfflags = kev->fflags;
- if ((kn->kn_status & KN_UDATA_SPECIFIC) == 0)
- kn->kn_udata = kev->udata;
/*
* the above filter function sets bits even if nobody is looking for them.
{
int *name, namelen;
struct vfstable *vfsp;
- struct vfsconf vfsc;
+ struct vfsconf vfsc = {};
(void)oidp;
name = arg1;
SYSCTL_INT(_vfs_generic, VFS_MAXTYPENUM, maxtypenum,
CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
&maxvfstypenum, 0, "");
-SYSCTL_INT(_vfs_generic, OID_AUTO, sync_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &sync_timeout, 0, "");
+SYSCTL_INT(_vfs_generic, OID_AUTO, sync_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &sync_timeout_seconds, 0, "");
SYSCTL_NODE(_vfs_generic, VFS_CONF, conf,
CTLFLAG_RD | CTLFLAG_LOCKED,
sysctl_vfs_generic_conf, "");
+/* Indicate that the root file system unmounted cleanly */
+static int vfs_root_unmounted_cleanly = 0;
+SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &vfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
+
+/*
+ * Record that the root file system was unmounted cleanly by setting
+ * vfs_root_unmounted_cleanly to 1. That variable backs the read-only
+ * root_unmounted_cleanly sysctl declared under _vfs_generic above.
+ */
+void
+vfs_set_root_unmounted_cleanly(void)
+{
+ vfs_root_unmounted_cleanly = 1;
+}
+
/*
* Print vnode state.
*/
ut = get_bsdthread_info(current_thread());
if ((current_proc()->p_lflag & P_LRAGE_VNODES) ||
- (ut->uu_flag & UT_RAGE_VNODES)) {
+ (ut->uu_flag & (UT_RAGE_VNODES | UT_KERN_RAGE_VNODES))) {
/*
* process has indicated that it wants any
* vnodes created on its behalf to be rapidly
* aged to reduce the impact on the cached set
* of vnodes
+ *
+ * if UT_KERN_RAGE_VNODES is set, then the
+ * kernel internally wants vnodes to be rapidly
+ * aged, even if the process hasn't requested
+ * this
*/
vp->v_flag |= VRAGE;
}
if (!batched) {
*vpp = (vnode_t) 0;
vnode_put(vp);
+ vp = NULLVP;
}
}
+ /*
+ * For creation VNOPs, this is the equivalent of
+ * lookup_handle_found_vnode.
+ */
+ if (kdebug_enable && *vpp)
+ kdebug_lookup(*vpp, cnp);
+
out:
vn_attribute_cleanup(vap, defaulted);
static kauth_scope_t vnode_scope;
static int vnode_authorize_callback(kauth_cred_t credential, void *idata, kauth_action_t action,
uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
-static int vnode_authorize_callback_int(__unused kauth_cred_t credential, __unused void *idata, kauth_action_t action,
- uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
+static int vnode_authorize_callback_int(kauth_action_t action, vfs_context_t ctx,
+ vnode_t vp, vnode_t dvp, int *errorp);
typedef struct _vnode_authorize_context {
vnode_t vp;
#define _VAC_IN_GROUP (1<<1)
#define _VAC_IS_DIR_OWNER (1<<2)
#define _VAC_IN_DIR_GROUP (1<<3)
+#define _VAC_NO_VNODE_POINTERS (1<<4)
} *vauth_ctx;
void
struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
vfs_context_t ctx, vfs_rename_flags_t flags, void *reserved)
{
+
+ return vn_authorize_renamex_with_paths(fdvp, fvp, fcnp, NULL, tdvp, tvp, tcnp, NULL, ctx, flags, reserved);
+}
+
+int
+vn_authorize_renamex_with_paths(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, const char *from_path,
+ struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, const char *to_path,
+ vfs_context_t ctx, vfs_rename_flags_t flags, void *reserved)
+{
int error = 0;
int moving = 0;
bool swap = flags & VFS_RENAME_SWAP;
/***** <Kauth> *****/
+ /*
+ * As part of the Kauth step, we call out to allow 3rd-party
+ * fileop notification of "about to rename". This is needed
+ * in the event that 3rd-parties need to know that the DELETE
+ * authorization is actually part of a rename. It's important
+ * that we guarantee that the DELETE call-out will always be
+ * made if the WILL_RENAME call-out is made. Another fileop
+ * call-out will be performed once the operation is completed.
+ * We can ignore the result of kauth_authorize_fileop().
+ *
+ * N.B. We are passing the vnode and *both* paths to each
+ * call; kauth_authorize_fileop() extracts the "from" path
+ * when posting a KAUTH_FILEOP_WILL_RENAME notification.
+ * As such, we only post these notifications if all of the
+ * information we need is provided.
+ */
+
if (swap) {
kauth_action_t f = 0, t = 0;
if (vnode_isdir(tvp))
t = KAUTH_VNODE_ADD_SUBDIRECTORY;
}
+ if (to_path != NULL)
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_WILL_RENAME,
+ (uintptr_t)fvp,
+ (uintptr_t)to_path);
error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | f, ctx);
if (error)
goto out;
+ if (from_path != NULL)
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_WILL_RENAME,
+ (uintptr_t)tvp,
+ (uintptr_t)from_path);
error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE | t, ctx);
if (error)
goto out;
* If fvp is a directory, and we are changing it's parent,
* then we also need rights to rewrite its ".." entry as well.
*/
+ if (to_path != NULL)
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_WILL_RENAME,
+ (uintptr_t)fvp,
+ (uintptr_t)to_path);
if (vnode_isdir(fvp)) {
if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
goto out;
return vnode_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx);
}
+/*
+ * Authorizer for directory cloning. This does not use vnodes but instead
+ * uses prefilled vnode attributes from the filesystem.
+ *
+ * The same function is called to set up the attributes required, perform the
+ * authorization and cleanup (if required); vattr_op selects the phase:
+ *
+ *   OP_VATTR_SETUP    initialize vap (and dvap, when non-NULL) and mark the
+ *                     attributes the authorization phase reads as wanted.
+ *                     Always returns 0.
+ *   OP_VATTR_CLEANUP  nothing to do for now. Always returns 0.
+ *   any other value   authorize `action` against vap through
+ *                     vnode_attr_authorize(); on success, fill in the
+ *                     owner, group and inherited flags of the new object
+ *                     in vap. Returns 0 or the authorization error.
+ *
+ * dvap (attributes of the destination directory) is optional in every
+ * phase. When it is NULL the group falls back to the caller's credential
+ * and no flags are inherited from the destination directory.
+ *
+ * sdvp and reserved are unused.
+ */
+int
+vnode_attr_authorize_dir_clone(struct vnode_attr *vap, kauth_action_t action,
+    struct vnode_attr *dvap, __unused vnode_t sdvp, mount_t mp,
+    dir_clone_authorizer_op_t vattr_op, uint32_t flags, vfs_context_t ctx,
+    __unused void *reserved)
+{
+	int error;
+	int is_suser = vfs_context_issuser(ctx);
+
+	if (vattr_op == OP_VATTR_SETUP) {
+		VATTR_INIT(vap);
+
+		/*
+		 * When ACL inheritance is implemented, both vap->va_acl and
+		 * dvap->va_acl will be required (even as superuser).
+		 */
+		VATTR_WANTED(vap, va_type);
+		VATTR_WANTED(vap, va_mode);
+		VATTR_WANTED(vap, va_flags);
+		VATTR_WANTED(vap, va_uid);
+		VATTR_WANTED(vap, va_gid);
+		if (dvap) {
+			VATTR_INIT(dvap);
+			VATTR_WANTED(dvap, va_flags);
+		}
+
+		if (!is_suser) {
+			/*
+			 * If not superuser, we have to evaluate ACLs and
+			 * need the target directory gid to set the initial
+			 * gid of the new object.
+			 */
+			VATTR_WANTED(vap, va_acl);
+			if (dvap)
+				VATTR_WANTED(dvap, va_gid);
+		} else if (dvap && (flags & VNODE_CLONEFILE_NOOWNERCOPY)) {
+			VATTR_WANTED(dvap, va_gid);
+		}
+		return (0);
+	} else if (vattr_op == OP_VATTR_CLEANUP) {
+		return (0); /* Nothing to do for now */
+	}
+
+	/* dvap isn't used for authorization */
+	error = vnode_attr_authorize(vap, NULL, mp, action, ctx);
+
+	if (error)
+		return (error);
+
+	/*
+	 * vn_attribute_prepare should be able to accept attributes as well as
+	 * vnodes but for now we do this inline.
+	 */
+	if (!is_suser || (flags & VNODE_CLONEFILE_NOOWNERCOPY)) {
+		/*
+		 * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit
+		 * owner is set, that owner takes ownership of all new files.
+		 */
+		if ((mp->mnt_flag & MNT_IGNORE_OWNERSHIP) &&
+		    (mp->mnt_fsowner != KAUTH_UID_NONE)) {
+			VATTR_SET(vap, va_uid, mp->mnt_fsowner);
+		} else {
+			/* default owner is current user */
+			VATTR_SET(vap, va_uid,
+			    kauth_cred_getuid(vfs_context_ucred(ctx)));
+		}
+
+		if ((mp->mnt_flag & MNT_IGNORE_OWNERSHIP) &&
+		    (mp->mnt_fsgroup != KAUTH_GID_NONE)) {
+			VATTR_SET(vap, va_gid, mp->mnt_fsgroup);
+		} else {
+			/*
+			 * default group comes from parent object,
+			 * fallback to current user.
+			 *
+			 * dvap is optional (the setup phase above accepts
+			 * NULL), so it must not be dereferenced blindly.
+			 */
+			if (dvap && VATTR_IS_SUPPORTED(dvap, va_gid)) {
+				VATTR_SET(vap, va_gid, dvap->va_gid);
+			} else {
+				VATTR_SET(vap, va_gid,
+				    kauth_cred_getgid(vfs_context_ucred(ctx)));
+			}
+		}
+	}
+
+	/*
+	 * Inherit the UF_DATAVAULT and SF_RESTRICTED bits from the destination
+	 * directory only: whatever the source carried is turned off first.
+	 * Without destination attributes (dvap == NULL) nothing is inherited.
+	 */
+	if (VATTR_IS_ACTIVE(vap, va_flags)) {
+		VATTR_SET(vap, va_flags,
+		    ((vap->va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)))); /* Turn off from source */
+		if (dvap && VATTR_IS_ACTIVE(dvap, va_flags)) {
+			VATTR_SET(vap, va_flags,
+			    vap->va_flags | (dvap->va_flags & (UF_DATAVAULT | SF_RESTRICTED)));
+		}
+	} else if (dvap && VATTR_IS_ACTIVE(dvap, va_flags)) {
+		VATTR_SET(vap, va_flags, (dvap->va_flags & (UF_DATAVAULT | SF_RESTRICTED)));
+	}
+
+	return (0);
+}
+
+
/*
* Authorize an operation on a vnode.
*
* Check for file immutability.
*/
static int
-vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore)
+vnode_authorize_checkimmutable(mount_t mp, struct vnode_attr *vap, int rights, int ignore)
{
- mount_t mp;
int error;
int append;
*
* Sockets, fifos and devices require special handling.
*/
- switch(vp->v_type) {
+ switch(vap->va_type) {
case VSOCK:
case VFIFO:
case VBLK:
if (rights & KAUTH_VNODE_WRITE_RIGHTS) {
/* check per-filesystem options if possible */
- mp = vp->v_mount;
if (mp != NULL) {
/* check for no-EA filesystems */
* allowable for a UF_APPEND file.
*/
append = 0;
- if (vp->v_type == VDIR) {
+ if (vap->va_type == VDIR) {
if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) == rights)
append = 1;
} else {
static int
-vnode_authorize_callback(kauth_cred_t cred, void *idata, kauth_action_t action,
- uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
+vnode_authorize_callback(__unused kauth_cred_t cred, __unused void *idata,
+ kauth_action_t action, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
+ uintptr_t arg3)
{
vfs_context_t ctx;
vnode_t cvp = NULLVP;
goto out;
}
defer:
- result = vnode_authorize_callback_int(cred, idata, action, arg0, arg1, arg2, arg3);
+ result = vnode_authorize_callback_int(action, ctx, vp, dvp, (int *)arg3);
if (result == KAUTH_RESULT_ALLOW && cvp != NULLVP) {
KAUTH_DEBUG("%p - caching action = %x", cvp, action);
return result;
}
+/*
+ * Attribute-based core of vnode authorization, shared by the vnode path
+ * (vnode_authorize_callback_int) and the attribute-only path
+ * (vnode_attr_authorize).
+ *
+ * vcp        authorization context; vcp->vap is always read, vcp->dvap is
+ *            read when KAUTH_VNODE_DELETE is requested and the parent has
+ *            not already been authorized for delete-child.
+ * mp         mount passed through to the immutability checks.
+ * rights     rights being requested.
+ * is_suser   non-zero when the caller is the superuser.
+ * found_deny out: set to TRUE on the superuser path, otherwise handed to
+ *            vnode_authorize_simple().
+ * noimmutable passed to the immutability check of the target as its
+ *            "ignore" argument.
+ * parent_authorized_for_delete_child
+ *            non-zero when the parent directory check can be skipped.
+ *
+ * Returns 0 when the rights are granted, otherwise an errno value.
+ */
+static int
+vnode_attr_authorize_internal(vauth_ctx vcp, mount_t mp,
+    kauth_ace_rights_t rights, int is_suser, boolean_t *found_deny,
+    int noimmutable, int parent_authorized_for_delete_child)
+{
+	int result;
+
+	/*
+	 * Check for immutability.
+	 *
+	 * In the deletion case, parent directory immutability vetoes specific
+	 * file rights.
+	 */
+	if ((result = vnode_authorize_checkimmutable(mp, vcp->vap, rights,
+	    noimmutable)) != 0)
+		goto out;
+
+	if ((rights & KAUTH_VNODE_DELETE) &&
+	    !parent_authorized_for_delete_child) {
+		result = vnode_authorize_checkimmutable(mp, vcp->dvap,
+		    KAUTH_VNODE_DELETE_CHILD, 0);
+		if (result)
+			goto out;
+	}
+
+	/*
+	 * Clear rights that have been authorized by reaching this point, bail if nothing left to
+	 * check.
+	 */
+	rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE);
+	if (rights == 0)
+		goto out;
+
+	/*
+	 * If we're not the superuser, authorize based on file properties;
+	 * note that even if parent_authorized_for_delete_child is TRUE, we
+	 * need to check on the node itself.
+	 */
+	if (!is_suser) {
+		/* process delete rights */
+		if ((rights & KAUTH_VNODE_DELETE) &&
+		    ((result = vnode_authorize_delete(vcp, parent_authorized_for_delete_child)) != 0))
+			goto out;
+
+		/* process remaining rights */
+		if ((rights & ~KAUTH_VNODE_DELETE) &&
+		    (result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE, found_deny)) != 0)
+			goto out;
+	} else {
+		/*
+		 * Execute is only granted to root if one of the x bits is set.  This check only
+		 * makes sense if the posix mode bits are actually supported.
+		 */
+		if ((rights & KAUTH_VNODE_EXECUTE) &&
+		    (vcp->vap->va_type == VREG) &&
+		    VATTR_IS_SUPPORTED(vcp->vap, va_mode) &&
+		    !(vcp->vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+			result = EPERM;
+			/*
+			 * Only names in scope here may be used: there is no
+			 * local vp/va in this function, so go through vcp.
+			 * vcp->vp may be NULLVP on the attribute-only path.
+			 */
+			KAUTH_DEBUG("%p    DENIED - root execute requires at least one x bit in 0x%x", vcp->vp, vcp->vap->va_mode);
+			goto out;
+		}
+
+		/* Assume that there were DENYs so we don't wrongly cache KAUTH_VNODE_SEARCHBYANYONE */
+		*found_deny = TRUE;
+
+		KAUTH_DEBUG("%p    ALLOWED - caller is superuser", vcp->vp);
+	}
+out:
+	return (result);
+}
static int
-vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action,
- uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
+vnode_authorize_callback_int(kauth_action_t action, vfs_context_t ctx,
+ vnode_t vp, vnode_t dvp, int *errorp)
{
struct _vnode_authorize_context auth_context;
vauth_ctx vcp;
- vfs_context_t ctx;
- vnode_t vp, dvp;
kauth_cred_t cred;
kauth_ace_rights_t rights;
struct vnode_attr va, dva;
int result;
- int *errorp;
int noimmutable;
boolean_t parent_authorized_for_delete_child = FALSE;
boolean_t found_deny = FALSE;
boolean_t is_suser = FALSE;
vcp = &auth_context;
- ctx = vcp->ctx = (vfs_context_t)arg0;
- vp = vcp->vp = (vnode_t)arg1;
- dvp = vcp->dvp = (vnode_t)arg2;
- errorp = (int *)arg3;
+ vcp->ctx = ctx;
+ vcp->vp = vp;
+ vcp->dvp = dvp;
/*
* Note that we authorize against the context, not the passed cred
* (the same thing anyway)
if (vnode_cache_is_authorized(dvp, ctx, KAUTH_VNODE_DELETE_CHILD) == TRUE)
parent_authorized_for_delete_child = TRUE;
} else {
- dvp = NULL;
+ vcp->dvp = NULLVP;
+ vcp->dvap = NULL;
}
/*
KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result);
goto out;
}
- if (dvp) {
+ VATTR_WANTED(&va, va_type);
+ VATTR_RETURN(&va, va_type, vnode_vtype(vp));
+
+ if (vcp->dvp) {
VATTR_WANTED(&dva, va_mode);
VATTR_WANTED(&dva, va_flags);
if (!is_suser) {
VATTR_WANTED(&dva, va_gid);
VATTR_WANTED(&dva, va_acl);
}
- if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) {
+ if ((result = vnode_getattr(vcp->dvp, &dva, ctx)) != 0) {
KAUTH_DEBUG("%p ERROR - failed to get directory vnode attributes - %d", vp, result);
goto out;
}
+ VATTR_WANTED(&dva, va_type);
+ VATTR_RETURN(&dva, va_type, vnode_vtype(vcp->dvp));
}
- /*
- * Check for immutability.
- *
- * In the deletion case, parent directory immutability vetoes specific
- * file rights.
- */
- if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0)
- goto out;
- if ((rights & KAUTH_VNODE_DELETE) &&
- parent_authorized_for_delete_child == FALSE &&
- ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0))
- goto out;
-
- /*
- * Clear rights that have been authorized by reaching this point, bail if nothing left to
- * check.
- */
- rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE);
- if (rights == 0)
- goto out;
-
- /*
- * If we're not the superuser, authorize based on file properties;
- * note that even if parent_authorized_for_delete_child is TRUE, we
- * need to check on the node itself.
- */
- if (!is_suser) {
- /* process delete rights */
- if ((rights & KAUTH_VNODE_DELETE) &&
- ((result = vnode_authorize_delete(vcp, parent_authorized_for_delete_child)) != 0))
- goto out;
-
- /* process remaining rights */
- if ((rights & ~KAUTH_VNODE_DELETE) &&
- (result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE, &found_deny)) != 0)
- goto out;
- } else {
-
- /*
- * Execute is only granted to root if one of the x bits is set. This check only
- * makes sense if the posix mode bits are actually supported.
- */
- if ((rights & KAUTH_VNODE_EXECUTE) &&
- (vp->v_type == VREG) &&
- VATTR_IS_SUPPORTED(&va, va_mode) &&
- !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
- result = EPERM;
- KAUTH_DEBUG("%p DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode);
- goto out;
- }
-
- /* Assume that there were DENYs so we don't wrongly cache KAUTH_VNODE_SEARCHBYANYONE */
- found_deny = TRUE;
-
- KAUTH_DEBUG("%p ALLOWED - caller is superuser", vp);
- }
+ result = vnode_attr_authorize_internal(vcp, vp->v_mount, rights, is_suser,
+ &found_deny, noimmutable, parent_authorized_for_delete_child);
out:
if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL))
kauth_acl_free(va.va_acl);
return(KAUTH_RESULT_ALLOW);
}
+/*
+ * Prepare vap (and optionally dvap) for a later vnode_attr_authorize() call
+ * by initializing them and marking the attributes that will be needed.
+ *
+ * Returns EINVAL when a delete is requested without parent attributes
+ * (dvap == NULL), 0 otherwise.
+ */
+int
+vnode_attr_authorize_init(struct vnode_attr *vap, struct vnode_attr *dvap,
+    kauth_action_t action, vfs_context_t ctx)
+{
+	const int deleting = (action & KAUTH_VNODE_DELETE) != 0;
+
+	/* Attributes of the target that every request needs. */
+	VATTR_INIT(vap);
+	VATTR_WANTED(vap, va_type);
+	VATTR_WANTED(vap, va_mode);
+	VATTR_WANTED(vap, va_flags);
+
+	if (dvap == NULL) {
+		/* A delete cannot be authorized without the parent. */
+		if (deleting) {
+			return (EINVAL);
+		}
+	} else {
+		VATTR_INIT(dvap);
+		if (deleting) {
+			VATTR_WANTED(dvap, va_type);
+			VATTR_WANTED(dvap, va_mode);
+			VATTR_WANTED(dvap, va_flags);
+		}
+	}
+
+	/* The superuser needs no ownership or ACL information. */
+	if (vfs_context_issuser(ctx)) {
+		return (0);
+	}
+
+	VATTR_WANTED(vap, va_uid);
+	VATTR_WANTED(vap, va_gid);
+	VATTR_WANTED(vap, va_acl);
+	if (dvap != NULL && deleting) {
+		VATTR_WANTED(dvap, va_uid);
+		VATTR_WANTED(dvap, va_gid);
+		VATTR_WANTED(dvap, va_acl);
+	}
+
+	return (0);
+}
+
+/*
+ * Authorize `action` using prefilled vnode attributes instead of vnodes.
+ *
+ * vap     attributes of the object being authorized.
+ * dvap    attributes of its parent directory; required when the request
+ *         includes KAUTH_VNODE_DELETE, may be NULL otherwise.
+ * mp      mount the object lives on; may be NULL, in which case the
+ *         read-only, noexec and extended-security checks are skipped.
+ * action  KAUTH_VNODE_* rights; KAUTH_VNODE_ACCESS and
+ *         KAUTH_VNODE_NOIMMUTABLE are stripped from the rights evaluated.
+ * ctx     context whose credential is being authorized.
+ *
+ * Returns 0 when authorized, EROFS / EACCES for the filesystem-level
+ * denials, EINVAL for a delete request without dvap, or the error from
+ * the internal authorizer (with EPERM reported as EACCES).
+ *
+ * Panics if a non-superuser request arrives without va_uid / va_gid (or
+ * va_acl on an extended-security mount) marked as supported in vap.
+ */
+int
+vnode_attr_authorize(struct vnode_attr *vap, struct vnode_attr *dvap, mount_t mp,
+    kauth_action_t action, vfs_context_t ctx)
+{
+	struct _vnode_authorize_context auth_context;
+	vauth_ctx vcp;
+	kauth_ace_rights_t rights;
+	int noimmutable;
+	/* Initialized so the callee never sees an indeterminate value. */
+	boolean_t found_deny = FALSE;
+	boolean_t is_suser = FALSE;
+	int result = 0;
+
+	vcp = &auth_context;
+	vcp->ctx = ctx;
+	vcp->vp = NULLVP;
+	vcp->vap = vap;
+	vcp->dvp = NULLVP;
+	vcp->dvap = dvap;
+	vcp->flags = vcp->flags_valid = 0;
+
+	noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0;
+	rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE);
+
+	/*
+	 * Check for read-only filesystems.
+	 */
+	if ((rights & KAUTH_VNODE_WRITE_RIGHTS) &&
+	    mp && (mp->mnt_flag & MNT_RDONLY) &&
+	    ((vap->va_type == VREG) || (vap->va_type == VDIR) ||
+	    (vap->va_type == VLNK) || (rights & KAUTH_VNODE_DELETE) ||
+	    (rights & KAUTH_VNODE_DELETE_CHILD))) {
+		result = EROFS;
+		goto out;
+	}
+
+	/*
+	 * Check for noexec filesystems.
+	 */
+	if ((rights & KAUTH_VNODE_EXECUTE) &&
+	    (vap->va_type == VREG) && mp && (mp->mnt_flag & MNT_NOEXEC)) {
+		result = EACCES;
+		goto out;
+	}
+
+	if (vfs_context_issuser(ctx)) {
+		/*
+		 * if we're not asking for execute permissions or modifications,
+		 * then we're done, this action is authorized.
+		 */
+		if (!(rights & (KAUTH_VNODE_EXECUTE | KAUTH_VNODE_WRITE_RIGHTS)))
+			goto out;
+		is_suser = TRUE;
+	} else {
+		if (!VATTR_IS_SUPPORTED(vap, va_uid) ||
+		    !VATTR_IS_SUPPORTED(vap, va_gid) ||
+		    (mp && vfs_extendedsecurity(mp) && !VATTR_IS_SUPPORTED(vap, va_acl))) {
+			panic("vnode attrs not complete for vnode_attr_authorize\n");
+		}
+	}
+
+	/*
+	 * A delete needs the parent's attributes: the internal authorizer
+	 * reads vcp->dvap for the DELETE_CHILD immutability check. Reject a
+	 * delete without them, as vnode_attr_authorize_init() does, rather
+	 * than dereferencing NULL.
+	 */
+	if ((rights & KAUTH_VNODE_DELETE) && (dvap == NULL)) {
+		result = EINVAL;
+		goto out;
+	}
+
+	result = vnode_attr_authorize_internal(vcp, mp, rights, is_suser,
+	    &found_deny, noimmutable, FALSE);
+
+	/* Callers of this interface see permission failures as EACCES. */
+	if (result == EPERM)
+		result = EACCES;
+out:
+	return (result);
+}
+
+
int
vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx)
{
vnode_authattr_new_internal(vnode_t dvp, struct vnode_attr *vap, int noauth, uint32_t *defaulted_fieldsp, vfs_context_t ctx)
{
int error;
- int has_priv_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode, inherit_restricted;
+ int has_priv_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode;
+ uint32_t inherit_flags;
kauth_cred_t cred;
guid_t changer;
mount_t dmp;
defaulted_owner = defaulted_group = defaulted_mode = 0;
- inherit_restricted = 0;
+ inherit_flags = 0;
/*
* Require that the filesystem support extended security to apply any.
/* Determine if SF_RESTRICTED should be inherited from the parent
* directory. */
- if (VATTR_IS_SUPPORTED(&dva, va_flags) &&
- (dva.va_flags & SF_RESTRICTED)) {
- inherit_restricted = 1;
+ if (VATTR_IS_SUPPORTED(&dva, va_flags)) {
+ inherit_flags = dva.va_flags & (UF_DATAVAULT | SF_RESTRICTED);
}
/* default mode is everything, masked with current umask */
}
}
out:
- if (inherit_restricted) {
+ if (inherit_flags) {
/* Apply SF_RESTRICTED to the file if its parent directory was
* restricted. This is done at the end so that root is not
* required if this flag is only set due to inheritance. */
- VATTR_SET(vap, va_flags, (vap->va_flags | SF_RESTRICTED));
+ VATTR_SET(vap, va_flags, (vap->va_flags | inherit_flags));
}
if (defaulted_fieldsp) {
if (defaulted_mode) {
VATTR_IS_ACTIVE(vap, va_change_time) ||
VATTR_IS_ACTIVE(vap, va_modify_time) ||
VATTR_IS_ACTIVE(vap, va_access_time) ||
- VATTR_IS_ACTIVE(vap, va_backup_time)) {
+ VATTR_IS_ACTIVE(vap, va_backup_time) ||
+ VATTR_IS_ACTIVE(vap, va_addedtime)) {
VATTR_WANTED(&ova, va_uid);
#if 0 /* enable this when we support UUIDs as official owners */
VATTR_IS_ACTIVE(vap, va_change_time) ||
VATTR_IS_ACTIVE(vap, va_modify_time) ||
VATTR_IS_ACTIVE(vap, va_access_time) ||
- VATTR_IS_ACTIVE(vap, va_backup_time)) {
+ VATTR_IS_ACTIVE(vap, va_backup_time) ||
+ VATTR_IS_ACTIVE(vap, va_addedtime)) {
/*
* The owner and root may set any timestamps they like,
* provided that the file is not immutable. The owner still needs
mount_unlock(mp);
}
-
void
vnode_setswapmount(vnode_t vp)
{
return dst;
}
-extern int kdb_printf(const char *format, ...) __printflike(1,2);
-
#define SANE_VNODE_PRINT_LIMIT 5000
void panic_print_vnodes(void)
{
char *nm;
char vname[257];
- kdb_printf("\n***** VNODES *****\n"
+ paniclog_append_noflush("\n***** VNODES *****\n"
"TYPE UREF ICNT PATH\n");
/* NULL-terminate the path name */
TAILQ_FOREACH(mnt, &mountlist, mnt_list) {
if (!ml_validate_nofault((vm_offset_t)mnt, sizeof(mount_t))) {
- kdb_printf("Unable to iterate the mount list %p - encountered an invalid mount pointer %p \n",
+ paniclog_append_noflush("Unable to iterate the mount list %p - encountered an invalid mount pointer %p \n",
&mountlist, mnt);
break;
}
TAILQ_FOREACH(vp, &mnt->mnt_vnodelist, v_mntvnodes) {
if (!ml_validate_nofault((vm_offset_t)vp, sizeof(vnode_t))) {
- kdb_printf("Unable to iterate the vnode list %p - encountered an invalid vnode pointer %p \n",
+ paniclog_append_noflush("Unable to iterate the vnode list %p - encountered an invalid vnode pointer %p \n",
&mnt->mnt_vnodelist, vp);
break;
}
return;
type = __vtype(vp->v_type);
nm = __vpath(vp, vname, sizeof(vname)-1, 0);
- kdb_printf("%s %0d %0d %s\n",
+ paniclog_append_noflush("%s %0d %0d %s\n",
type, vp->v_usecount, vp->v_iocount, nm);
}
}
lck_mtx_unlock(&rp->vr_lock);
+#if CONFIG_MACF
+ int rv = mac_vnode_check_trigger_resolve(ctx, vp, &ndp->ni_cnd);
+ if (rv != 0)
+ return rv;
+#endif
+
/*
* XXX
* assumes that resolver will not access this trigger vnode (otherwise the kernel will deadlock)
/* vn_getpath() NUL-terminates, and len includes the NUL */
if (!rv) {
- kdebug_lookup_gen_events(ctx->path, len, vp, TRUE);
+ kdebug_vfs_lookup(ctx->path, len, vp,
+ KDBG_VFS_LOOKUP_FLAG_LOOKUP | KDBG_VFS_LOOKUP_FLAG_NOPROCFILT);
if (++(ctx->count) == 1000) {
thread_yield_to_preemption();