X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/8ad349bb6ed4a0be06e34c92be0d98b92e078db4..008676633c2ad2c325837c2b64915f7ded690a8f:/bsd/vfs/kpi_vfs.c diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index f4b956fb8..dd3560fed 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -1,31 +1,29 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved. + * Copyright (c) 2000-2016 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the - * License may not be used to create, or enable the creation or - * redistribution of, unlawful or unlicensed copies of an Apple operating - * system, or to circumvent, violate, or enable the circumvention or - * violation of, any terms of an Apple operating system software license - * agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and * limitations under the License. - * - * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* @@ -67,13 +65,17 @@ * * @(#)kpi_vfs.c */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ /* * External virtual filesystem routines */ -#undef DIAGNOSTIC -#define DIAGNOSTIC 1 #include #include @@ -96,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -103,11 +106,21 @@ #include #include +#include + +#include #include #include #include +#include + +#if CONFIG_MACF +#include +#endif + +#include #define ESUCCESS 0 #undef mount_t @@ -115,380 +128,285 @@ #define COMPAT_ONLY - -#define THREAD_SAFE_FS(VP) \ - ((VP)->v_unsafefs ? 
0 : 1) - #define NATIVE_XATTR(VP) \ - ((VP)->v_mount ? (VP)->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR : 0) + ((VP)->v_mount ? (VP)->v_mount->mnt_kern_flag & MNTK_EXTENDED_ATTRS : 0) -static void xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, - int thread_safe, int force); -static void xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, - vfs_context_t context, int thread_safe); +#if CONFIG_APPLEDOUBLE +static void xattrfile_remove(vnode_t dvp, const char *basename, + vfs_context_t ctx, int force); +static void xattrfile_setattr(vnode_t dvp, const char * basename, + struct vnode_attr * vap, vfs_context_t ctx); +#endif /* CONFIG_APPLEDOUBLE */ +static errno_t post_rename(vnode_t fdvp, vnode_t fvp, vnode_t tdvp, vnode_t tvp); -static void +/* + * vnode_setneedinactive + * + * Description: Indicate that when the last iocount on this vnode goes away, + * and the usecount is also zero, we should inform the filesystem + * via VNOP_INACTIVE. + * + * Parameters: vnode_t vnode to mark + * + * Returns: Nothing + * + * Notes: Notably used when we're deleting a file--we need not have a + * usecount, so VNOP_INACTIVE may not get called by anyone. We + * want it called when we drop our iocount. 
+ */ +void vnode_setneedinactive(vnode_t vp) { cache_purge(vp); - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_lflag |= VL_NEEDINACTIVE; vnode_unlock(vp); } -int -lock_fsnode(vnode_t vp, int *funnel_state) -{ - if (funnel_state) - *funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (vp->v_unsafefs) { - if (vp->v_unsafefs->fsnodeowner == current_thread()) { - vp->v_unsafefs->fsnode_count++; - } else { - lck_mtx_lock(&vp->v_unsafefs->fsnodelock); - - if (vp->v_lflag & (VL_TERMWANT | VL_TERMINATE | VL_DEAD)) { - lck_mtx_unlock(&vp->v_unsafefs->fsnodelock); - - if (funnel_state) - (void) thread_funnel_set(kernel_flock, *funnel_state); - return (ENOENT); - } - vp->v_unsafefs->fsnodeowner = current_thread(); - vp->v_unsafefs->fsnode_count = 1; - } - } - return (0); -} - - -void -unlock_fsnode(vnode_t vp, int *funnel_state) -{ - if (vp->v_unsafefs) { - if (--vp->v_unsafefs->fsnode_count == 0) { - vp->v_unsafefs->fsnodeowner = NULL; - lck_mtx_unlock(&vp->v_unsafefs->fsnodelock); - } - } - if (funnel_state) - (void) thread_funnel_set(kernel_flock, *funnel_state); -} - - - /* ====================================================================== */ /* ************ EXTERNAL KERNEL APIS ********************************** */ /* ====================================================================== */ /* - * prototypes for exported VFS operations + * implementations of exported VFS operations */ int -VFS_MOUNT(struct mount * mp, vnode_t devvp, user_addr_t data, vfs_context_t context) +VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; if ((mp == dead_mountp) || (mp->mnt_op->vfs_mount == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; - - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - - if (vfs_context_is64bit(context)) { + if (vfs_context_is64bit(ctx)) { if (vfs_64bitready(mp)) { - error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, 
context); + error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx); } else { error = ENOTSUP; } } else { - error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, context); + error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx); } - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } return (error); } int -VFS_START(struct mount * mp, int flags, vfs_context_t context) +VFS_START(mount_t mp, int flags, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; if ((mp == dead_mountp) || (mp->mnt_op->vfs_start == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; + error = (*mp->mnt_op->vfs_start)(mp, flags, ctx); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_start)(mp, flags, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } return (error); } int -VFS_UNMOUNT(struct mount *mp, int flags, vfs_context_t context) +VFS_UNMOUNT(mount_t mp, int flags, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; if ((mp == dead_mountp) || (mp->mnt_op->vfs_unmount == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; + error = (*mp->mnt_op->vfs_unmount)(mp, flags, ctx); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_unmount)(mp, flags, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } return (error); } +/* + * Returns: 0 Success + * ENOTSUP Not supported + * :ENOENT + * :??? + * + * Note: The return codes from the underlying VFS's root routine can't + * be fully enumerated here, since third party VFS authors may not + * limit their error returns to the ones documented here, even + * though this may result in some programs functioning incorrectly. 
+ * + * The return codes documented above are those which may currently + * be returned by HFS from hfs_vfs_root, which is a simple wrapper + * for a call to hfs_vget on the volume mount point, not including + * additional error codes which may be propagated from underlying + * routines called by hfs_vget. + */ int -VFS_ROOT(struct mount * mp, struct vnode ** vpp, vfs_context_t context) +VFS_ROOT(mount_t mp, struct vnode ** vpp, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; if ((mp == dead_mountp) || (mp->mnt_op->vfs_root == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_root)(mp, vpp, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + error = (*mp->mnt_op->vfs_root)(mp, vpp, ctx); + return (error); } int -VFS_QUOTACTL(struct mount *mp, int cmd, uid_t uid, caddr_t datap, vfs_context_t context) +VFS_QUOTACTL(mount_t mp, int cmd, uid_t uid, caddr_t datap, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; if ((mp == dead_mountp) || (mp->mnt_op->vfs_quotactl == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; + error = (*mp->mnt_op->vfs_quotactl)(mp, cmd, uid, datap, ctx); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_quotactl)(mp, cmd, uid, datap, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } return (error); } int -VFS_GETATTR(struct mount *mp, struct vfs_attr *vfa, vfs_context_t context) +VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - 
struct vfs_context acontext; if ((mp == dead_mountp) || (mp->mnt_op->vfs_getattr == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; + + error = (*mp->mnt_op->vfs_getattr)(mp, vfa, ctx); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_getattr)(mp, vfa, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } return(error); } int -VFS_SETATTR(struct mount *mp, struct vfs_attr *vfa, vfs_context_t context) +VFS_SETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; if ((mp == dead_mountp) || (mp->mnt_op->vfs_setattr == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; - } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_setattr)(mp, vfa, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); + if (ctx == NULL) { + ctx = vfs_context_current(); } + + error = (*mp->mnt_op->vfs_setattr)(mp, vfa, ctx); + return(error); } int -VFS_SYNC(struct mount *mp, int flags, vfs_context_t context) +VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; if ((mp == dead_mountp) || (mp->mnt_op->vfs_sync == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - if (!thread_safe) { - funnel_state = 
thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_sync)(mp, flags, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + error = (*mp->mnt_op->vfs_sync)(mp, flags, ctx); + return(error); } int -VFS_VGET(struct mount * mp, ino64_t ino, struct vnode **vpp, vfs_context_t context) +VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; if ((mp == dead_mountp) || (mp->mnt_op->vfs_vget == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_vget)(mp, ino, vpp, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + error = (*mp->mnt_op->vfs_vget)(mp, ino, vpp, ctx); + return(error); } int -VFS_FHTOVP(struct mount * mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_context_t context) +VFS_FHTOVP(mount_t mp, int fhlen, unsigned char *fhp, vnode_t *vpp, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; if ((mp == dead_mountp) || (mp->mnt_op->vfs_fhtovp == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*mp->mnt_op->vfs_fhtovp)(mp, fhlen, fhp, vpp, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + error = (*mp->mnt_op->vfs_fhtovp)(mp, fhlen, fhp, vpp, ctx); + return(error); } int 
-VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t context) +VFS_VPTOFH(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t ctx) { int error; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; if ((vp->v_mount == dead_mountp) || (vp->v_mount->mnt_op->vfs_vptofh == 0)) return(ENOTSUP); - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } - error = (*vp->v_mount->mnt_op->vfs_vptofh)(vp, fhlenp, fhp, context); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + error = (*vp->v_mount->mnt_op->vfs_vptofh)(vp, fhlenp, fhp, ctx); + return(error); } +int VFS_IOCTL(struct mount *mp, u_long command, caddr_t data, + int flags, vfs_context_t context) +{ + if (mp == dead_mountp || !mp->mnt_op->vfs_ioctl) + return ENOTSUP; + + return mp->mnt_op->vfs_ioctl(mp, command, data, flags, + context ?: vfs_context_current()); +} + +int +VFS_VGET_SNAPDIR(mount_t mp, vnode_t *vpp, vfs_context_t ctx) +{ + int error; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_vget_snapdir == 0)) + return(ENOTSUP); + + if (ctx == NULL) + ctx = vfs_context_current(); + + error = (*mp->mnt_op->vfs_vget_snapdir)(mp, vpp, ctx); + + return (error); +} + +/* returns the cached throttle mask for the mount_t */ +uint64_t +vfs_throttle_mask(mount_t mp) +{ + return(mp->mnt_throttle_mask); +} /* returns a copy of vfs type name for the mount_t */ void -vfs_name(mount_t mp, char * buffer) +vfs_name(mount_t mp, char *buffer) { strncpy(buffer, mp->mnt_vtable->vfc_name, MFSNAMELEN); } @@ -500,6 +418,12 @@ vfs_typenum(mount_t mp) return(mp->mnt_vtable->vfc_typenum); } +/* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers. 
*/ +void* +vfs_mntlabel(mount_t mp) +{ + return (void*)mp->mnt_mntlabel; +} /* returns command modifier flags of mount_t ie. MNT_CMDFLAGS */ uint64_t @@ -514,7 +438,9 @@ vfs_setflags(mount_t mp, uint64_t flags) { uint32_t lflags = (uint32_t)(flags & (MNT_CMDFLAGS | MNT_VISFLAGMASK)); + mount_lock(mp); mp->mnt_flag |= lflags; + mount_unlock(mp); } /* clear any of the command modifier flags(MNT_CMDFLAGS) in mount_t */ @@ -523,7 +449,9 @@ vfs_clearflags(mount_t mp , uint64_t flags) { uint32_t lflags = (uint32_t)(flags & (MNT_CMDFLAGS | MNT_VISFLAGMASK)); + mount_lock(mp); mp->mnt_flag &= ~lflags; + mount_unlock(mp); } /* Is the mount_t ronly and upgrade read/write requested? */ @@ -571,25 +499,67 @@ vfs_isreload(mount_t mp) return ((mp->mnt_flag & MNT_UPDATE) && (mp->mnt_flag & MNT_RELOAD)); } -/* Is mount_t marked for reload (ie MNT_FORCE) */ +/* Is mount_t marked for forced unmount (ie MNT_FORCE or MNTK_FRCUNMOUNT) */ int vfs_isforce(mount_t mp) { - if ((mp->mnt_flag & MNT_FORCE) || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)) + if (mp->mnt_lflag & MNT_LFORCE) return(1); else return(0); } +int +vfs_isunmount(mount_t mp) +{ + if ((mp->mnt_lflag & MNT_LUNMOUNT)) { + return 1; + } else { + return 0; + } +} + int vfs_64bitready(mount_t mp) { - if ((mp->mnt_vtable->vfc_64bitready)) + if ((mp->mnt_vtable->vfc_vfsflags & VFC_VFS64BITREADY)) return(1); else return(0); } + +int +vfs_authcache_ttl(mount_t mp) +{ + if ( (mp->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) + return (mp->mnt_authcache_ttl); + else + return (CACHED_RIGHT_INFINITE_TTL); +} + +void +vfs_setauthcache_ttl(mount_t mp, int ttl) +{ + mount_lock(mp); + mp->mnt_kern_flag |= MNTK_AUTH_CACHE_TTL; + mp->mnt_authcache_ttl = ttl; + mount_unlock(mp); +} + +void +vfs_clearauthcache_ttl(mount_t mp) +{ + mount_lock(mp); + mp->mnt_kern_flag &= ~MNTK_AUTH_CACHE_TTL; + /* + * back to the default TTL value in case + * MNTK_AUTH_OPAQUE is set on this mount + */ + mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; + 
mount_unlock(mp); +} + int vfs_authopaque(mount_t mp) { @@ -656,6 +626,22 @@ vfs_clearextendedsecurity(mount_t mp) mount_unlock(mp); } +void +vfs_setnoswap(mount_t mp) +{ + mount_lock(mp); + mp->mnt_kern_flag |= MNTK_NOSWAP; + mount_unlock(mp); +} + +void +vfs_clearnoswap(mount_t mp) +{ + mount_lock(mp); + mp->mnt_kern_flag &= ~MNTK_NOSWAP; + mount_unlock(mp); +} + int vfs_extendedsecurity(mount_t mp) { @@ -687,7 +673,6 @@ int vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) { int error; - char *vname; if ((error = VFS_GETATTR(mp, vfa, ctx)) != 0) return(error); @@ -732,10 +717,17 @@ vfs_fsprivate(mount_t mp) void vfs_setfsprivate(mount_t mp, void *mntdata) { + mount_lock(mp); mp->mnt_data = mntdata; + mount_unlock(mp); } - +/* query whether the mount point supports native EAs */ +int +vfs_nativexattrs(mount_t mp) { + return (mp->mnt_kern_flag & MNTK_EXTENDED_ATTRS); +} + /* * return the block size of the underlying * device associated with mount_t @@ -746,6 +738,40 @@ vfs_devblocksize(mount_t mp) { return(mp->mnt_devblocksize); } +/* + * Returns vnode with an iocount that must be released with vnode_put() + */ +vnode_t +vfs_vnodecovered(mount_t mp) +{ + vnode_t vp = mp->mnt_vnodecovered; + if ((vp == NULL) || (vnode_getwithref(vp) != 0)) { + return NULL; + } else { + return vp; + } +} + +/* + * Returns device vnode backing a mountpoint with an iocount (if valid vnode exists). + * The iocount must be released with vnode_put(). Note that this KPI is subtle + * with respect to the validity of using this device vnode for anything substantial + * (which is discouraged). If commands are sent to the device driver without + * taking proper steps to ensure that the device is still open, chaos may ensue. + * Similarly, this routine should only be called if there is some guarantee that + * the mount itself is still valid. 
+ */ +vnode_t +vfs_devvp(mount_t mp) +{ + vnode_t vp = mp->mnt_devvp; + + if ((vp != NULLVP) && (vnode_get(vp) == 0)) { + return vp; + } + + return NULLVP; +} /* * return the io attributes associated with mount_t @@ -753,26 +779,29 @@ vfs_devblocksize(mount_t mp) { void vfs_ioattr(mount_t mp, struct vfsioattr *ioattrp) { - if (mp == NULL) { - ioattrp->io_maxreadcnt = MAXPHYS; + ioattrp->io_reserved[0] = NULL; + ioattrp->io_reserved[1] = NULL; + if (mp == NULL) { + ioattrp->io_maxreadcnt = MAXPHYS; ioattrp->io_maxwritecnt = MAXPHYS; ioattrp->io_segreadcnt = 32; ioattrp->io_segwritecnt = 32; ioattrp->io_maxsegreadsize = MAXPHYS; ioattrp->io_maxsegwritesize = MAXPHYS; ioattrp->io_devblocksize = DEV_BSIZE; + ioattrp->io_flags = 0; + ioattrp->io_max_swappin_available = 0; } else { - ioattrp->io_maxreadcnt = mp->mnt_maxreadcnt; + ioattrp->io_maxreadcnt = mp->mnt_maxreadcnt; ioattrp->io_maxwritecnt = mp->mnt_maxwritecnt; ioattrp->io_segreadcnt = mp->mnt_segreadcnt; ioattrp->io_segwritecnt = mp->mnt_segwritecnt; ioattrp->io_maxsegreadsize = mp->mnt_maxsegreadsize; ioattrp->io_maxsegwritesize = mp->mnt_maxsegwritesize; ioattrp->io_devblocksize = mp->mnt_devblocksize; + ioattrp->io_flags = mp->mnt_ioflags; + ioattrp->io_max_swappin_available = mp->mnt_max_swappin_available; } - ioattrp->io_reserved[0] = 0; - ioattrp->io_reserved[1] = 0; - ioattrp->io_reserved[2] = 0; } @@ -791,6 +820,8 @@ vfs_setioattr(mount_t mp, struct vfsioattr * ioattrp) mp->mnt_maxsegreadsize = ioattrp->io_maxsegreadsize; mp->mnt_maxsegwritesize = ioattrp->io_maxsegwritesize; mp->mnt_devblocksize = ioattrp->io_devblocksize; + mp->mnt_ioflags = ioattrp->io_flags; + mp->mnt_max_swappin_available = ioattrp->io_max_swappin_available; } /* @@ -802,9 +833,8 @@ vfs_setioattr(mount_t mp, struct vfsioattr * ioattrp) typedef int (*PFI)(void *); extern int vfs_opv_numops; errno_t -vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) +vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t *handle) { -#pragma unused(data) 
struct vfstable *newvfstbl = NULL; int i,j; int (***opv_desc_vector_p)(void *); @@ -823,10 +853,14 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) return(EINVAL); desccount = vfe->vfe_vopcnt; - if ((desccount <=0) || ((desccount > 5)) || (vfe->vfe_vfsops == (struct vfsops *)NULL) + if ((desccount <=0) || ((desccount > 8)) || (vfe->vfe_vfsops == (struct vfsops *)NULL) || (vfe->vfe_opvdescs == (struct vnodeopv_desc **)NULL)) return(EINVAL); + /* Non-threadsafe filesystems are not supported */ + if ((vfe->vfe_flags & (VFS_TBLTHREADSAFE | VFS_TBLFSNODELOCK)) == 0) { + return (EINVAL); + } MALLOC(newvfstbl, void *, sizeof(struct vfstable), M_TEMP, M_WAITOK); @@ -834,7 +868,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_vfsops = vfe->vfe_vfsops; strncpy(&newvfstbl->vfc_name[0], vfe->vfe_fsname, MFSNAMELEN); if ((vfe->vfe_flags & VFS_TBLNOTYPENUM)) - newvfstbl->vfc_typenum = maxvfsconf++; + newvfstbl->vfc_typenum = maxvfstypenum++; else newvfstbl->vfc_typenum = vfe->vfe_fstypenum; @@ -842,21 +876,34 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_flags = 0; newvfstbl->vfc_mountroot = NULL; newvfstbl->vfc_next = NULL; - newvfstbl->vfc_threadsafe = 0; newvfstbl->vfc_vfsflags = 0; if (vfe->vfe_flags & VFS_TBL64BITREADY) - newvfstbl->vfc_64bitready= 1; - if (vfe->vfe_flags & VFS_TBLTHREADSAFE) - newvfstbl->vfc_threadsafe= 1; - if (vfe->vfe_flags & VFS_TBLFSNODELOCK) - newvfstbl->vfc_threadsafe= 1; + newvfstbl->vfc_vfsflags |= VFC_VFS64BITREADY; + if (vfe->vfe_flags & VFS_TBLVNOP_PAGEINV2) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEINV2; + if (vfe->vfe_flags & VFS_TBLVNOP_PAGEOUTV2) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEOUTV2; if ((vfe->vfe_flags & VFS_TBLLOCALVOL) == VFS_TBLLOCALVOL) newvfstbl->vfc_flags |= MNT_LOCAL; - if (vfe->vfe_flags & VFS_TBLLOCALVOL) + if ((vfe->vfe_flags & VFS_TBLLOCALVOL) && (vfe->vfe_flags & VFS_TBLGENERICMNTARGS) == 0) newvfstbl->vfc_vfsflags |= VFC_VFSLOCALARGS; else 
newvfstbl->vfc_vfsflags |= VFC_VFSGENERICARGS; - + + if (vfe->vfe_flags & VFS_TBLNATIVEXATTR) + newvfstbl->vfc_vfsflags |= VFC_VFSNATIVEXATTR; + if (vfe->vfe_flags & VFS_TBLUNMOUNT_PREFLIGHT) + newvfstbl->vfc_vfsflags |= VFC_VFSPREFLIGHT; + if (vfe->vfe_flags & VFS_TBLREADDIR_EXTENDED) + newvfstbl->vfc_vfsflags |= VFC_VFSREADDIR_EXTENDED; + if (vfe->vfe_flags & VFS_TBLNOMACLABEL) + newvfstbl->vfc_vfsflags |= VFC_VFSNOMACLABEL; + if (vfe->vfe_flags & VFS_TBLVNOP_NOUPDATEID_RENAME) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_NOUPDATEID_RENAME; + if (vfe->vfe_flags & VFS_TBLVNOP_SECLUDE_RENAME) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_SECLUDE_RENAME; + if (vfe->vfe_flags & VFS_TBLCANMOUNTROOT) + newvfstbl->vfc_vfsflags |= VFC_VFSCANMOUNTROOT; /* * Allocate and init the vectors. @@ -875,6 +922,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_descptr = descptr; newvfstbl->vfc_descsize = descsize; + newvfstbl->vfc_sysctl = NULL; for (i= 0; i< desccount; i++ ) { opv_desc_vector_p = vfe->vfe_opvdescs[i]->opv_desc_vector_p; @@ -888,10 +936,17 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) for (j = 0; vfe->vfe_opvdescs[i]->opv_desc_ops[j].opve_op; j++) { opve_descp = &(vfe->vfe_opvdescs[i]->opv_desc_ops[j]); + /* Silently skip known-disabled operations */ + if (opve_descp->opve_op->vdesc_flags & VDESC_DISABLED) { + printf("vfs_fsadd: Ignoring reference in %p to disabled operation %s.\n", + vfe->vfe_opvdescs[i], opve_descp->opve_op->vdesc_name); + continue; + } + /* * Sanity check: is this operation listed * in the list of operations? We check this - * by seeing if its offest is zero. Since + * by seeing if its offset is zero. Since * the default routine should always be listed * first, it should be the only one with a zero * offset. Any other operation with a zero @@ -906,7 +961,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) * list of supported operations. 
*/ if (opve_descp->opve_op->vdesc_offset == 0 && - opve_descp->opve_op->vdesc_offset != VOFFSET(vnop_default)) { + opve_descp->opve_op != VDESC(vnop_default)) { printf("vfs_fsadd: operation %s not listed in %s.\n", opve_descp->opve_op->vdesc_name, "vfs_op_descs"); @@ -944,12 +999,22 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) *handle = vfstable_add(newvfstbl); - if (newvfstbl->vfc_typenum <= maxvfsconf ) - maxvfsconf = newvfstbl->vfc_typenum + 1; - numused_vfsslots++; + if (newvfstbl->vfc_typenum <= maxvfstypenum ) + maxvfstypenum = newvfstbl->vfc_typenum + 1; - if (newvfstbl->vfc_vfsops->vfs_init) - (*newvfstbl->vfc_vfsops->vfs_init)((struct vfsconf *)handle); + if (newvfstbl->vfc_vfsops->vfs_init) { + struct vfsconf vfsc; + bzero(&vfsc, sizeof(struct vfsconf)); + vfsc.vfc_reserved1 = 0; + bcopy((*handle)->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name)); + vfsc.vfc_typenum = (*handle)->vfc_typenum; + vfsc.vfc_refcount = (*handle)->vfc_refcount; + vfsc.vfc_flags = (*handle)->vfc_flags; + vfsc.vfc_reserved2 = 0; + vfsc.vfc_reserved3 = 0; + + (*newvfstbl->vfc_vfsops->vfs_init)(&vfsc); + } FREE(newvfstbl, M_TEMP); @@ -962,7 +1027,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) * file system was added */ errno_t -vfs_fsremove(vfstable_t handle) +vfs_fsremove(vfstable_t handle) { struct vfstable * vfstbl = (struct vfstable *)handle; void *old_desc = NULL; @@ -974,7 +1039,6 @@ vfs_fsremove(vfstable_t handle) mount_list_unlock(); return EBUSY; } - mount_list_unlock(); /* * save the old descriptor; the free cannot occur unconditionally, @@ -985,6 +1049,8 @@ vfs_fsremove(vfstable_t handle) } err = vfstable_del(vfstbl); + mount_list_unlock(); + /* free the descriptor if the delete was successful */ if (err == 0 && old_desc) { FREE(old_desc, M_TEMP); @@ -993,134 +1059,387 @@ vfs_fsremove(vfstable_t handle) return(err); } -/* - * This returns a reference to mount_t - * which should be dropped using vfs_mountrele(). 
- * Not doing so will leak a mountpoint - * and associated data structures. - */ -errno_t -vfs_mountref(__unused mount_t mp ) /* gives a reference */ +void vfs_setowner(mount_t mp, uid_t uid, gid_t gid) { - return(0); + mp->mnt_fsowner = uid; + mp->mnt_fsgroup = gid; } -/* This drops the reference on mount_t that was acquired */ -errno_t -vfs_mountrele(__unused mount_t mp ) /* drops reference */ +/* + * Callers should be careful how they use this; accessing + * mnt_last_write_completed_timestamp is not thread-safe. Writing to + * it isn't either. Point is: be prepared to deal with strange values + * being returned. + */ +uint64_t vfs_idle_time(mount_t mp) { - return(0); + if (mp->mnt_pending_write_size) + return 0; + + struct timeval now; + + microuptime(&now); + + return ((now.tv_sec + - mp->mnt_last_write_completed_timestamp.tv_sec) * 1000000 + + now.tv_usec - mp->mnt_last_write_completed_timestamp.tv_usec); } int -vfs_context_pid(vfs_context_t context) +vfs_context_pid(vfs_context_t ctx) { - return (context->vc_proc->p_pid); + return (proc_pid(vfs_context_proc(ctx))); } int -vfs_context_suser(vfs_context_t context) +vfs_context_suser(vfs_context_t ctx) { - return (suser(context->vc_ucred, 0)); + return (suser(ctx->vc_ucred, NULL)); } + +/* + * Return bit field of signals posted to all threads in the context's process. + * + * XXX Signals should be tied to threads, not processes, for most uses of this + * XXX call. 
+ */ int -vfs_context_issignal(vfs_context_t context, sigset_t mask) +vfs_context_issignal(vfs_context_t ctx, sigset_t mask) { - if (context->vc_proc) - return(proc_pendingsignals(context->vc_proc, mask)); + proc_t p = vfs_context_proc(ctx); + if (p) + return(proc_pendingsignals(p, mask)); return(0); } int -vfs_context_is64bit(vfs_context_t context) +vfs_context_is64bit(vfs_context_t ctx) { - if (context->vc_proc) - return(proc_is64bit(context->vc_proc)); + proc_t proc = vfs_context_proc(ctx); + + if (proc) + return(proc_is64bit(proc)); return(0); } + +/* + * vfs_context_proc + * + * Description: Given a vfs_context_t, return the proc_t associated with it. + * + * Parameters: vfs_context_t The context to use + * + * Returns: proc_t The process for this context + * + * Notes: This function will return the current_proc() if any of the + * following conditions are true: + * + * o The supplied context pointer is NULL + * o There is no Mach thread associated with the context + * o There is no Mach task associated with the Mach thread + * o There is no proc_t associated with the Mach task + * o The proc_t has no per process open file table + * o The proc_t is post-vfork() + * + * This causes this function to return a value matching as + * closely as possible the previous behaviour, while at the + * same time avoiding the task lending that results from vfork() + */ proc_t -vfs_context_proc(vfs_context_t context) +vfs_context_proc(vfs_context_t ctx) { - return (context->vc_proc); + proc_t proc = NULL; + + if (ctx != NULL && ctx->vc_thread != NULL) + proc = (proc_t)get_bsdthreadtask_info(ctx->vc_thread); + if (proc != NULL && (proc->p_fd == NULL || (proc->p_lflag & P_LVFORK))) + proc = NULL; + + return(proc == NULL ? current_proc() : proc); } -vfs_context_t -vfs_context_create(vfs_context_t context) +/* + * vfs_context_get_special_port + * + * Description: Return the requested special port from the task associated + * with the given context. 
+ * + * Parameters: vfs_context_t The context to use + * int Index of special port + * ipc_port_t * Pointer to returned port + * + * Returns: kern_return_t see task_get_special_port() + */ +kern_return_t +vfs_context_get_special_port(vfs_context_t ctx, int which, ipc_port_t *portp) { - struct vfs_context * newcontext; + task_t task = NULL; - newcontext = (struct vfs_context *)kalloc(sizeof(struct vfs_context)); + if (ctx != NULL && ctx->vc_thread != NULL) + task = get_threadtask(ctx->vc_thread); - if (newcontext) { - if (context) { - newcontext->vc_proc = context->vc_proc; - newcontext->vc_ucred = context->vc_ucred; - } else { - newcontext->vc_proc = proc_self(); - newcontext->vc_ucred = kauth_cred_get(); - } - return(newcontext); - } - return((vfs_context_t)0); + return task_get_special_port(task, which, portp); } -int -vfs_context_rele(vfs_context_t context) +/* + * vfs_context_set_special_port + * + * Description: Set the requested special port in the task associated + * with the given context. + * + * Parameters: vfs_context_t The context to use + * int Index of special port + * ipc_port_t New special port + * + * Returns: kern_return_t see task_set_special_port() + */ +kern_return_t +vfs_context_set_special_port(vfs_context_t ctx, int which, ipc_port_t port) { - if (context) - kfree(context, sizeof(struct vfs_context)); - return(0); -} + task_t task = NULL; + if (ctx != NULL && ctx->vc_thread != NULL) + task = get_threadtask(ctx->vc_thread); -ucred_t -vfs_context_ucred(vfs_context_t context) -{ - return (context->vc_ucred); + return task_set_special_port(task, which, port); } /* - * Return true if the context is owned by the superuser. + * vfs_context_thread + * + * Description: Return the Mach thread associated with a vfs_context_t + * + * Parameters: vfs_context_t The context to use + * + * Returns: thread_t The thread for this context, or + * NULL, if there is not one. + * + * Notes: NULL thread_t's are legal, but discouraged. 
They occur only + * as a result of a static vfs_context_t declaration in a function + * and will result in this function returning NULL. + * + * This is intentional; this function should NOT return the + * current_thread() in this case. */ -int -vfs_context_issuser(vfs_context_t context) +thread_t +vfs_context_thread(vfs_context_t ctx) { - return(context->vc_ucred->cr_uid == 0); + return(ctx->vc_thread); } -/* XXXXXXXXXXXXXX VNODE KAPIS XXXXXXXXXXXXXXXXXXXXXXXXX */ - - /* - * Convert between vnode types and inode formats (since POSIX.1 - * defines mode word of stat structure in terms of inode formats). + * vfs_context_cwd + * + * Description: Returns a reference on the vnode for the current working + * directory for the supplied context + * + * Parameters: vfs_context_t The context to use + * + * Returns: vnode_t The current working directory + * for this context + * + * Notes: The function first attempts to obtain the current directory + * from the thread, and if it is not present there, falls back + * to obtaining it from the process instead. If it can't be + * obtained from either place, we return NULLVP. */ -enum vtype -vnode_iftovt(int mode) +vnode_t +vfs_context_cwd(vfs_context_t ctx) { - return(iftovt_tab[((mode) & S_IFMT) >> 12]); -} + vnode_t cwd = NULLVP; -int -vnode_vttoif(enum vtype indx) -{ - return(vttoif_tab[(int)(indx)]); -} + if(ctx != NULL && ctx->vc_thread != NULL) { + uthread_t uth = get_bsdthread_info(ctx->vc_thread); + proc_t proc; -int -vnode_makeimode(int indx, int mode) -{ - return (int)(VTTOIF(indx) | (mode)); -} + /* + * Get the cwd from the thread; if there isn't one, get it + * from the process, instead. + */ + if ((cwd = uth->uu_cdir) == NULLVP && + (proc = (proc_t)get_bsdthreadtask_info(ctx->vc_thread)) != NULL && + proc->p_fd != NULL) + cwd = proc->p_fd->fd_cdir; + } + return(cwd); +} /* - * vnode manipulation functions. + * vfs_context_create + * + * Description: Allocate and initialize a new context. 
+ * + * Parameters: vfs_context_t: Context to copy, or NULL for new + * + * Returns: Pointer to new context + * + * Notes: Copy cred and thread from argument, if available; else + * initialize with current thread and new cred. Returns + * with a reference held on the credential. */ +vfs_context_t +vfs_context_create(vfs_context_t ctx) +{ + vfs_context_t newcontext; + + newcontext = (vfs_context_t)kalloc(sizeof(struct vfs_context)); + + if (newcontext) { + kauth_cred_t safecred; + if (ctx) { + newcontext->vc_thread = ctx->vc_thread; + safecred = ctx->vc_ucred; + } else { + newcontext->vc_thread = current_thread(); + safecred = kauth_cred_get(); + } + if (IS_VALID_CRED(safecred)) + kauth_cred_ref(safecred); + newcontext->vc_ucred = safecred; + return(newcontext); + } + return(NULL); +} + + +vfs_context_t +vfs_context_current(void) +{ + vfs_context_t ctx = NULL; + volatile uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); + + if (ut != NULL ) { + if (ut->uu_context.vc_ucred != NULL) { + ctx = &ut->uu_context; + } + } + + return(ctx == NULL ? vfs_context_kernel() : ctx); +} + + +/* + * XXX Do not ask + * + * Dangerous hack - adopt the first kernel thread as the current thread, to + * get to the vfs_context_t in the uthread associated with a kernel thread. + * This is used by UDF to make the call into IOCDMediaBSDClient, + * IOBDMediaBSDClient, and IODVDMediaBSDClient to determine whether the + * ioctl() is being called from kernel or user space (and all this because + * we do not pass threads into our ioctl()'s, instead of processes). + * + * This is also used by imageboot_setup(), called early from bsd_init() after + * kernproc has been given a credential. + * + * Note: The use of proc_thread() here is a convenience to avoid inclusion + * of many Mach headers to do the reference directly rather than indirectly; + * we will need to forego this convenience when we reture proc_thread(). 
+ */ +static struct vfs_context kerncontext; +vfs_context_t +vfs_context_kernel(void) +{ + if (kerncontext.vc_ucred == NOCRED) + kerncontext.vc_ucred = kernproc->p_ucred; + if (kerncontext.vc_thread == NULL) + kerncontext.vc_thread = proc_thread(kernproc); + + return(&kerncontext); +} + + +int +vfs_context_rele(vfs_context_t ctx) +{ + if (ctx) { + if (IS_VALID_CRED(ctx->vc_ucred)) + kauth_cred_unref(&ctx->vc_ucred); + kfree(ctx, sizeof(struct vfs_context)); + } + return(0); +} + + +kauth_cred_t +vfs_context_ucred(vfs_context_t ctx) +{ + return (ctx->vc_ucred); +} + +/* + * Return true if the context is owned by the superuser. + */ +int +vfs_context_issuser(vfs_context_t ctx) +{ + return(kauth_cred_issuser(vfs_context_ucred(ctx))); +} + +int vfs_context_iskernel(vfs_context_t ctx) +{ + return ctx == &kerncontext; +} + +/* + * Given a context, for all fields of vfs_context_t which + * are not held with a reference, set those fields to the + * values for the current execution context. Currently, this + * just means the vc_thread. + * + * Returns: 0 for success, nonzero for failure + * + * The intended use is: + * 1. vfs_context_create() gets the caller a context + * 2. vfs_context_bind() sets the unrefcounted data + * 3. vfs_context_rele() releases the context + * + */ +int +vfs_context_bind(vfs_context_t ctx) +{ + ctx->vc_thread = current_thread(); + return 0; +} + +int vfs_isswapmount(mount_t mnt) +{ + return mnt && ISSET(mnt->mnt_kern_flag, MNTK_SWAP_MOUNT) ? 1 : 0; +} + +/* XXXXXXXXXXXXXX VNODE KAPIS XXXXXXXXXXXXXXXXXXXXXXXXX */ -/* returns system root vnode reference; It should be dropped using vrele() */ + +/* + * Convert between vnode types and inode formats (since POSIX.1 + * defines mode word of stat structure in terms of inode formats). 
+ */ +enum vtype +vnode_iftovt(int mode) +{ + return(iftovt_tab[((mode) & S_IFMT) >> 12]); +} + +int +vnode_vttoif(enum vtype indx) +{ + return(vttoif_tab[(int)(indx)]); +} + +int +vnode_makeimode(int indx, int mode) +{ + return (int)(VTTOIF(indx) | (mode)); +} + + +/* + * vnode manipulation functions. + */ + +/* returns system root vnode iocount; It should be released using vnode_put() */ vnode_t vfs_rootvnode(void) { @@ -1140,14 +1459,23 @@ vnode_vid(vnode_t vp) return ((uint32_t)(vp->v_id)); } -/* returns a mount reference; drop it with vfs_mountrelease() */ mount_t vnode_mount(vnode_t vp) { return (vp->v_mount); } -/* returns a mount reference iff vnode_t is a dir and is a mount point */ +#if CONFIG_IOSCHED +vnode_t +vnode_mountdevvp(vnode_t vp) +{ + if (vp->v_mount) + return (vp->v_mount->mnt_devvp); + else + return ((vnode_t)0); +} +#endif + mount_t vnode_mountedhere(vnode_t vp) { @@ -1177,7 +1505,7 @@ vnode_fsnode(vnode_t vp) void vnode_clearfsnode(vnode_t vp) { - vp->v_data = 0; + vp->v_data = NULL; } dev_t @@ -1202,6 +1530,20 @@ vnode_issystem(vnode_t vp) return ((vp->v_flag & VSYSTEM)? 1 : 0); } +/* is vnode_t a swap file vnode */ +int +vnode_isswap(vnode_t vp) +{ + return ((vp->v_flag & VSWAP)? 1 : 0); +} + +/* is vnode_t a tty */ +int +vnode_istty(vnode_t vp) +{ + return ((vp->v_flag & VISTTY) ? 1 : 0); +} + /* if vnode_t mount operation in progress */ int vnode_ismount(vnode_t vp) @@ -1215,12 +1557,54 @@ vnode_isrecycled(vnode_t vp) { int ret; - vnode_lock(vp); + vnode_lock_spin(vp); ret = (vp->v_lflag & (VL_TERMINATE|VL_DEAD))? 1 : 0; vnode_unlock(vp); return(ret); } +/* vnode was created by background task requesting rapid aging + and has not since been referenced by a normal task */ +int +vnode_israge(vnode_t vp) +{ + return ((vp->v_flag & VRAGE)? 1 : 0); +} + +int +vnode_needssnapshots(vnode_t vp) +{ + return ((vp->v_flag & VNEEDSSNAPSHOT)? 
1 : 0); +} + + +/* Check the process/thread to see if we should skip atime updates */ +int +vfs_ctx_skipatime (vfs_context_t ctx) { + struct uthread *ut; + proc_t proc; + thread_t thr; + + proc = vfs_context_proc(ctx); + thr = vfs_context_thread (ctx); + + /* Validate pointers in case we were invoked via a kernel context */ + if (thr && proc) { + ut = get_bsdthread_info (thr); + + if (proc->p_lflag & P_LRAGE_VNODES) { + return 1; + } + + if (ut) { + if (ut->uu_flag & UT_RAGE_VNODES) { + return 1; + } + } + } + return 0; +} + /* is vnode_t marked to not keep data cached once it's been consumed */ int vnode_isnocache(vnode_t vp) @@ -1237,6 +1621,12 @@ vnode_isnoreadahead(vnode_t vp) return ((vp->v_flag & VRAOFF)? 1 : 0); } +int +vnode_is_openevt(vnode_t vp) +{ + return ((vp->v_flag & VOPENEVT)? 1 : 0); +} + /* is vnode_t a standard one? */ int vnode_isstandard(vnode_t vp) @@ -1272,6 +1662,46 @@ vnode_islnk(vnode_t vp) return ((vp->v_type == VLNK)? 1 : 0); } +int +vnode_lookup_continue_needed(vnode_t vp, struct componentname *cnp) +{ + struct nameidata *ndp = cnp->cn_ndp; + + if (ndp == NULL) { + panic("vnode_lookup_continue_needed(): cnp->cn_ndp is NULL\n"); + } + + if (vnode_isdir(vp)) { + if (vp->v_mountedhere != NULL) { + goto yes; + } + +#if CONFIG_TRIGGERS + if (vp->v_resolve) { + goto yes; + } +#endif /* CONFIG_TRIGGERS */ + + } + + + if (vnode_islnk(vp)) { + /* From lookup(): || *ndp->ni_next == '/') No need for this, we know we're NULL-terminated here */ + if (cnp->cn_flags & FOLLOW) { + goto yes; + } + if (ndp->ni_flag & NAMEI_TRAILINGSLASH) { + goto yes; + } + } + + return 0; + +yes: + ndp->ni_flag |= NAMEI_CONTLOOKUP; + return EKEEPLOOKING; +} + /* is vnode_t a fifo ? */ int vnode_isfifo(vnode_t vp) @@ -1286,6 +1716,12 @@ vnode_isblk(vnode_t vp) return ((vp->v_type == VBLK)? 1 : 0); } +int +vnode_isspec(vnode_t vp) +{ + return (((vp->v_type == VCHR) || (vp->v_type == VBLK)) ? 1 : 0); +} + /* is vnode_t a char device? 
*/ int vnode_ischr(vnode_t vp) @@ -1300,12 +1736,72 @@ vnode_issock(vnode_t vp) return ((vp->v_type == VSOCK)? 1 : 0); } +/* is vnode_t a device with multiple active vnodes referring to it? */ +int +vnode_isaliased(vnode_t vp) +{ + enum vtype vt = vp->v_type; + if (!((vt == VCHR) || (vt == VBLK))) { + return 0; + } else { + return (vp->v_specflags & SI_ALIASED); + } +} + +/* is vnode_t a named stream? */ +int +vnode_isnamedstream( +#if NAMEDSTREAMS + vnode_t vp +#else + __unused vnode_t vp +#endif + ) +{ +#if NAMEDSTREAMS + return ((vp->v_flag & VISNAMEDSTREAM) ? 1 : 0); +#else + return (0); +#endif +} + +int +vnode_isshadow( +#if NAMEDSTREAMS + vnode_t vp +#else + __unused vnode_t vp +#endif + ) +{ +#if NAMEDSTREAMS + return ((vp->v_flag & VISSHADOW) ? 1 : 0); +#else + return (0); +#endif +} +/* does vnode have associated named stream vnodes ? */ +int +vnode_hasnamedstreams( +#if NAMEDSTREAMS + vnode_t vp +#else + __unused vnode_t vp +#endif + ) +{ +#if NAMEDSTREAMS + return ((vp->v_lflag & VL_HASSTREAMS) ? 
1 : 0); +#else + return (0); +#endif +} /* TBD: set vnode_t to not cache data after it is consumed once; used for quota */ void vnode_setnocache(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_flag |= VNOCACHE_DATA; vnode_unlock(vp); } @@ -1313,15 +1809,32 @@ vnode_setnocache(vnode_t vp) void vnode_clearnocache(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_flag &= ~VNOCACHE_DATA; vnode_unlock(vp); } +void +vnode_set_openevt(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag |= VOPENEVT; + vnode_unlock(vp); +} + +void +vnode_clear_openevt(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag &= ~VOPENEVT; + vnode_unlock(vp); +} + + void vnode_setnoreadahead(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_flag |= VRAOFF; vnode_unlock(vp); } @@ -1329,17 +1842,63 @@ vnode_setnoreadahead(vnode_t vp) void vnode_clearnoreadahead(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_flag &= ~VRAOFF; vnode_unlock(vp); } +int +vnode_isfastdevicecandidate(vnode_t vp) +{ + return ((vp->v_flag & VFASTDEVCANDIDATE)? 1 : 0); +} + +void +vnode_setfastdevicecandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag |= VFASTDEVCANDIDATE; + vnode_unlock(vp); +} + +void +vnode_clearfastdevicecandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag &= ~VFASTDEVCANDIDATE; + vnode_unlock(vp); +} + +int +vnode_isautocandidate(vnode_t vp) +{ + return ((vp->v_flag & VAUTOCANDIDATE)? 
1 : 0); +} + +void +vnode_setautocandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag |= VAUTOCANDIDATE; + vnode_unlock(vp); +} + +void +vnode_clearautocandidate(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag &= ~VAUTOCANDIDATE; + vnode_unlock(vp); +} + + + /* mark vnode_t to skip vflush() is SKIPSYSTEM */ void vnode_setnoflush(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_flag |= VNOFLUSH; vnode_unlock(vp); } @@ -1347,7 +1906,7 @@ vnode_setnoflush(vnode_t vp) void vnode_clearnoflush(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_flag &= ~VNOFLUSH; vnode_unlock(vp); } @@ -1363,7 +1922,7 @@ vnode_ismountedon(vnode_t vp) void vnode_setmountedon(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_specflags |= SI_MOUNTEDON; vnode_unlock(vp); } @@ -1371,7 +1930,7 @@ vnode_setmountedon(vnode_t vp) void vnode_clearmountedon(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_specflags &= ~SI_MOUNTEDON; vnode_unlock(vp); } @@ -1403,13 +1962,6 @@ vnode_setparent(vnode_t vp, vnode_t dvp) vp->v_parent = dvp; } -char * -vnode_name(vnode_t vp) -{ - /* we try to keep v_name a reasonable name for the node */ - return(vp->v_name); -} - void vnode_setname(vnode_t vp, char * name) { @@ -1420,7 +1972,7 @@ vnode_setname(vnode_t vp, char * name) void vnode_vfsname(vnode_t vp, char * buf) { - strncpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN); + strlcpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN); } /* return the FS type number */ @@ -1434,7 +1986,10 @@ int vnode_vfs64bitready(vnode_t vp) { - if ((vp->v_mount->mnt_vtable->vfc_64bitready)) + /* + * Checking for dead_mountp is a bit of a hack for SnowLeopard: + */ + if ((vp->v_mount != dead_mountp) && (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFS64BITREADY)) return(1); else return(0); @@ -1484,56 +2039,132 @@ vnode_vfsisrdonly(vnode_t vp) return ((vp->v_mount->mnt_flag & MNT_RDONLY)? 
1 : 0); } +int +vnode_compound_rename_available(vnode_t vp) +{ + return vnode_compound_op_available(vp, COMPOUND_VNOP_RENAME); +} +int +vnode_compound_rmdir_available(vnode_t vp) +{ + return vnode_compound_op_available(vp, COMPOUND_VNOP_RMDIR); +} +int +vnode_compound_mkdir_available(vnode_t vp) +{ + return vnode_compound_op_available(vp, COMPOUND_VNOP_MKDIR); +} +int +vnode_compound_remove_available(vnode_t vp) +{ + return vnode_compound_op_available(vp, COMPOUND_VNOP_REMOVE); +} +int +vnode_compound_open_available(vnode_t vp) +{ + return vnode_compound_op_available(vp, COMPOUND_VNOP_OPEN); +} + +int +vnode_compound_op_available(vnode_t vp, compound_vnop_id_t opid) +{ + return ((vp->v_mount->mnt_compound_ops & opid) != 0); +} -/* returns vnode ref to current working directory */ +/* + * Returns vnode ref to current working directory; if a per-thread current + * working directory is in effect, return that instead of the per process one. + * + * XXX Published, but not used. + */ vnode_t current_workingdir(void) { - struct proc *p = current_proc(); - struct vnode * vp ; - - if ( (vp = p->p_fd->fd_cdir) ) { - if ( (vnode_getwithref(vp)) ) - return (NULL); - } - return vp; + return vfs_context_cwd(vfs_context_current()); } /* returns vnode ref to current root(chroot) directory */ vnode_t current_rootdir(void) { - struct proc *p = current_proc(); + proc_t proc = current_proc(); struct vnode * vp ; - if ( (vp = p->p_fd->fd_rdir) ) { + if ( (vp = proc->p_fd->fd_rdir) ) { if ( (vnode_getwithref(vp)) ) return (NULL); } return vp; } -static int -vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) -{ - kauth_filesec_t fsec; - uio_t fsec_uio; - size_t fsec_size; +/* + * Get a filesec and optional acl contents from an extended attribute. + * Function will attempt to retrive ACL, UUID, and GUID information using a + * read of a named extended attribute (KAUTH_FILESEC_XATTR). + * + * Parameters: vp The vnode on which to operate. 
+ * fsecp The filesec (and ACL, if any) being + * retrieved. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in '*fsecp', if retrieved, will be in + * host byte order, as will be the ACL contents, if any. + * Internally, we will canonicalize these values from network (PPC) + * byte order after we retrieve them so that the on-disk contents + * of the extended attribute are identical for both PPC and Intel + * (if we were not being required to provide this service via + * fallback, this would be the job of the filesystem + * 'VNOP_GETATTR' call). + * + * We use ntohl() because it has a transitive property on Intel + * machines and no effect on PPC machines. This guarantees us + * + * XXX: Deleting rather than ignoring a corrupt security structure is + * probably the only way to reset it without assistance from a + * file system integrity checking tool. Right now we ignore it. + * + * XXX: We should enumerate the possible errno values here, and where + * in the code they originated.
+ */ +static int +vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) +{ + kauth_filesec_t fsec; + uio_t fsec_uio; + size_t fsec_size; size_t xsize, rsize; int error; + uint32_t host_fsec_magic; + uint32_t host_acl_entrycount; fsec = NULL; fsec_uio = NULL; - error = 0; - + /* find out how big the EA is */ - if (vn_getxattr(vp, KAUTH_FILESEC_XATTR, NULL, &xsize, XATTR_NOSECURITY, ctx) != 0) { + error = vn_getxattr(vp, KAUTH_FILESEC_XATTR, NULL, &xsize, XATTR_NOSECURITY, ctx); + if (error != 0) { /* no EA, no filesec */ if ((error == ENOATTR) || (error == ENOENT) || (error == EJUSTRETURN)) error = 0; /* either way, we are done */ goto out; } + + /* + * To be valid, a kauth_filesec_t must be large enough to hold a zero + * ACE entrly ACL, and if it's larger than that, it must have the right + * number of bytes such that it contains an atomic number of ACEs, + * rather than partial entries. Otherwise, we ignore it. + */ + if (!KAUTH_FILESEC_VALID(xsize)) { + KAUTH_DEBUG(" ERROR - Bogus kauth_fiilesec_t: %ld bytes", xsize); + error = 0; + goto out; + } /* how many entries would fit? */ fsec_size = KAUTH_FILESEC_COUNT(xsize); @@ -1564,28 +2195,38 @@ vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) } /* - * Validate security structure. If it's corrupt, we will - * just ignore it. + * Validate security structure; the validation must take place in host + * byte order. If it's corrupt, we will just ignore it. 
*/ + + /* Validate the size before trying to convert it */ if (rsize < KAUTH_FILESEC_SIZE(0)) { KAUTH_DEBUG("ACL - DATA TOO SMALL (%d)", rsize); goto out; } - if (fsec->fsec_magic != KAUTH_FILESEC_MAGIC) { - KAUTH_DEBUG("ACL - BAD MAGIC %x", fsec->fsec_magic); - goto out; - } - if ((fsec->fsec_acl.acl_entrycount != KAUTH_FILESEC_NOACL) && - (fsec->fsec_acl.acl_entrycount > KAUTH_ACL_MAX_ENTRIES)) { - KAUTH_DEBUG("ACL - BAD ENTRYCOUNT %x", fsec->fsec_entrycount); + + /* Validate the magic number before trying to convert it */ + host_fsec_magic = ntohl(KAUTH_FILESEC_MAGIC); + if (fsec->fsec_magic != host_fsec_magic) { + KAUTH_DEBUG("ACL - BAD MAGIC %x", host_fsec_magic); goto out; } - if ((fsec->fsec_acl.acl_entrycount != KAUTH_FILESEC_NOACL) && - (KAUTH_FILESEC_SIZE(fsec->fsec_acl.acl_entrycount) > rsize)) { - KAUTH_DEBUG("ACL - BUFFER OVERFLOW (%d entries too big for %d)", fsec->fsec_acl.acl_entrycount, rsize); - goto out; + + /* Validate the entry count before trying to convert it. */ + host_acl_entrycount = ntohl(fsec->fsec_acl.acl_entrycount); + if (host_acl_entrycount != KAUTH_FILESEC_NOACL) { + if (host_acl_entrycount > KAUTH_ACL_MAX_ENTRIES) { + KAUTH_DEBUG("ACL - BAD ENTRYCOUNT %x", host_acl_entrycount); + goto out; + } + if (KAUTH_FILESEC_SIZE(host_acl_entrycount) > rsize) { + KAUTH_DEBUG("ACL - BUFFER OVERFLOW (%d entries too big for %d)", host_acl_entrycount, rsize); + goto out; + } } + kauth_filesec_acl_setendian(KAUTH_ENDIAN_HOST, fsec, NULL); + *fsecp = fsec; fsec = NULL; error = 0; @@ -1599,11 +2240,43 @@ out: return(error); } +/* + * Set a filesec and optional acl contents into an extended attribute. + * function will attempt to store ACL, UUID, and GUID information using a + * write to a named extended attribute (KAUTH_FILESEC_XATTR). The 'acl' + * may or may not point to the `fsec->fsec_acl`, depending on whether the + * original caller supplied an acl. + * + * Parameters: vp The vnode on which to operate. + * fsec The filesec being set. 
+ * acl The acl to be associated with 'fsec'. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: Both the fsec and the acl are always valid. + * + * The kauth_filesec_t in 'fsec', if any, is in host byte order, + * as are the acl contents, if they are used. Internally, we will + * cannonize these values into network (PPC) byte order before we + * attempt to write them so that the on-disk contents of the + * extended attribute are identical for both PPC and Intel (if we + * were not being required to provide this service via fallback, + * this would be the job of the filesystem 'VNOP_SETATTR' call). + * We reverse this process on the way out, so we leave with the + * same byte order we started with. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ static int vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context_t ctx) { - uio_t fsec_uio; - int error; + uio_t fsec_uio; + int error; + uint32_t saved_acl_copysize; fsec_uio = NULL; @@ -1612,8 +2285,16 @@ vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context error = ENOMEM; goto out; } - uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), sizeof(struct kauth_filesec) - sizeof(struct kauth_acl)); - uio_addiov(fsec_uio, CAST_USER_ADDR_T(acl), KAUTH_ACL_COPYSIZE(acl)); + /* + * Save the pre-converted ACL copysize, because it gets swapped too + * if we are running with the wrong endianness. 
+ */ + saved_acl_copysize = KAUTH_ACL_COPYSIZE(acl); + + kauth_filesec_acl_setendian(KAUTH_ENDIAN_DISK, fsec, acl); + + uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), KAUTH_FILESEC_SIZE(0) - KAUTH_ACL_SIZE(KAUTH_FILESEC_NOACL)); + uio_addiov(fsec_uio, CAST_USER_ADDR_T(acl), saved_acl_copysize); error = vn_setxattr(vp, KAUTH_FILESEC_XATTR, fsec_uio, @@ -1621,6 +2302,8 @@ vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context ctx); VFS_DEBUG(ctx, vp, "SETATTR - set ACL returning %d", error); + kauth_filesec_acl_setendian(KAUTH_ENDIAN_HOST, fsec, acl); + out: if (fsec_uio != NULL) uio_free(fsec_uio); @@ -1628,6 +2311,15 @@ out: } +/* + * Returns: 0 Success + * ENOMEM Not enough space [only if has filesec] + * VNOP_GETATTR: ??? + * vnode_get_filesec: ??? + * kauth_cred_guid2uid: ??? + * kauth_cred_guid2gid: ??? + * vfs_update_vfsstat: ??? + */ int vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) { @@ -1670,7 +2362,7 @@ vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) if (VATTR_NOT_RETURNED(vap, va_acl) || VATTR_NOT_RETURNED(vap, va_uuuid) || VATTR_NOT_RETURNED(vap, va_guuid)) { fsec = NULL; - if ((vp->v_type == VDIR) || (vp->v_type == VLNK) || (vp->v_type == VREG)) { + if (XATTR_VNODE_SUPPORTED(vp)) { /* try to get the filesec */ if ((error = vnode_get_filesec(vp, &fsec, ctx)) != 0) goto out; @@ -1738,7 +2430,9 @@ vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) * Handle uid/gid == 99 and MNT_IGNORE_OWNERSHIP here. 
*/ if (VATTR_IS_ACTIVE(vap, va_uid)) { - if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { + if (vfs_context_issuser(ctx) && VATTR_IS_SUPPORTED(vap, va_uid)) { + nuid = vap->va_uid; + } else if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { nuid = vp->v_mount->mnt_fsowner; if (nuid == KAUTH_UID_NONE) nuid = 99; @@ -1753,7 +2447,9 @@ vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) VATTR_RETURN(vap, va_uid, nuid); } if (VATTR_IS_ACTIVE(vap, va_gid)) { - if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { + if (vfs_context_issuser(ctx) && VATTR_IS_SUPPORTED(vap, va_gid)) { + ngid = vap->va_gid; + } else if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { ngid = vp->v_mount->mnt_fsgroup; if (ngid == KAUTH_GID_NONE) ngid = 99; @@ -1795,7 +2491,7 @@ vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) VATTR_IS_ACTIVE(vap, va_total_alloc)) { /* make sure f_bsize is valid */ if (vp->v_mount->mnt_vfsstat.f_bsize == 0) { - if ((error = vfs_update_vfsstat(vp->v_mount, ctx)) != 0) + if ((error = vfs_update_vfsstat(vp->v_mount, ctx, VFS_KERNEL_EVENT)) != 0) goto out; } @@ -1834,17 +2530,81 @@ out: return(error); } +/* + * Set the attributes on a vnode in a vnode context. + * + * Parameters: vp The vnode whose attributes to set. + * vap A pointer to the attributes to set. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. + * + * The contents of the data area pointed to by 'vap' may be + * modified if the vnode is on a filesystem which has been + * mounted with ingore ownership flags, or by the underlyng + * VFS itself, or by the fallback code, if the underlying VFS + * does not support ACL, UUID, or GUUID attributes directly. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
+ */ int vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) { - int error, is_ownership_change=0; + int error, is_perm_change=0; /* * Make sure the filesystem is mounted R/W. * If not, return an error. */ - if (vfs_isrdonly(vp->v_mount)) - return(EROFS); + if (vfs_isrdonly(vp->v_mount)) { + error = EROFS; + goto out; + } + +#if DEVELOPMENT || DEBUG + /* + * XXX VSWAP: Check for entitlements or special flag here + * so we can restrict access appropriately. + */ +#else /* DEVELOPMENT || DEBUG */ + + if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) { + error = EPERM; + goto out; + } +#endif /* DEVELOPMENT || DEBUG */ + +#if NAMEDSTREAMS + /* For streams, va_data_size is the only setable attribute. */ + if ((vp->v_flag & VISNAMEDSTREAM) && (vap->va_active != VNODE_ATTR_va_data_size)) { + error = EPERM; + goto out; + } +#endif + /* Check for truncation */ + if(VATTR_IS_ACTIVE(vap, va_data_size)) { + switch(vp->v_type) { + case VREG: + /* For regular files it's ok */ + break; + case VDIR: + /* Not allowed to truncate directories */ + error = EISDIR; + goto out; + default: + /* For everything else we will clear the bit and let underlying FS decide on the rest */ + VATTR_CLEAR_ACTIVE(vap, va_data_size); + if (vap->va_active) + break; + /* If it was the only bit set, return success, to handle cases like redirect to /dev/null */ + return (0); + } + } /* * If ownership is being ignored on this volume, we silently discard @@ -1855,8 +2615,9 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) VATTR_CLEAR_ACTIVE(vap, va_gid); } - if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) { - is_ownership_change = 1; + if ( VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) + || VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_acl)) { + is_perm_change = 1; } /* @@ -1866,7 +2627,13 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) if (!vfs_extendedsecurity(vnode_mount(vp)) && 
(VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) { KAUTH_DEBUG("SETATTR - returning ENOTSUP to request to set extended security"); - return(ENOTSUP); + error = ENOTSUP; + goto out; + } + + /* Never allow the setting of any unsupported superuser flags. */ + if (VATTR_IS_ACTIVE(vap, va_flags)) { + vap->va_flags &= (SF_SUPPORTED | UF_SETTABLE); } error = VNOP_SETATTR(vp, vap, ctx); @@ -1874,36 +2641,48 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) if ((error == 0) && !VATTR_ALL_SUPPORTED(vap)) error = vnode_setattr_fallback(vp, vap, ctx); - /* - * If we have changed any of the things about the file that are likely - * to result in changes to authorisation results, blow the vnode auth - * cache - */ - if (VATTR_IS_SUPPORTED(vap, va_mode) || - VATTR_IS_SUPPORTED(vap, va_uid) || - VATTR_IS_SUPPORTED(vap, va_gid) || - VATTR_IS_SUPPORTED(vap, va_flags) || - VATTR_IS_SUPPORTED(vap, va_acl) || - VATTR_IS_SUPPORTED(vap, va_uuuid) || - VATTR_IS_SUPPORTED(vap, va_guuid)) - vnode_uncache_credentials(vp); +#if CONFIG_FSE // only send a stat_changed event if this is more than - // just an access time update - if (error == 0 && (vap->va_active != VNODE_ATTR_BIT(va_access_time))) { - if (need_fsevent(FSE_STAT_CHANGED, vp) || (is_ownership_change && need_fsevent(FSE_CHOWN, vp))) { - if (is_ownership_change == 0) - add_fsevent(FSE_STAT_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); - else - add_fsevent(FSE_CHOWN, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + // just an access or backup time update + if (error == 0 && (vap->va_active != VNODE_ATTR_BIT(va_access_time)) && (vap->va_active != VNODE_ATTR_BIT(va_backup_time))) { + if (is_perm_change) { + if (need_fsevent(FSE_CHOWN, vp)) { + add_fsevent(FSE_CHOWN, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } + } else if(need_fsevent(FSE_STAT_CHANGED, vp)) { + add_fsevent(FSE_STAT_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); } } +#endif + +out: return(error); } /* - * 
Following an operation which sets attributes (setattr, create, etc.) we may - * need to perform fallback operations to get attributes saved. - */ + * Fallback for setting the attributes on a vnode in a vnode context. This + * Function will attempt to store ACL, UUID, and GUID information utilizing + * a read/modify/write operation against an EA used as a backing store for + * the object. + * + * Parameters: vp The vnode whose attributes to set. + * vap A pointer to the attributes to set. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order, + * as are the fsec and lfsec, if they are used. + * + * The contents of the data area pointed to by 'vap' may be + * modified to indicate that the attribute is supported for + * any given requested attribute. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ int vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) { @@ -1917,7 +2696,8 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) /* * Extended security fallback via extended attributes. * - * Note that we do not free the filesec; the caller is expected to do this. + * Note that we do not free the filesec; the caller is expected to + * do this. */ if (VATTR_NOT_RETURNED(vap, va_acl) || VATTR_NOT_RETURNED(vap, va_uuuid) || @@ -1925,17 +2705,19 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) VFS_DEBUG(ctx, vp, "SETATTR - doing filesec fallback"); /* - * Fail for file types that we don't permit extended security to be set on. + * Fail for file types that we don't permit extended security + * to be set on. 
*/ - if ((vp->v_type != VDIR) && (vp->v_type != VLNK) && (vp->v_type != VREG)) { + if (!XATTR_VNODE_SUPPORTED(vp)) { VFS_DEBUG(ctx, vp, "SETATTR - Can't write ACL to file type %d", vnode_vtype(vp)); error = EINVAL; goto out; } /* - * If we don't have all the extended security items, we need to fetch the existing - * data to perform a read-modify-write operation. + * If we don't have all the extended security items, we need + * to fetch the existing data to perform a read-modify-write + * operation. */ fsec = NULL; if (!VATTR_IS_ACTIVE(vap, va_acl) || @@ -1990,7 +2772,8 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) } /* - * If the filesec data is all invalid, we can just remove the EA completely. + * If the filesec data is all invalid, we can just remove + * the EA completely. */ if ((facl->acl_entrycount == KAUTH_FILESEC_NOACL) && kauth_guid_equal(&fsec->fsec_owner, &kauth_null_guid) && @@ -2015,6 +2798,112 @@ out: return(error); } +/* + * Upcall for a filesystem to tell VFS about an EVFILT_VNODE-type + * event on a vnode. + */ +int +vnode_notify(vnode_t vp, uint32_t events, struct vnode_attr *vap) +{ + /* These are the same as the corresponding knotes, at least for now. Cheating a little. 
*/ + uint32_t knote_mask = (VNODE_EVENT_WRITE | VNODE_EVENT_DELETE | VNODE_EVENT_RENAME + | VNODE_EVENT_LINK | VNODE_EVENT_EXTEND | VNODE_EVENT_ATTRIB); + uint32_t dir_contents_mask = (VNODE_EVENT_DIR_CREATED | VNODE_EVENT_FILE_CREATED + | VNODE_EVENT_DIR_REMOVED | VNODE_EVENT_FILE_REMOVED); + uint32_t knote_events = (events & knote_mask); + + /* Permissions are not explicitly part of the kqueue model */ + if (events & VNODE_EVENT_PERMS) { + knote_events |= NOTE_ATTRIB; + } + + /* Directory contents information just becomes NOTE_WRITE */ + if ((vnode_isdir(vp)) && (events & dir_contents_mask)) { + knote_events |= NOTE_WRITE; + } + + if (knote_events) { + lock_vnode_and_post(vp, knote_events); +#if CONFIG_FSE + if (vap != NULL) { + create_fsevent_from_kevent(vp, events, vap); + } +#else + (void)vap; +#endif + } + + return 0; +} + + + +int +vnode_isdyldsharedcache(vnode_t vp) +{ + return ((vp->v_flag & VSHARED_DYLD) ? 1 : 0); +} + + +/* + * For a filesystem that isn't tracking its own vnode watchers: + * check whether a vnode is being monitored. + */ +int +vnode_ismonitored(vnode_t vp) { + return (vp->v_knotes.slh_first != NULL); +} + +/* + * Initialize a struct vnode_attr and activate the attributes required + * by the vnode_notify() call. 
+ */ +int +vfs_get_notify_attributes(struct vnode_attr *vap) +{ + VATTR_INIT(vap); + vap->va_active = VNODE_NOTIFY_ATTRS; + return 0; +} + +#if CONFIG_TRIGGERS +int +vfs_settriggercallback(fsid_t *fsid, vfs_trigger_callback_t vtc, void *data, uint32_t flags __unused, vfs_context_t ctx) +{ + int error; + mount_t mp; + + mp = mount_list_lookupby_fsid(fsid, 0 /* locked */, 1 /* withref */); + if (mp == NULL) { + return ENOENT; + } + + error = vfs_busy(mp, LK_NOWAIT); + mount_iterdrop(mp); + + if (error != 0) { + return ENOENT; + } + + mount_lock(mp); + if (mp->mnt_triggercallback != NULL) { + error = EBUSY; + mount_unlock(mp); + goto out; + } + + mp->mnt_triggercallback = vtc; + mp->mnt_triggerdata = data; + mount_unlock(mp); + + mp->mnt_triggercallback(mp, VTC_REPLACE, data, ctx); + +out: + vfs_unbusy(mp); + return 0; +} +#endif /* CONFIG_TRIGGERS */ + /* * Definition of vnode operations. */ @@ -2034,64 +2923,140 @@ struct vnop_lookup_args { }; #endif /* 0*/ +/* + * Returns: 0 Success + * lock_fsnode:ENOENT No such file or directory [only for VFS + * that is not thread safe & vnode is + * currently being/has been terminated] + * :ENAMETOOLONG + * :ENOENT + * :EJUSTRETURN + * :EPERM + * :EISDIR + * :ENOTDIR + * :??? + * + * Note: The return codes from the underlying VFS's lookup routine can't + * be fully enumerated here, since third party VFS authors may not + * limit their error returns to the ones documented here, even + * though this may result in some programs functioning incorrectly. + * + * The return codes documented above are those which may currently + * be returned by HFS from hfs_lookup, not including additional + * error code which may be propagated from underlying routines. 
+ */ errno_t -VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t context) +VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t ctx) { int _err; struct vnop_lookup_args a; - vnode_t vp; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_lookup_desc; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; + + _err = (*dvp->v_op[vnop_lookup_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(lookup, vnode_t, *vpp); + } + + return (_err); +} + +#if 0 +struct vnop_compound_open_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + int32_t a_flags; + int32_t a_fmode; + struct vnode_attr *a_vap; + vfs_context_t a_context; + void *a_reserved; +}; +#endif /* 0 */ + +int +VNOP_COMPOUND_OPEN(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, int32_t fmode, uint32_t *statusp, struct vnode_attr *vap, vfs_context_t ctx) +{ + int _err; + struct vnop_compound_open_args a; + int did_create = 0; + int want_create; + uint32_t tmp_status = 0; + struct componentname *cnp = &ndp->ni_cnd; + + want_create = (flags & O_CREAT); + + a.a_desc = &vnop_compound_open_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; /* Could be NULL */ + a.a_cnp = cnp; + a.a_flags = flags; + a.a_fmode = fmode; + a.a_status = (statusp != NULL) ? 
statusp : &tmp_status; + a.a_vap = vap; + a.a_context = ctx; + a.a_open_create_authorizer = vn_authorize_create; + a.a_open_existing_authorizer = vn_authorize_open_existing; + a.a_reserved = NULL; - vnode_cache_credentials(dvp, context); + if (dvp == NULLVP) { + panic("No dvp?"); + } + if (want_create && !vap) { + panic("Want create, but no vap?"); + } + if (!want_create && vap) { + panic("Don't want create, but have a vap?"); + } - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); + _err = (*dvp->v_op[vnop_compound_open_desc.vdesc_offset])(&a); + if (want_create) { + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_open, vnode_t, *vpp); + } else { + DTRACE_FSINFO(compound_open, vnode_t, dvp); } + } else { + DTRACE_FSINFO(compound_open, vnode_t, *vpp); } - _err = (*dvp->v_op[vnop_lookup_desc.vdesc_offset])(&a); - vp = *vpp; - - if (!thread_safe) { - if ( (cnp->cn_flags & ISLASTCN) ) { - if ( (cnp->cn_flags & LOCKPARENT) ) { - if ( !(cnp->cn_flags & FSNODELOCKHELD) ) { - /* - * leave the fsnode lock held on - * the directory, but restore the funnel... - * also indicate that we need to drop the - * fsnode_lock when we're done with the - * system call processing for this path - */ - cnp->cn_flags |= FSNODELOCKHELD; - - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } + did_create = (*a.a_status & COMPOUND_OPEN_STATUS_DID_CREATE); + + if (did_create && !want_create) { + panic("Filesystem did a create, even though none was requested?"); + } + + if (did_create) { +#if CONFIG_APPLEDOUBLE + if (!NATIVE_XATTR(dvp)) { + /* + * Remove stale Apple Double file (if any). + */ + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - unlock_fsnode(dvp, &funnel_state); +#endif /* CONFIG_APPLEDOUBLE */ + /* On create, provide kqueue notification */ + post_event_if_success(dvp, _err, NOTE_WRITE); + } + + lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, did_create); +#if 0 /* FSEvents... 
*/ + if (*vpp && _err && _err != EKEEPLOOKING) { + vnode_put(*vpp); + *vpp = NULLVP; } +#endif /* 0 */ + return (_err); + } #if 0 -/* - *# - *#% create dvp L L L - *#% create vpp - L - - *# - */ - struct vnop_create_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -2102,36 +3067,34 @@ struct vnop_create_args { }; #endif /* 0*/ errno_t -VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_attr * vap, vfs_context_t context) +VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_create_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_create_desc; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } _err = (*dvp->v_op[vnop_create_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(create, vnode_t, *vpp); + } + +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
*/ - xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 0); - } - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } +#endif /* CONFIG_APPLEDOUBLE */ + + post_event_if_success(dvp, _err, NOTE_WRITE); + return (_err); } @@ -2152,33 +3115,13 @@ struct vnop_whiteout_args { }; #endif /* 0*/ errno_t -VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t context) +VNOP_WHITEOUT(__unused vnode_t dvp, __unused struct componentname *cnp, + __unused int flags, __unused vfs_context_t ctx) { - int _err; - struct vnop_whiteout_args a; - int thread_safe; - int funnel_state = 0; - - a.a_desc = &vnop_whiteout_desc; - a.a_dvp = dvp; - a.a_cnp = cnp; - a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); - - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } - _err = (*dvp->v_op[vnop_whiteout_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } - return (_err); + return (ENOTSUP); // XXX OBSOLETE } - #if 0 +#if 0 /* *# *#% mknod dvp L U U @@ -2195,31 +3138,26 @@ struct vnop_mknod_args { }; #endif /* 0*/ errno_t -VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_attr * vap, vfs_context_t context) +VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_mknod_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_mknod_desc; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } _err = (*dvp->v_op[vnop_mknod_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(mknod, vnode_t, *vpp); } + + 
post_event_if_success(dvp, _err, NOTE_WRITE); + return (_err); } @@ -2237,41 +3175,22 @@ struct vnop_open_args { }; #endif /* 0*/ errno_t -VNOP_OPEN(vnode_t vp, int mode, vfs_context_t context) +VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) { int _err; struct vnop_open_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; - } + if (ctx == NULL) { + ctx = vfs_context_current(); + } a.a_desc = &vnop_open_desc; a.a_vp = vp; a.a_mode = mode; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } + a.a_context = ctx; + _err = (*vp->v_op[vnop_open_desc.vdesc_offset])(&a); - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(open, vnode_t, vp); + return (_err); } @@ -2289,41 +3208,22 @@ struct vnop_close_args { }; #endif /* 0*/ errno_t -VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t context) +VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx) { int _err; struct vnop_close_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } a.a_desc = &vnop_close_desc; a.a_vp = vp; a.a_fflag = fflag; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && 
vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } + a.a_context = ctx; + _err = (*vp->v_op[vnop_close_desc.vdesc_offset])(&a); - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(close, vnode_t, vp); + return (_err); } @@ -2341,34 +3241,22 @@ struct vnop_access_args { }; #endif /* 0*/ errno_t -VNOP_ACCESS(vnode_t vp, int action, vfs_context_t context) +VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx) { int _err; struct vnop_access_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } a.a_desc = &vnop_access_desc; a.a_vp = vp; a.a_action = action; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_access_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(access, vnode_t, vp); + return (_err); } @@ -2386,28 +3274,19 @@ struct vnop_getattr_args { }; #endif /* 0*/ errno_t -VNOP_GETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t context) +VNOP_GETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_getattr_args a; - int thread_safe; - int funnel_state; a.a_desc = &vnop_getattr_desc; a.a_vp = vp; a.a_vap = vap; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_getattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - 
unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(getattr, vnode_t, vp); + return (_err); } @@ -2425,28 +3304,22 @@ struct vnop_setattr_args { }; #endif /* 0*/ errno_t -VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t context) +VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_setattr_args a; - int thread_safe; - int funnel_state; a.a_desc = &vnop_setattr_desc; a.a_vp = vp; a.a_vap = vap; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_setattr_desc.vdesc_offset])(&a); + DTRACE_FSINFO(setattr, vnode_t, vp); +#if CONFIG_APPLEDOUBLE /* - * Shadow uid/gid/mod change to extended attibute file. + * Shadow uid/gid/mod change to extended attribute file. */ if (_err == 0 && !NATIVE_XATTR(vp)) { struct vnode_attr va; @@ -2467,167 +3340,90 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t context) } if (change) { vnode_t dvp; - char *vname; + const char *vname; dvp = vnode_getparent(vp); vname = vnode_getname(vp); - xattrfile_setattr(dvp, vname, &va, context, thread_safe); + xattrfile_setattr(dvp, vname, &va, ctx); if (dvp != NULLVP) vnode_put(dvp); if (vname != NULL) vnode_putname(vname); } } - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } +#endif /* CONFIG_APPLEDOUBLE */ + + /* + * If we have changed any of the things about the file that are likely + * to result in changes to authorization results, blow the vnode auth + * cache + */ + if (_err == 0 && ( + VATTR_IS_SUPPORTED(vap, va_mode) || + VATTR_IS_SUPPORTED(vap, va_uid) || + VATTR_IS_SUPPORTED(vap, va_gid) || + VATTR_IS_SUPPORTED(vap, va_flags) || + VATTR_IS_SUPPORTED(vap, va_acl) || + VATTR_IS_SUPPORTED(vap, va_uuuid) || + VATTR_IS_SUPPORTED(vap, va_guuid))) { + vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS); + +#if NAMEDSTREAMS + if 
(vfs_authopaque(vp->v_mount) && vnode_hasnamedstreams(vp)) { + vnode_t svp; + if (vnode_getnamedstream(vp, &svp, XATTR_RESOURCEFORK_NAME, NS_OPEN, 0, ctx) == 0) { + vnode_uncache_authorized_action(svp, KAUTH_INVALIDATE_CACHED_RIGHTS); + vnode_put(svp); + } + } +#endif /* NAMEDSTREAMS */ + } + + + post_event_if_success(vp, _err, NOTE_ATTRIB); + return (_err); } + #if 0 /* *# - *#% getattrlist vp = = = + *#% read vp L L L *# */ -struct vnop_getattrlist_args { +struct vnop_read_args { struct vnodeop_desc *a_desc; vnode_t a_vp; - struct attrlist *a_alist; struct uio *a_uio; - int a_options; + int a_ioflag; vfs_context_t a_context; }; #endif /* 0*/ -errno_t -VNOP_GETATTRLIST(vnode_t vp, struct attrlist * alist, struct uio * uio, int options, vfs_context_t context) -{ - int _err; - struct vnop_getattrlist_args a; - int thread_safe; - int funnel_state = 0; - - a.a_desc = &vnop_getattrlist_desc; - a.a_vp = vp; - a.a_alist = alist; - a.a_uio = uio; - a.a_options = options; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } - _err = (*vp->v_op[vnop_getattrlist_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } - return (_err); -} - -#if 0 -/* - *# - *#% setattrlist vp L L L - *# - */ -struct vnop_setattrlist_args { - struct vnodeop_desc *a_desc; - vnode_t a_vp; - struct attrlist *a_alist; - struct uio *a_uio; - int a_options; - vfs_context_t a_context; -}; -#endif /* 0*/ -errno_t -VNOP_SETATTRLIST(vnode_t vp, struct attrlist * alist, struct uio * uio, int options, vfs_context_t context) -{ - int _err; - struct vnop_setattrlist_args a; - int thread_safe; - int funnel_state = 0; - - a.a_desc = &vnop_setattrlist_desc; - a.a_vp = vp; - a.a_alist = alist; - a.a_uio = uio; - a.a_options = options; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - 
return (_err); - } - } - _err = (*vp->v_op[vnop_setattrlist_desc.vdesc_offset])(&a); - - vnode_uncache_credentials(vp); - - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } - return (_err); -} - - -#if 0 -/* - *# - *#% read vp L L L - *# - */ -struct vnop_read_args { - struct vnodeop_desc *a_desc; - vnode_t a_vp; - struct uio *a_uio; - int a_ioflag; - vfs_context_t a_context; -}; -#endif /* 0*/ -errno_t -VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t context) +errno_t +VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) { int _err; struct vnop_read_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + return EINVAL; } a.a_desc = &vnop_read_desc; a.a_vp = vp; a.a_uio = uio; a.a_ioflag = ioflag; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } + a.a_context = ctx; + _err = (*vp->v_op[vnop_read_desc.vdesc_offset])(&a); + DTRACE_FSINFO_IO(read, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } return (_err); } @@ -2647,44 +3443,30 @@ struct vnop_write_args { }; #endif /* 0*/ errno_t -VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t context) +VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) { struct vnop_write_args a; int _err; - int thread_safe; - int funnel_state = 
0; - struct vfs_context acontext; +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + return EINVAL; } a.a_desc = &vnop_write_desc; a.a_vp = vp; a.a_uio = uio; a.a_ioflag = ioflag; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } + a.a_context = ctx; + _err = (*vp->v_op[vnop_write_desc.vdesc_offset])(&a); + DTRACE_FSINFO_IO(write, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); + + post_event_if_success(vp, _err, NOTE_WRITE); - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } return (_err); } @@ -2705,22 +3487,31 @@ struct vnop_ioctl_args { }; #endif /* 0*/ errno_t -VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t context) +VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ctx) { int _err; struct vnop_ioctl_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } - if (vfs_context_is64bit(context)) { - if (!vnode_vfs64bitready(vp)) { + /* + * This check should probably have been put in the TTY code instead... + * + * We have to be careful about what we assume during startup and shutdown. 
+ * We have to be able to use the root filesystem's device vnode even when + * devfs isn't mounted (yet/anymore), so we can't go looking at its mount + * structure. If there is no data pointer, it doesn't matter whether + * the device is 64-bit ready. Any command (like DKIOCSYNCHRONIZE) + * which passes NULL for its data pointer can therefore be used during + * mount or unmount of the root filesystem. + * + * Depending on what root filesystems need to do during mount/unmount, we + * may need to loosen this check again in the future. + */ + if (vfs_context_is64bit(ctx) && !(vnode_ischr(vp) || vnode_isblk(vp))) { + if (data != NULL && !vnode_vfs64bitready(vp)) { return(ENOTTY); } } @@ -2730,25 +3521,11 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t co a.a_command = command; a.a_data = data; a.a_fflag = fflag; - a.a_context= context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } + a.a_context= ctx; + _err = (*vp->v_op[vnop_ioctl_desc.vdesc_offset])(&a); - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(ioctl, vnode_t, vp); + return (_err); } @@ -2769,43 +3546,24 @@ struct vnop_select_args { }; #endif /* 0*/ errno_t -VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t context) +VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx) { int _err; struct vnop_select_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx 
== NULL) { + ctx = vfs_context_current(); } a.a_desc = &vnop_select_desc; a.a_vp = vp; a.a_which = which; a.a_fflags = fflags; - a.a_context = context; + a.a_context = ctx; a.a_wql = wql; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } _err = (*vp->v_op[vnop_select_desc.vdesc_offset])(&a); - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(select, vnode_t, vp); + return (_err); } @@ -2826,45 +3584,24 @@ struct vnop_exchange_args { }; #endif /* 0*/ errno_t -VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t context) +VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx) { int _err; struct vnop_exchange_args a; - int thread_safe; - int funnel_state = 0; - vnode_t lock_first = NULL, lock_second = NULL; a.a_desc = &vnop_exchange_desc; a.a_fvp = fvp; a.a_tvp = tvp; a.a_options = options; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(fvp); + a.a_context = ctx; - if (!thread_safe) { - /* - * Lock in vnode address order to avoid deadlocks - */ - if (fvp < tvp) { - lock_first = fvp; - lock_second = tvp; - } else { - lock_first = tvp; - lock_second = fvp; - } - if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) { - return (_err); - } - if ( (_err = lock_fsnode(lock_second, NULL)) ) { - unlock_fsnode(lock_first, &funnel_state); - return (_err); - } - } _err = (*fvp->v_op[vnop_exchange_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(lock_second, NULL); - unlock_fsnode(lock_first, &funnel_state); - } + DTRACE_FSINFO(exchange, vnode_t, fvp); + + /* Don't post NOTE_WRITE because file descriptors follow the 
data ... */ + post_event_if_success(fvp, _err, NOTE_ATTRIB); + post_event_if_success(tvp, _err, NOTE_ATTRIB); + return (_err); } @@ -2883,26 +3620,19 @@ struct vnop_revoke_args { }; #endif /* 0*/ errno_t -VNOP_REVOKE(vnode_t vp, int flags, vfs_context_t context) +VNOP_REVOKE(vnode_t vp, int flags, vfs_context_t ctx) { struct vnop_revoke_args a; int _err; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_revoke_desc; a.a_vp = vp; a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_revoke_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(revoke, vnode_t, vp); + return (_err); } @@ -2921,28 +3651,19 @@ struct vnop_mmap_args { }; #endif /* 0*/ errno_t -VNOP_MMAP(vnode_t vp, int fflags, vfs_context_t context) +VNOP_MMAP(vnode_t vp, int fflags, vfs_context_t ctx) { int _err; struct vnop_mmap_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_mmap_desc; a.a_vp = vp; a.a_fflags = fflags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_mmap_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(mmap, vnode_t, vp); + return (_err); } @@ -2960,27 +3681,18 @@ struct vnop_mnomap_args { }; #endif /* 0*/ errno_t -VNOP_MNOMAP(vnode_t vp, vfs_context_t context) +VNOP_MNOMAP(vnode_t vp, vfs_context_t ctx) { int _err; struct vnop_mnomap_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_mnomap_desc; a.a_vp = vp; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = 
(*vp->v_op[vnop_mnomap_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(mnomap, vnode_t, vp); + return (_err); } @@ -2999,28 +3711,19 @@ struct vnop_fsync_args { }; #endif /* 0*/ errno_t -VNOP_FSYNC(vnode_t vp, int waitfor, vfs_context_t context) +VNOP_FSYNC(vnode_t vp, int waitfor, vfs_context_t ctx) { struct vnop_fsync_args a; int _err; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_fsync_desc; a.a_vp = vp; a.a_waitfor = waitfor; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_fsync_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(fsync, vnode_t, vp); + return (_err); } @@ -3042,45 +3745,89 @@ struct vnop_remove_args { }; #endif /* 0*/ errno_t -VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_context_t context) +VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_context_t ctx) { int _err; struct vnop_remove_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_remove_desc; a.a_dvp = dvp; a.a_vp = vp; a.a_cnp = cnp; a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); + DTRACE_FSINFO(remove, vnode_t, vp); if (_err == 0) { vnode_setneedinactive(vp); +#if CONFIG_APPLEDOUBLE + if ( !(NATIVE_XATTR(dvp)) ) { + /* + * Remove any associated extended attribute file (._ AppleDouble file). 
+ */ + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 1); + } +#endif /* CONFIG_APPLEDOUBLE */ + } + post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK); + post_event_if_success(dvp, _err, NOTE_WRITE); + + return (_err); +} + +int +VNOP_COMPOUND_REMOVE(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, struct vnode_attr *vap, vfs_context_t ctx) +{ + int _err; + struct vnop_compound_remove_args a; + int no_vp = (*vpp == NULLVP); + + a.a_desc = &vnop_compound_remove_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = &ndp->ni_cnd; + a.a_flags = flags; + a.a_vap = vap; + a.a_context = ctx; + a.a_remove_authorizer = vn_authorize_unlink; + + _err = (*dvp->v_op[vnop_compound_remove_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_remove, vnode_t, *vpp); + } else { + DTRACE_FSINFO(compound_remove, vnode_t, dvp); + } + if (_err == 0) { + vnode_setneedinactive(*vpp); +#if CONFIG_APPLEDOUBLE if ( !(NATIVE_XATTR(dvp)) ) { /* - * Remove any associated extended attibute file (._ AppleDouble file). + * Remove any associated extended attribute file (._ AppleDouble file). 
*/ - xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 1); + xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 1); } +#endif /* CONFIG_APPLEDOUBLE */ } - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); + + post_event_if_success(*vpp, _err, NOTE_DELETE | NOTE_LINK); + post_event_if_success(dvp, _err, NOTE_WRITE); + + if (no_vp) { + lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, 0); + if (*vpp && _err && _err != EKEEPLOOKING) { + vnode_put(*vpp); + *vpp = NULLVP; + } } + + //printf("VNOP_COMPOUND_REMOVE() returning %d\n", _err); + return (_err); } - #if 0 /* *# @@ -3097,19 +3844,18 @@ struct vnop_link_args { }; #endif /* 0*/ errno_t -VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t context) +VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ctx) { int _err; struct vnop_link_args a; - int thread_safe; - int funnel_state = 0; +#if CONFIG_APPLEDOUBLE /* * For file systems with non-native extended attributes, * disallow linking to an existing "._" Apple Double file. 
*/ if ( !NATIVE_XATTR(tdvp) && (vp->v_type == VREG)) { - char *vname; + const char *vname; vname = vnode_getname(vp); if (vname != NULL) { @@ -3122,130 +3868,62 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t co return (_err); } } +#endif /* CONFIG_APPLEDOUBLE */ + a.a_desc = &vnop_link_desc; a.a_vp = vp; a.a_tdvp = tdvp; a.a_cnp = cnp; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*tdvp->v_op[vnop_link_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(link, vnode_t, vp); + + post_event_if_success(vp, _err, NOTE_LINK); + post_event_if_success(tdvp, _err, NOTE_WRITE); + return (_err); } - -#if 0 -/* - *# - *#% rename fdvp U U U - *#% rename fvp U U U - *#% rename tdvp L U U - *#% rename tvp X U U - *# - */ -struct vnop_rename_args { - struct vnodeop_desc *a_desc; - vnode_t a_fdvp; - vnode_t a_fvp; - struct componentname *a_fcnp; - vnode_t a_tdvp; - vnode_t a_tvp; - struct componentname *a_tcnp; - vfs_context_t a_context; -}; -#endif /* 0*/ errno_t -VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, - struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, - vfs_context_t context) +vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, struct vnode_attr *fvap, + struct vnode *tdvp, struct vnode **tvpp, struct componentname *tcnp, struct vnode_attr *tvap, + vfs_rename_flags_t flags, vfs_context_t ctx) { int _err; - struct vnop_rename_args a; - int funnel_state = 0; + struct nameidata *fromnd = NULL; + struct nameidata *tond = NULL; +#if CONFIG_APPLEDOUBLE + vnode_t src_attr_vp = NULLVP; + vnode_t dst_attr_vp = NULLVP; char smallname1[48]; char smallname2[48]; char *xfromname = NULL; char *xtoname = NULL; - vnode_t lock_first = NULL, lock_second = NULL; - vnode_t fdvp_unsafe = 
NULLVP; - vnode_t tdvp_unsafe = NULLVP; - - a.a_desc = &vnop_rename_desc; - a.a_fdvp = fdvp; - a.a_fvp = fvp; - a.a_fcnp = fcnp; - a.a_tdvp = tdvp; - a.a_tvp = tvp; - a.a_tcnp = tcnp; - a.a_context = context; +#endif /* CONFIG_APPLEDOUBLE */ + int batched; + uint32_t tdfflags; // Target directory file flags - if (!THREAD_SAFE_FS(fdvp)) - fdvp_unsafe = fdvp; - if (!THREAD_SAFE_FS(tdvp)) - tdvp_unsafe = tdvp; + batched = vnode_compound_rename_available(fdvp); - if (fdvp_unsafe != NULLVP) { - /* - * Lock parents in vnode address order to avoid deadlocks - * note that it's possible for the fdvp to be unsafe, - * but the tdvp to be safe because tvp could be a directory - * in the root of a filesystem... in that case, tdvp is the - * in the filesystem that this root is mounted on - */ - if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) { - lock_first = fdvp_unsafe; - lock_second = NULL; - } else if (fdvp_unsafe < tdvp_unsafe) { - lock_first = fdvp_unsafe; - lock_second = tdvp_unsafe; - } else { - lock_first = tdvp_unsafe; - lock_second = fdvp_unsafe; - } - if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) - return (_err); - - if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { - unlock_fsnode(lock_first, &funnel_state); - return (_err); - } - - /* - * Lock both children in vnode address order to avoid deadlocks - */ - if (tvp == NULL || tvp == fvp) { - lock_first = fvp; - lock_second = NULL; - } else if (fvp < tvp) { - lock_first = fvp; - lock_second = tvp; - } else { - lock_first = tvp; - lock_second = fvp; - } - if ( (_err = lock_fsnode(lock_first, NULL)) ) - goto out1; - - if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { - unlock_fsnode(lock_first, NULL); - goto out1; - } + if (!batched) { + if (*fvpp == NULLVP) + panic("Not batched, and no fvp?"); } + +#if CONFIG_APPLEDOUBLE /* - * Save source and destination names (._ AppleDouble files). - * Skip if source already has a "._" prefix. 
+ * We need to preflight any potential AppleDouble file for the source file + * before doing the rename operation, since we could potentially be doing + * this operation on a network filesystem, and would end up duplicating + * the work. Also, save the source and destination names. Skip it if the + * source has a "._" prefix. */ + if (!NATIVE_XATTR(fdvp) && !(fcnp->cn_nameptr[0] == '.' && fcnp->cn_nameptr[1] == '_')) { size_t len; + int error; /* Get source attribute file name. */ len = fcnp->cn_namelen + 3; @@ -3254,7 +3932,7 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, } else { xfromname = &smallname1[0]; } - strcpy(xfromname, "._"); + strlcpy(xfromname, "._", min(sizeof smallname1, len)); strncat(xfromname, fcnp->cn_nameptr, fcnp->cn_namelen); xfromname[len-1] = '\0'; @@ -3265,175 +3943,453 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, } else { xtoname = &smallname2[0]; } - strcpy(xtoname, "._"); + strlcpy(xtoname, "._", min(sizeof smallname2, len)); strncat(xtoname, tcnp->cn_nameptr, tcnp->cn_namelen); xtoname[len-1] = '\0'; + + /* + * Look up source attribute file, keep reference on it if exists. + * Note that we do the namei with the nameiop of RENAME, which is different than + * in the rename syscall. It's OK if the source file does not exist, since this + * is only for AppleDouble files. + */ + if (xfromname != NULL) { + MALLOC(fromnd, struct nameidata *, sizeof (struct nameidata), M_TEMP, M_WAITOK); + NDINIT(fromnd, RENAME, OP_RENAME, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, + UIO_SYSSPACE, CAST_USER_ADDR_T(xfromname), ctx); + fromnd->ni_dvp = fdvp; + error = namei(fromnd); + + /* + * If there was an error looking up source attribute file, + * we'll behave as if it didn't exist. 
+ */ + + if (error == 0) { + if (fromnd->ni_vp) { + /* src_attr_vp indicates need to call vnode_put / nameidone later */ + src_attr_vp = fromnd->ni_vp; + + if (fromnd->ni_vp->v_type != VREG) { + src_attr_vp = NULLVP; + vnode_put(fromnd->ni_vp); + } + } + /* + * Either we got an invalid vnode type (not a regular file) or the namei lookup + * suppressed ENOENT as a valid error since we're renaming. Either way, we don't + * have a vnode here, so we drop our namei buffer for the source attribute file + */ + if (src_attr_vp == NULLVP) { + nameidone(fromnd); + } + } + } } +#endif /* CONFIG_APPLEDOUBLE */ - _err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); + if (batched) { + _err = VNOP_COMPOUND_RENAME(fdvp, fvpp, fcnp, fvap, tdvp, tvpp, tcnp, tvap, flags, ctx); + if (_err != 0) { + printf("VNOP_COMPOUND_RENAME() returned %d\n", _err); + } + } else { + if (flags) { + _err = VNOP_RENAMEX(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, flags, ctx); + if (_err == ENOTSUP && flags == VFS_RENAME_SECLUDE) { + // Legacy... + if ((*fvpp)->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_SECLUDE_RENAME) { + fcnp->cn_flags |= CN_SECLUDE_RENAME; + _err = VNOP_RENAME(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, ctx); + } + } + } else + _err = VNOP_RENAME(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, ctx); + } - if (fdvp_unsafe != NULLVP) { - if (lock_second != NULL) - unlock_fsnode(lock_second, NULL); - unlock_fsnode(lock_first, NULL); + /* + * If moved to a new directory that is restricted, + * set the restricted flag on the item moved. 
+ */ + if (_err == 0) { + _err = vnode_flags(tdvp, &tdfflags, ctx); + if (_err == 0 && (tdfflags & SF_RESTRICTED)) { + uint32_t fflags; + _err = vnode_flags(*fvpp, &fflags, ctx); + if (_err == 0 && !(fflags & SF_RESTRICTED)) { + struct vnode_attr va; + VATTR_INIT(&va); + VATTR_SET(&va, va_flags, fflags | SF_RESTRICTED); + _err = vnode_setattr(*fvpp, &va, ctx); + } + } } + +#if CONFIG_MACF if (_err == 0) { - if (tvp && tvp != fvp) - vnode_setneedinactive(tvp); + mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp); } +#endif +#if CONFIG_APPLEDOUBLE /* - * Rename any associated extended attibute file (._ AppleDouble file). + * Rename any associated extended attribute file (._ AppleDouble file). */ if (_err == 0 && !NATIVE_XATTR(fdvp) && xfromname != NULL) { - struct nameidata fromnd, tond; - int killdest = 0; - int error; - + int error = 0; + /* - * Get source attribute file vnode. - * Note that fdvp already has an iocount reference and - * using DELETE will take an additional reference. + * Get destination attribute file vnode. + * Note that tdvp already has an iocount reference. Make sure to check that we + * get a valid vnode from namei. */ - NDINIT(&fromnd, DELETE, NOFOLLOW | USEDVP, UIO_SYSSPACE, - CAST_USER_ADDR_T(xfromname), context); - fromnd.ni_dvp = fdvp; - error = namei(&fromnd); - - if (error) { - /* When source doesn't exist there still may be a destination. 
*/ - if (error == ENOENT) { - killdest = 1; + MALLOC(tond, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_WAITOK); + NDINIT(tond, RENAME, OP_RENAME, + NOCACHE | NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE, + CAST_USER_ADDR_T(xtoname), ctx); + tond->ni_dvp = tdvp; + error = namei(tond); + + if (error) + goto ad_error; + + if (tond->ni_vp) { + dst_attr_vp = tond->ni_vp; + } + + if (src_attr_vp) { + const char *old_name = src_attr_vp->v_name; + vnode_t old_parent = src_attr_vp->v_parent; + + if (batched) { + error = VNOP_COMPOUND_RENAME(fdvp, &src_attr_vp, &fromnd->ni_cnd, NULL, + tdvp, &dst_attr_vp, &tond->ni_cnd, NULL, + 0, ctx); } else { - goto out; + error = VNOP_RENAME(fdvp, src_attr_vp, &fromnd->ni_cnd, + tdvp, dst_attr_vp, &tond->ni_cnd, ctx); } - } else if (fromnd.ni_vp->v_type != VREG) { - vnode_put(fromnd.ni_vp); - nameidone(&fromnd); - killdest = 1; - } - if (killdest) { - struct vnop_remove_args args; + if (error == 0 && old_name == src_attr_vp->v_name && + old_parent == src_attr_vp->v_parent) { + int update_flags = VNODE_UPDATE_NAME; + + if (fdvp != tdvp) + update_flags |= VNODE_UPDATE_PARENT; + + if ((src_attr_vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_NOUPDATEID_RENAME) == 0) { + vnode_update_identity(src_attr_vp, tdvp, + tond->ni_cnd.cn_nameptr, + tond->ni_cnd.cn_namelen, + tond->ni_cnd.cn_hash, + update_flags); + } + } + + /* kevent notifications for moving resource files + * _err is zero if we're here, so no need to notify directories, code + * below will do that. only need to post the rename on the source and + * possibly a delete on the dest + */ + post_event_if_success(src_attr_vp, error, NOTE_RENAME); + if (dst_attr_vp) { + post_event_if_success(dst_attr_vp, error, NOTE_DELETE); + } + + } else if (dst_attr_vp) { /* - * Get destination attribute file vnode. + * Just delete destination attribute file vnode if it exists, since + * we didn't have a source attribute file. * Note that tdvp already has an iocount reference. 
*/ - NDINIT(&tond, DELETE, NOFOLLOW | USEDVP, UIO_SYSSPACE, - CAST_USER_ADDR_T(xtoname), context); - tond.ni_dvp = tdvp; - error = namei(&tond); - if (error) { - goto out; - } - if (tond.ni_vp->v_type != VREG) { - vnode_put(tond.ni_vp); - nameidone(&tond); - goto out; - } + + struct vnop_remove_args args; + args.a_desc = &vnop_remove_desc; args.a_dvp = tdvp; - args.a_vp = tond.ni_vp; - args.a_cnp = &tond.ni_cnd; - args.a_context = context; + args.a_vp = dst_attr_vp; + args.a_cnp = &tond->ni_cnd; + args.a_context = ctx; - if (fdvp_unsafe != NULLVP) - error = lock_fsnode(tond.ni_vp, NULL); if (error == 0) { - error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args); - - if (fdvp_unsafe != NULLVP) - unlock_fsnode(tond.ni_vp, NULL); + error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args); if (error == 0) - vnode_setneedinactive(tond.ni_vp); + vnode_setneedinactive(dst_attr_vp); } - vnode_put(tond.ni_vp); - nameidone(&tond); - goto out; + + /* kevent notification for deleting the destination's attribute file + * if it existed. Only need to post the delete on the destination, since + * the code below will handle the directories. + */ + post_event_if_success(dst_attr_vp, error, NOTE_DELETE); } + } +ad_error: + if (src_attr_vp) { + vnode_put(src_attr_vp); + nameidone(fromnd); + } + if (dst_attr_vp) { + vnode_put(dst_attr_vp); + nameidone(tond); + } + if (xfromname && xfromname != &smallname1[0]) { + FREE(xfromname, M_TEMP); + } + if (xtoname && xtoname != &smallname2[0]) { + FREE(xtoname, M_TEMP); + } +#endif /* CONFIG_APPLEDOUBLE */ + if (fromnd) { + FREE(fromnd, M_TEMP); + } + if (tond) { + FREE(tond, M_TEMP); + } + return _err; +} - /* - * Get destination attribute file vnode. 
+ +#if 0 +/* + *# + *#% rename fdvp U U U + *#% rename fvp U U U + *#% rename tdvp L U U + *#% rename tvp X U U + *# + */ +struct vnop_rename_args { + struct vnodeop_desc *a_desc; + vnode_t a_fdvp; + vnode_t a_fvp; + struct componentname *a_fcnp; + vnode_t a_tdvp; + vnode_t a_tvp; + struct componentname *a_tcnp; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, + struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, + vfs_context_t ctx) +{ + int _err = 0; + struct vnop_rename_args a; + + a.a_desc = &vnop_rename_desc; + a.a_fdvp = fdvp; + a.a_fvp = fvp; + a.a_fcnp = fcnp; + a.a_tdvp = tdvp; + a.a_tvp = tvp; + a.a_tcnp = tcnp; + a.a_context = ctx; + + /* do the rename of the main file. */ + _err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); + DTRACE_FSINFO(rename, vnode_t, fdvp); + + if (_err) + return _err; + + return post_rename(fdvp, fvp, tdvp, tvp); +} + +static errno_t +post_rename(vnode_t fdvp, vnode_t fvp, vnode_t tdvp, vnode_t tvp) +{ + if (tvp && tvp != fvp) + vnode_setneedinactive(tvp); + + /* Wrote at least one directory. If transplanted a dir, also changed link counts */ + int events = NOTE_WRITE; + if (vnode_isdir(fvp)) { + /* Link count on dir changed only if we are moving a dir and... 
+ * --Moved to new dir, not overwriting there + * --Kept in same dir and DID overwrite */ - NDINIT(&tond, RENAME, - NOCACHE | NOFOLLOW | USEDVP, UIO_SYSSPACE, - CAST_USER_ADDR_T(xtoname), context); - tond.ni_dvp = tdvp; - error = namei(&tond); - - if (error) { - vnode_put(fromnd.ni_vp); - nameidone(&fromnd); - goto out; + if (((fdvp != tdvp) && (!tvp)) || ((fdvp == tdvp) && (tvp))) { + events |= NOTE_LINK; } - a.a_desc = &vnop_rename_desc; - a.a_fdvp = fdvp; - a.a_fvp = fromnd.ni_vp; - a.a_fcnp = &fromnd.ni_cnd; - a.a_tdvp = tdvp; - a.a_tvp = tond.ni_vp; - a.a_tcnp = &tond.ni_cnd; - a.a_context = context; - - if (fdvp_unsafe != NULLVP) { - /* - * Lock in vnode address order to avoid deadlocks + } + + lock_vnode_and_post(fdvp, events); + if (fdvp != tdvp) { + lock_vnode_and_post(tdvp, events); + } + + /* If you're replacing the target, post a deletion for it */ + if (tvp) + { + lock_vnode_and_post(tvp, NOTE_DELETE); + } + + lock_vnode_and_post(fvp, NOTE_RENAME); + + return 0; +} + +#if 0 +/* + *# + *#% renamex fdvp U U U + *#% renamex fvp U U U + *#% renamex tdvp L U U + *#% renamex tvp X U U + *# + */ +struct vnop_renamex_args { + struct vnodeop_desc *a_desc; + vnode_t a_fdvp; + vnode_t a_fvp; + struct componentname *a_fcnp; + vnode_t a_tdvp; + vnode_t a_tvp; + struct componentname *a_tcnp; + vfs_rename_flags_t a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_RENAMEX(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, + struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, + vfs_rename_flags_t flags, vfs_context_t ctx) +{ + int _err = 0; + struct vnop_renamex_args a; + + a.a_desc = &vnop_renamex_desc; + a.a_fdvp = fdvp; + a.a_fvp = fvp; + a.a_fcnp = fcnp; + a.a_tdvp = tdvp; + a.a_tvp = tvp; + a.a_tcnp = tcnp; + a.a_flags = flags; + a.a_context = ctx; + + /* do the rename of the main file. 
*/ + _err = (*fdvp->v_op[vnop_renamex_desc.vdesc_offset])(&a); + DTRACE_FSINFO(renamex, vnode_t, fdvp); + + if (_err) + return _err; + + return post_rename(fdvp, fvp, tdvp, tvp); +} + + +int +VNOP_COMPOUND_RENAME( + struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, struct vnode_attr *fvap, + struct vnode *tdvp, struct vnode **tvpp, struct componentname *tcnp, struct vnode_attr *tvap, + uint32_t flags, vfs_context_t ctx) +{ + int _err = 0; + int events; + struct vnop_compound_rename_args a; + int no_fvp, no_tvp; + + no_fvp = (*fvpp) == NULLVP; + no_tvp = (*tvpp) == NULLVP; + + a.a_desc = &vnop_compound_rename_desc; + + a.a_fdvp = fdvp; + a.a_fvpp = fvpp; + a.a_fcnp = fcnp; + a.a_fvap = fvap; + + a.a_tdvp = tdvp; + a.a_tvpp = tvpp; + a.a_tcnp = tcnp; + a.a_tvap = tvap; + + a.a_flags = flags; + a.a_context = ctx; + a.a_rename_authorizer = vn_authorize_rename; + a.a_reserved = NULL; + + /* do the rename of the main file. */ + _err = (*fdvp->v_op[vnop_compound_rename_desc.vdesc_offset])(&a); + DTRACE_FSINFO(compound_rename, vnode_t, fdvp); + + if (_err == 0) { + if (*tvpp && *tvpp != *fvpp) + vnode_setneedinactive(*tvpp); + } + + /* Wrote at least one directory. If transplanted a dir, also changed link counts */ + if (_err == 0 && *fvpp != *tvpp) { + if (!*fvpp) { + panic("No fvpp after compound rename?"); + } + + events = NOTE_WRITE; + if (vnode_isdir(*fvpp)) { + /* Link count on dir changed only if we are moving a dir and... 
+ * --Moved to new dir, not overwriting there + * --Kept in same dir and DID overwrite */ - if (tond.ni_vp == NULL || tond.ni_vp == fromnd.ni_vp) { - lock_first = fromnd.ni_vp; - lock_second = NULL; - } else if (fromnd.ni_vp < tond.ni_vp) { - lock_first = fromnd.ni_vp; - lock_second = tond.ni_vp; - } else { - lock_first = tond.ni_vp; - lock_second = fromnd.ni_vp; - } - if ( (error = lock_fsnode(lock_first, NULL)) == 0) { - if (lock_second != NULL && (error = lock_fsnode(lock_second, NULL)) ) - unlock_fsnode(lock_first, NULL); + if (((fdvp != tdvp) && (!*tvpp)) || ((fdvp == tdvp) && (*tvpp))) { + events |= NOTE_LINK; } } - if (error == 0) { - error = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); - if (fdvp_unsafe != NULLVP) { - if (lock_second != NULL) - unlock_fsnode(lock_second, NULL); - unlock_fsnode(lock_first, NULL); - } - if (error == 0) { - vnode_setneedinactive(fromnd.ni_vp); - - if (tond.ni_vp && tond.ni_vp != fromnd.ni_vp) - vnode_setneedinactive(tond.ni_vp); - } + lock_vnode_and_post(fdvp, events); + if (fdvp != tdvp) { + lock_vnode_and_post(tdvp, events); } - vnode_put(fromnd.ni_vp); - if (tond.ni_vp) { - vnode_put(tond.ni_vp); + + /* If you're replacing the target, post a deletion for it */ + if (*tvpp) + { + lock_vnode_and_post(*tvpp, NOTE_DELETE); } - nameidone(&tond); - nameidone(&fromnd); + + lock_vnode_and_post(*fvpp, NOTE_RENAME); } -out: - if (xfromname && xfromname != &smallname1[0]) { - FREE(xfromname, M_TEMP); + + if (no_fvp) { + lookup_compound_vnop_post_hook(_err, fdvp, *fvpp, fcnp->cn_ndp, 0); } - if (xtoname && xtoname != &smallname2[0]) { - FREE(xtoname, M_TEMP); + if (no_tvp && *tvpp != NULLVP) { + lookup_compound_vnop_post_hook(_err, tdvp, *tvpp, tcnp->cn_ndp, 0); } -out1: - if (fdvp_unsafe != NULLVP) { - if (tdvp_unsafe != NULLVP) - unlock_fsnode(tdvp_unsafe, NULL); - unlock_fsnode(fdvp_unsafe, &funnel_state); + + if (_err && _err != EKEEPLOOKING) { + if (*fvpp) { + vnode_put(*fvpp); + *fvpp = NULLVP; + } + if (*tvpp) { + 
vnode_put(*tvpp); + *tvpp = NULLVP; + } } + return (_err); } - #if 0 +int +vn_mkdir(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp, + struct vnode_attr *vap, vfs_context_t ctx) +{ + if (ndp->ni_cnd.cn_nameiop != CREATE) { + panic("Non-CREATE nameiop in vn_mkdir()?"); + } + + if (vnode_compound_mkdir_available(dvp)) { + return VNOP_COMPOUND_MKDIR(dvp, vpp, ndp, vap, ctx); + } else { + return VNOP_MKDIR(dvp, vpp, &ndp->ni_cnd, vap, ctx); + } +} + +#if 0 /* *# *#% mkdir dvp L U U @@ -3451,39 +4407,94 @@ struct vnop_mkdir_args { #endif /* 0*/ errno_t VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, - struct vnode_attr *vap, vfs_context_t context) + struct vnode_attr *vap, vfs_context_t ctx) { int _err; struct vnop_mkdir_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_mkdir_desc; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_vap = vap; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } _err = (*dvp->v_op[vnop_mkdir_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(mkdir, vnode_t, *vpp); + } +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
*/ - xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 0); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } +#endif /* CONFIG_APPLEDOUBLE */ + + post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); + return (_err); } +int +VNOP_COMPOUND_MKDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp, + struct vnode_attr *vap, vfs_context_t ctx) +{ + int _err; + struct vnop_compound_mkdir_args a; + + a.a_desc = &vnop_compound_mkdir_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = &ndp->ni_cnd; + a.a_vap = vap; + a.a_flags = 0; + a.a_context = ctx; +#if 0 + a.a_mkdir_authorizer = vn_authorize_mkdir; +#endif /* 0 */ + a.a_reserved = NULL; + + _err = (*dvp->v_op[vnop_compound_mkdir_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_mkdir, vnode_t, *vpp); + } +#if CONFIG_APPLEDOUBLE + if (_err == 0 && !NATIVE_XATTR(dvp)) { + /* + * Remove stale Apple Double file (if any). + */ + xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 0); + } +#endif /* CONFIG_APPLEDOUBLE */ + + post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); + + lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, (_err == 0)); + if (*vpp && _err && _err != EKEEPLOOKING) { + vnode_put(*vpp); + *vpp = NULLVP; + } + + return (_err); +} + +int +vn_rmdir(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, vfs_context_t ctx) +{ + if (vnode_compound_rmdir_available(dvp)) { + return VNOP_COMPOUND_RMDIR(dvp, vpp, ndp, vap, ctx); + } else { + if (*vpp == NULLVP) { + panic("NULL vp, but not a compound VNOP?"); + } + if (vap != NULL) { + panic("Non-NULL vap, but not a compound VNOP?"); + } + return VNOP_RMDIR(dvp, *vpp, &ndp->ni_cnd, ctx); + } +} #if 0 /* @@ -3502,49 +4513,99 @@ struct vnop_rmdir_args { #endif /* 0*/ errno_t -VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_context_t context) +VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, 
struct componentname *cnp, vfs_context_t ctx) { int _err; struct vnop_rmdir_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_rmdir_desc; a.a_dvp = dvp; a.a_vp = vp; a.a_cnp = cnp; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_rmdir_desc.vdesc_offset])(&a); + DTRACE_FSINFO(rmdir, vnode_t, vp); if (_err == 0) { vnode_setneedinactive(vp); - +#if CONFIG_APPLEDOUBLE if ( !(NATIVE_XATTR(dvp)) ) { /* - * Remove any associated extended attibute file (._ AppleDouble file). + * Remove any associated extended attribute file (._ AppleDouble file). */ - xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 1); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 1); } +#endif + } + + /* If you delete a dir, it loses its "." reference --> NOTE_LINK */ + post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK); + post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); + + return (_err); +} + +int +VNOP_COMPOUND_RMDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp, + struct vnode_attr *vap, vfs_context_t ctx) +{ + int _err; + struct vnop_compound_rmdir_args a; + int no_vp; + + a.a_desc = &vnop_mkdir_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = &ndp->ni_cnd; + a.a_vap = vap; + a.a_flags = 0; + a.a_context = ctx; + a.a_rmdir_authorizer = vn_authorize_rmdir; + a.a_reserved = NULL; + + no_vp = (*vpp == NULLVP); + + _err = (*dvp->v_op[vnop_compound_rmdir_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_rmdir, vnode_t, *vpp); + } +#if CONFIG_APPLEDOUBLE + if (_err == 0 && !NATIVE_XATTR(dvp)) { + /* + * Remove stale Apple Double file (if any). 
+ */ + xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 0); } - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); +#endif + + if (*vpp) { + post_event_if_success(*vpp, _err, NOTE_DELETE | NOTE_LINK); } - return (_err); + post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); + + if (no_vp) { + lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, 0); + +#if 0 /* Removing orphaned ._ files requires a vp.... */ + if (*vpp && _err && _err != EKEEPLOOKING) { + vnode_put(*vpp); + *vpp = NULLVP; + } +#endif /* 0 */ + } + + return (_err); } +#if CONFIG_APPLEDOUBLE /* * Remove a ._ AppleDouble file */ #define AD_STALE_SECS (180) static void -xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, int thread_safe, int force) { +xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int force) +{ vnode_t xvp; struct nameidata nd; char smallname[64]; @@ -3562,8 +4623,8 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, int MALLOC(filename, char *, len, M_TEMP, M_WAITOK); len = snprintf(filename, len, "._%s", basename); } - NDINIT(&nd, DELETE, LOCKLEAF | NOFOLLOW | USEDVP, UIO_SYSSPACE, - CAST_USER_ADDR_T(filename), context); + NDINIT(&nd, DELETE, OP_UNLINK, WANTPARENT | LOCKLEAF | NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(filename), ctx); nd.ni_dvp = dvp; if (namei(&nd) != 0) goto out2; @@ -3584,7 +4645,7 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, int VATTR_INIT(&va); VATTR_WANTED(&va, va_data_size); VATTR_WANTED(&va, va_modify_time); - if (VNOP_GETATTR(xvp, &va, context) == 0 && + if (VNOP_GETATTR(xvp, &va, ctx) == 0 && VATTR_IS_SUPPORTED(&va, va_data_size) && VATTR_IS_SUPPORTED(&va, va_modify_time) && va.va_data_size != 0) { @@ -3598,29 +4659,18 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, int } } if (force) { - struct vnop_remove_args a; int error; - a.a_desc = &vnop_remove_desc; - a.a_dvp = nd.ni_dvp; - a.a_vp = xvp; - 
a.a_cnp = &nd.ni_cnd; - a.a_context = context; - - if (!thread_safe) { - if ( (lock_fsnode(xvp, NULL)) ) - goto out1; - } - error = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); - - if (!thread_safe) - unlock_fsnode(xvp, NULL); - + error = VNOP_REMOVE(dvp, xvp, &nd.ni_cnd, 0, ctx); if (error == 0) vnode_setneedinactive(xvp); + + post_event_if_success(xvp, error, NOTE_DELETE); + post_event_if_success(dvp, error, NOTE_WRITE); } + out1: - /* Note: nd.ni_dvp's iocount is dropped by caller of VNOP_XXXX */ + vnode_put(dvp); vnode_put(xvp); out2: if (filename && filename != &smallname[0]) { @@ -3633,7 +4683,8 @@ out2: */ static void xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, - vfs_context_t context, int thread_safe) { + vfs_context_t ctx) +{ vnode_t xvp; struct nameidata nd; char smallname[64]; @@ -3652,8 +4703,8 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, MALLOC(filename, char *, len, M_TEMP, M_WAITOK); len = snprintf(filename, len, "._%s", basename); } - NDINIT(&nd, LOOKUP, NOFOLLOW | USEDVP, UIO_SYSSPACE, - CAST_USER_ADDR_T(filename), context); + NDINIT(&nd, LOOKUP, OP_SETATTR, NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(filename), ctx); nd.ni_dvp = dvp; if (namei(&nd) != 0) goto out2; @@ -3667,24 +4718,18 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, a.a_desc = &vnop_setattr_desc; a.a_vp = xvp; a.a_vap = vap; - a.a_context = context; + a.a_context = ctx; - if (!thread_safe) { - if ( (lock_fsnode(xvp, NULL)) ) - goto out1; - } (void) (*xvp->v_op[vnop_setattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(xvp, NULL); - } } -out1: + vnode_put(xvp); out2: if (filename && filename != &smallname[0]) { FREE(filename, M_TEMP); } } +#endif /* CONFIG_APPLEDOUBLE */ #if 0 /* @@ -3706,12 +4751,10 @@ struct vnop_symlink_args { #endif /* 0*/ errno_t VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, - struct vnode_attr 
*vap, char *target, vfs_context_t context) + struct vnode_attr *vap, char *target, vfs_context_t ctx) { int _err; struct vnop_symlink_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_symlink_desc; a.a_dvp = dvp; @@ -3719,25 +4762,22 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, a.a_cnp = cnp; a.a_vap = vap; a.a_target = target; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(dvp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } _err = (*dvp->v_op[vnop_symlink_desc.vdesc_offset])(&a); + DTRACE_FSINFO(symlink, vnode_t, dvp); +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* - * Remove stale Apple Double file (if any). + * Remove stale Apple Double file (if any). Posts its own knotes */ - xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 0); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } - return (_err); +#endif /* CONFIG_APPLEDOUBLE */ + + post_event_if_success(dvp, _err, NOTE_WRITE); + + return (_err); } #if 0 @@ -3759,12 +4799,13 @@ struct vnop_readdir_args { #endif /* 0*/ errno_t VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, - int *numdirent, vfs_context_t context) + int *numdirent, vfs_context_t ctx) { int _err; struct vnop_readdir_args a; - int thread_safe; - int funnel_state = 0; +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif a.a_desc = &vnop_readdir_desc; a.a_vp = vp; @@ -3772,18 +4813,12 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, a.a_flags = flags; a.a_eofflag = eofflag; a.a_numdirent = numdirent; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_readdir_desc.vdesc_offset])(&a); - if (!thread_safe) { - 
unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO_IO(readdir, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); + return (_err); } @@ -3798,23 +4833,24 @@ struct vnop_readdirattr_args { vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; - u_long a_maxcount; - u_long a_options; - u_long *a_newstate; + uint32_t a_maxcount; + uint32_t a_options; + uint32_t *a_newstate; int *a_eofflag; - u_long *a_actualcount; + uint32_t *a_actualcount; vfs_context_t a_context; }; #endif /* 0*/ errno_t -VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, u_long maxcount, - u_long options, u_long *newstate, int *eofflag, u_long *actualcount, vfs_context_t context) +VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint32_t maxcount, + uint32_t options, uint32_t *newstate, int *eofflag, uint32_t *actualcount, vfs_context_t ctx) { int _err; struct vnop_readdirattr_args a; - int thread_safe; - int funnel_state = 0; +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif a.a_desc = &vnop_readdirattr_desc; a.a_vp = vp; @@ -3825,18 +4861,55 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, u_lo a.a_newstate = newstate; a.a_eofflag = eofflag; a.a_actualcount = actualcount; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_readdirattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO_IO(readdirattr, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); + + return (_err); +} + +#if 0 +struct vnop_getttrlistbulk_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct vnode_attr *a_vap; + struct uio *a_uio; + void *a_private + uint64_t a_options; + int *a_eofflag; + uint32_t *a_actualcount; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t 
+VNOP_GETATTRLISTBULK(struct vnode *vp, struct attrlist *alist, + struct vnode_attr *vap, struct uio *uio, void *private, uint64_t options, + int32_t *eofflag, int32_t *actualcount, vfs_context_t ctx) +{ + int _err; + struct vnop_getattrlistbulk_args a; +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif + + a.a_desc = &vnop_getattrlistbulk_desc; + a.a_vp = vp; + a.a_alist = alist; + a.a_vap = vap; + a.a_uio = uio; + a.a_private = private; + a.a_options = options; + a.a_eofflag = eofflag; + a.a_actualcount = actualcount; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_getattrlistbulk_desc.vdesc_offset])(&a); + DTRACE_FSINFO_IO(getattrlistbulk, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); + return (_err); } @@ -3854,29 +4927,42 @@ struct vnop_readlink_args { }; #endif /* 0 */ +/* + * Returns: 0 Success + * lock_fsnode:ENOENT No such file or directory [only for VFS + * that is not thread safe & vnode is + * currently being/has been terminated] + * :EINVAL + * :??? + * + * Note: The return codes from the underlying VFS's readlink routine + * can't be fully enumerated here, since third party VFS authors + * may not limit their error returns to the ones documented here, + * even though this may result in some programs functioning + * incorrectly. + * + * The return codes documented above are those which may currently + * be returned by HFS from hfs_vnop_readlink, not including + * additional error code which may be propagated from underlying + * routines. 
+ */ errno_t -VNOP_READLINK(struct vnode *vp, struct uio *uio, vfs_context_t context) +VNOP_READLINK(struct vnode *vp, struct uio *uio, vfs_context_t ctx) { int _err; struct vnop_readlink_args a; - int thread_safe; - int funnel_state = 0; - +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif a.a_desc = &vnop_readlink_desc; a.a_vp = vp; a.a_uio = uio; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_readlink_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO_IO(readlink, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); + return (_err); } @@ -3893,27 +4979,32 @@ struct vnop_inactive_args { }; #endif /* 0*/ errno_t -VNOP_INACTIVE(struct vnode *vp, vfs_context_t context) +VNOP_INACTIVE(struct vnode *vp, vfs_context_t ctx) { int _err; struct vnop_inactive_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_inactive_desc; a.a_vp = vp; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } + a.a_context = ctx; + _err = (*vp->v_op[vnop_inactive_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); + DTRACE_FSINFO(inactive, vnode_t, vp); + +#if NAMEDSTREAMS + /* For file systems that do not support namedstream natively, mark + * the shadow stream file vnode to be recycled as soon as the last + * reference goes away. To avoid re-entering reclaim code, do not + * call recycle on terminating namedstream vnodes. 
+ */ + if (vnode_isnamedstream(vp) && + (vp->v_parent != NULLVP) && + vnode_isshadow(vp) && + ((vp->v_lflag & VL_TERMINATE) == 0)) { + vnode_recycle(vp); } +#endif + return (_err); } @@ -3931,29 +5022,29 @@ struct vnop_reclaim_args { }; #endif /* 0*/ errno_t -VNOP_RECLAIM(struct vnode *vp, vfs_context_t context) +VNOP_RECLAIM(struct vnode *vp, vfs_context_t ctx) { int _err; struct vnop_reclaim_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_reclaim_desc; a.a_vp = vp; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_reclaim_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(reclaim, vnode_t, vp); + return (_err); } +/* + * Returns: 0 Success + * lock_fsnode:ENOENT No such file or directory [only for VFS + * that is not thread safe & vnode is + * currently being/has been terminated] + * :??? 
[per FS implementation specific] + */ #if 0 /* *# @@ -3964,37 +5055,39 @@ struct vnop_pathconf_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_name; - register_t *a_retval; + int32_t *a_retval; vfs_context_t a_context; }; #endif /* 0*/ errno_t -VNOP_PATHCONF(struct vnode *vp, int name, register_t *retval, vfs_context_t context) +VNOP_PATHCONF(struct vnode *vp, int name, int32_t *retval, vfs_context_t ctx) { int _err; struct vnop_pathconf_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_pathconf_desc; a.a_vp = vp; a.a_name = name; a.a_retval = retval; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_pathconf_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(pathconf, vnode_t, vp); + return (_err); } +/* + * Returns: 0 Success + * err_advlock:ENOTSUP + * lf_advlock:??? + * :??? + * + * Notes: VFS implementations of advisory locking using calls through + * because lock enforcement does not occur + * locally should try to limit themselves to the return codes + * documented above for lf_advlock and err_advlock. 
+ */ #if 0 /* *# @@ -4012,13 +5105,10 @@ struct vnop_advlock_args { }; #endif /* 0*/ errno_t -VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, vfs_context_t context) +VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, vfs_context_t ctx, struct timespec *timeout) { int _err; struct vnop_advlock_args a; - int thread_safe; - int funnel_state = 0; - struct uthread * uth; a.a_desc = &vnop_advlock_desc; a.a_vp = vp; @@ -4026,13 +5116,9 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, a.a_op = op; a.a_fl = fl; a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; + a.a_timeout = timeout; - uth = get_bsdthread_info(current_thread()); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } /* Disallow advisory locking on non-seekable vnodes */ if (vnode_isfifo(vp)) { _err = err_advlock(&a); @@ -4040,14 +5126,18 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, if ((vp->v_flag & VLOCKLOCAL)) { /* Advisory locking done at this layer */ _err = lf_advlock(&a); + } else if (flags & F_OFD_LOCK) { + /* Non-local locking doesn't work for OFD locks */ + _err = err_advlock(&a); } else { /* Advisory locking done by underlying filesystem */ _err = (*vp->v_op[vnop_advlock_desc.vdesc_offset])(&a); } + DTRACE_FSINFO(advlock, vnode_t, vp); + if (op == F_UNLCK && flags == F_FLOCK) + post_event_if_success(vp, _err, NOTE_FUNLOCK); } - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + return (_err); } @@ -4071,12 +5161,10 @@ struct vnop_allocate_args { #endif /* 0*/ errno_t -VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesallocated, off_t offset, vfs_context_t context) +VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesallocated, off_t offset, vfs_context_t ctx) { int _err; struct vnop_allocate_args a; - int 
thread_safe; - int funnel_state = 0; a.a_desc = &vnop_allocate_desc; a.a_vp = vp; @@ -4084,18 +5172,16 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc a.a_flags = flags; a.a_bytesallocated = bytesallocated; a.a_offset = offset; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_allocate_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); + DTRACE_FSINFO(allocate, vnode_t, vp); +#if CONFIG_FSE + if (_err == 0) { + add_fsevent(FSE_STAT_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); } +#endif + return (_err); } @@ -4109,7 +5195,7 @@ struct vnop_pagein_args { struct vnodeop_desc *a_desc; vnode_t a_vp; upl_t a_pl; - vm_offset_t a_pl_offset; + upl_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; int a_flags; @@ -4117,12 +5203,10 @@ struct vnop_pagein_args { }; #endif /* 0*/ errno_t -VNOP_PAGEIN(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t context) +VNOP_PAGEIN(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t ctx) { int _err; struct vnop_pagein_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_pagein_desc; a.a_vp = vp; @@ -4131,16 +5215,11 @@ VNOP_PAGEIN(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, s a.a_f_offset = f_offset; a.a_size = size; a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_pagein_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(pagein, vnode_t, vp); + return (_err); } @@ -4154,7 +5233,7 @@ struct vnop_pageout_args { struct vnodeop_desc *a_desc; vnode_t 
a_vp; upl_t a_pl; - vm_offset_t a_pl_offset; + upl_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; int a_flags; @@ -4163,12 +5242,10 @@ struct vnop_pageout_args { #endif /* 0*/ errno_t -VNOP_PAGEOUT(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t context) +VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t ctx) { int _err; struct vnop_pageout_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_pageout_desc; a.a_vp = vp; @@ -4177,19 +5254,27 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, a.a_f_offset = f_offset; a.a_size = size; a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_pageout_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(pageout, vnode_t, vp); + + post_event_if_success(vp, _err, NOTE_WRITE); + return (_err); } +int +vn_remove(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, struct vnode_attr *vap, vfs_context_t ctx) +{ + if (vnode_compound_remove_available(dvp)) { + return VNOP_COMPOUND_REMOVE(dvp, vpp, ndp, flags, vap, ctx); + } else { + return VNOP_REMOVE(dvp, *vpp, &ndp->ni_cnd, flags, ctx); + } +} + +#if CONFIG_SEARCHFS #if 0 /* @@ -4203,12 +5288,12 @@ struct vnop_searchfs_args { void *a_searchparams1; void *a_searchparams2; struct attrlist *a_searchattrs; - u_long a_maxmatches; + uint32_t a_maxmatches; struct timeval *a_timelimit; struct attrlist *a_returnattrs; - u_long *a_nummatches; - u_long a_scriptcode; - u_long a_options; + uint32_t *a_nummatches; + uint32_t a_scriptcode; + uint32_t a_options; struct uio *a_uio; struct searchstate *a_searchstate; vfs_context_t a_context; @@ -4216,12 +5301,10 @@ struct vnop_searchfs_args 
{ #endif /* 0*/ errno_t -VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct attrlist *searchattrs, u_long maxmatches, struct timeval *timelimit, struct attrlist *returnattrs, u_long *nummatches, u_long scriptcode, u_long options, struct uio *uio, struct searchstate *searchstate, vfs_context_t context) +VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct attrlist *searchattrs, uint32_t maxmatches, struct timeval *timelimit, struct attrlist *returnattrs, uint32_t *nummatches, uint32_t scriptcode, uint32_t options, struct uio *uio, struct searchstate *searchstate, vfs_context_t ctx) { int _err; struct vnop_searchfs_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_searchfs_desc; a.a_vp = vp; @@ -4236,20 +5319,14 @@ VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct a.a_options = options; a.a_uio = uio; a.a_searchstate = searchstate; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_searchfs_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(searchfs, vnode_t, vp); + return (_err); } +#endif /* CONFIG_SEARCHFS */ #if 0 /* @@ -4272,7 +5349,7 @@ struct vnop_copyfile_args { #endif /* 0*/ errno_t VNOP_COPYFILE(struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, - int mode, int flags, vfs_context_t context) + int mode, int flags, vfs_context_t ctx) { int _err; struct vnop_copyfile_args a; @@ -4283,19 +5360,71 @@ VNOP_COPYFILE(struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct c a.a_tcnp = tcnp; a.a_mode = mode; a.a_flags = flags; - a.a_context = context; + a.a_context = ctx; _err = (*fvp->v_op[vnop_copyfile_desc.vdesc_offset])(&a); + DTRACE_FSINFO(copyfile, vnode_t, fvp); return (_err); } +#if 0 +struct 
vnop_clonefile_args { + struct vnodeop_desc *a_desc; + vnode_t a_fvp; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + uint32_t a_flags; + vfs_context_t a_context; + int (*a_dir_clone_authorizer)( /* Authorization callback */ + struct vnode_attr *vap, /* attribute to be authorized */ + kauth_action_t action, /* action for which attribute is to be authorized */ + struct vnode_attr *dvap, /* target directory attributes */ + vnode_t sdvp, /* source directory vnode pointer (optional) */ + mount_t mp, /* mount point of filesystem */ + dir_clone_authorizer_op_t vattr_op, /* specific operation requested : setup, authorization or cleanup */ + vfs_context_t ctx, /* As passed to VNOP */ + void *reserved); /* Always NULL */ + void *a_reserved; /* Currently unused */ +}; +#endif /* 0 */ + +errno_t +VNOP_CLONEFILE(vnode_t fvp, vnode_t dvp, vnode_t *vpp, + struct componentname *cnp, struct vnode_attr *vap, uint32_t flags, + vfs_context_t ctx) +{ + int _err; + struct vnop_clonefile_args a; + a.a_desc = &vnop_clonefile_desc; + a.a_fvp = fvp; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = cnp; + a.a_vap = vap; + a.a_flags = flags; + a.a_context = ctx; + + if (vnode_vtype(fvp) == VDIR) + a.a_dir_clone_authorizer = vnode_attr_authorize_dir_clone; + else + a.a_dir_clone_authorizer = NULL; + + _err = (*dvp->v_op[vnop_clonefile_desc.vdesc_offset])(&a); + + if (_err == 0 && *vpp) + DTRACE_FSINFO(clonefile, vnode_t, *vpp); + + post_event_if_success(dvp, _err, NOTE_WRITE); + + return (_err); +} errno_t -VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options, vfs_context_t context) +VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options, vfs_context_t ctx) { struct vnop_getxattr_args a; int error; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_getxattr_desc; a.a_vp = vp; @@ -4303,101 +5432,74 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options 
a.a_uio = uio; a.a_size = size; a.a_options = options; - a.a_context = context; + a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } error = (*vp->v_op[vnop_getxattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(getxattr, vnode_t, vp); + return (error); } errno_t -VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t context) +VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t ctx) { struct vnop_setxattr_args a; int error; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_setxattr_desc; a.a_vp = vp; a.a_name = name; a.a_uio = uio; a.a_options = options; - a.a_context = context; + a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } error = (*vp->v_op[vnop_setxattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(setxattr, vnode_t, vp); + + if (error == 0) + vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS); + + post_event_if_success(vp, error, NOTE_ATTRIB); + return (error); } errno_t -VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t context) +VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx) { struct vnop_removexattr_args a; int error; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_removexattr_desc; a.a_vp = vp; a.a_name = name; a.a_options = options; - a.a_context = context; + a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } error = (*vp->v_op[vnop_removexattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(removexattr, vnode_t, vp); + + 
post_event_if_success(vp, error, NOTE_ATTRIB); + return (error); } errno_t -VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t context) +VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t ctx) { struct vnop_listxattr_args a; int error; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_listxattr_desc; a.a_vp = vp; a.a_uio = uio; a.a_size = size; a.a_options = options; - a.a_context = context; + a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } error = (*vp->v_op[vnop_listxattr_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(listxattr, vnode_t, vp); + return (error); } @@ -4420,22 +5522,15 @@ VNOP_BLKTOOFF(struct vnode *vp, daddr64_t lblkno, off_t *offset) { int _err; struct vnop_blktooff_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_blktooff_desc; a.a_vp = vp; a.a_lblkno = lblkno; a.a_offset = offset; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_blktooff_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(blktooff, vnode_t, vp); + return (_err); } @@ -4457,22 +5552,15 @@ VNOP_OFFTOBLK(struct vnode *vp, off_t offset, daddr64_t *lblkno) { int _err; struct vnop_offtoblk_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = &vnop_offtoblk_desc; a.a_vp = vp; a.a_offset = offset; a.a_lblkno = lblkno; - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_offtoblk_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } + DTRACE_FSINFO(offtoblk, vnode_t, vp); + return (_err); } @@ -4495,37 +5583,44 @@ struct 
vnop_blockmap_args { }; #endif /* 0*/ errno_t -VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size_t *run, void *poff, int flags, vfs_context_t context) +VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size_t *run, void *poff, int flags, vfs_context_t ctx) { int _err; struct vnop_blockmap_args a; - int thread_safe; - int funnel_state = 0; - struct vfs_context acontext; + size_t localrun = 0; - if (context == NULL) { - acontext.vc_proc = current_proc(); - acontext.vc_ucred = kauth_cred_get(); - context = &acontext; + if (ctx == NULL) { + ctx = vfs_context_current(); } a.a_desc = &vnop_blockmap_desc; a.a_vp = vp; a.a_foffset = foffset; a.a_size = size; a.a_bpn = bpn; - a.a_run = run; + a.a_run = &localrun; a.a_poff = poff; a.a_flags = flags; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } _err = (*vp->v_op[vnop_blockmap_desc.vdesc_offset])(&a); - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); + DTRACE_FSINFO(blockmap, vnode_t, vp); + + /* + * We used a local variable to request information from the underlying + * filesystem about the length of the I/O run in question. If + * we get malformed output from the filesystem, we cap it to the length + * requested, at most. Update 'run' on the way out. 
+ */ + if (_err == 0) { + if (localrun > size) { + localrun = size; + } + + if (run) { + *run = localrun; + } } + return (_err); } @@ -4541,9 +5636,11 @@ VNOP_STRATEGY(struct buf *bp) { int _err; struct vnop_strategy_args a; + vnode_t vp = buf_vnode(bp); a.a_desc = &vnop_strategy_desc; a.a_bp = bp; - _err = (*buf_vnode(bp)->v_op[vnop_strategy_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_strategy_desc.vdesc_offset])(&a); + DTRACE_FSINFO(strategy, vnode_t, vp); return (_err); } @@ -4558,9 +5655,11 @@ VNOP_BWRITE(struct buf *bp) { int _err; struct vnop_bwrite_args a; + vnode_t vp = buf_vnode(bp); a.a_desc = &vnop_bwrite_desc; a.a_bp = bp; - _err = (*buf_vnode(bp)->v_op[vnop_bwrite_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_bwrite_desc.vdesc_offset])(&a); + DTRACE_FSINFO(bwrite, vnode_t, vp); return (_err); } @@ -4573,28 +5672,19 @@ struct vnop_kqfilt_add_args { }; #endif errno_t -VNOP_KQFILT_ADD(struct vnode *vp, struct knote *kn, vfs_context_t context) +VNOP_KQFILT_ADD(struct vnode *vp, struct knote *kn, vfs_context_t ctx) { int _err; struct vnop_kqfilt_add_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = VDESC(vnop_kqfilt_add); a.a_vp = vp; a.a_kn = kn; - a.a_context = context; - thread_safe = THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_kqfilt_add_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(kqfilt_add, vnode_t, vp); + return(_err); } @@ -4607,28 +5697,130 @@ struct vnop_kqfilt_remove_args { }; #endif errno_t -VNOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, vfs_context_t context) +VNOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, vfs_context_t ctx) { int _err; struct vnop_kqfilt_remove_args a; - int thread_safe; - int funnel_state = 0; a.a_desc = VDESC(vnop_kqfilt_remove); a.a_vp = vp; a.a_ident = ident; - a.a_context = context; - thread_safe = 
THREAD_SAFE_FS(vp); + a.a_context = ctx; - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } _err = (*vp->v_op[vnop_kqfilt_remove_desc.vdesc_offset])(&a); - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } + DTRACE_FSINFO(kqfilt_remove, vnode_t, vp); + + return(_err); +} + +errno_t +VNOP_MONITOR(vnode_t vp, uint32_t events, uint32_t flags, void *handle, vfs_context_t ctx) +{ + int _err; + struct vnop_monitor_args a; + + a.a_desc = VDESC(vnop_monitor); + a.a_vp = vp; + a.a_events = events; + a.a_flags = flags; + a.a_handle = handle; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_monitor_desc.vdesc_offset])(&a); + DTRACE_FSINFO(monitor, vnode_t, vp); + + return(_err); +} + +#if 0 +struct vnop_setlabel_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct label *a_vl; + vfs_context_t a_context; +}; +#endif +errno_t +VNOP_SETLABEL(struct vnode *vp, struct label *label, vfs_context_t ctx) +{ + int _err; + struct vnop_setlabel_args a; + + a.a_desc = VDESC(vnop_setlabel); + a.a_vp = vp; + a.a_vl = label; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_setlabel_desc.vdesc_offset])(&a); + DTRACE_FSINFO(setlabel, vnode_t, vp); + return(_err); } + +#if NAMEDSTREAMS +/* + * Get a named streamed + */ +errno_t +VNOP_GETNAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperation operation, int flags, vfs_context_t ctx) +{ + int _err; + struct vnop_getnamedstream_args a; + + a.a_desc = &vnop_getnamedstream_desc; + a.a_vp = vp; + a.a_svpp = svpp; + a.a_name = name; + a.a_operation = operation; + a.a_flags = flags; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_getnamedstream_desc.vdesc_offset])(&a); + DTRACE_FSINFO(getnamedstream, vnode_t, vp); + return (_err); +} + +/* + * Create a named streamed + */ +errno_t +VNOP_MAKENAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, int flags, vfs_context_t ctx) +{ + int _err; + struct vnop_makenamedstream_args a; + + a.a_desc = 
&vnop_makenamedstream_desc; + a.a_vp = vp; + a.a_svpp = svpp; + a.a_name = name; + a.a_flags = flags; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_makenamedstream_desc.vdesc_offset])(&a); + DTRACE_FSINFO(makenamedstream, vnode_t, vp); + return (_err); +} + + +/* + * Remove a named streamed + */ +errno_t +VNOP_REMOVENAMEDSTREAM(vnode_t vp, vnode_t svp, const char *name, int flags, vfs_context_t ctx) +{ + int _err; + struct vnop_removenamedstream_args a; + + a.a_desc = &vnop_removenamedstream_desc; + a.a_vp = vp; + a.a_svp = svp; + a.a_name = name; + a.a_flags = flags; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_removenamedstream_desc.vdesc_offset])(&a); + DTRACE_FSINFO(removenamedstream, vnode_t, vp); + return (_err); +} +#endif