/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
*
* @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
*
- * History
- * 10-20-1997 Umesh Vaishampayan
- * Fixed the count to be off_t rather than int.
*/
#include <sys/param.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/file.h>
+#include <sys/file_internal.h>
#include <sys/stat.h>
-#include <sys/buf.h>
-#include <sys/proc.h>
-#include <sys/mount.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
+#include <sys/mount_internal.h>
#include <sys/namei.h>
-#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/ubc.h>
-#include <mach/kern_return.h>
-#include <mach/memory_object_control.h>
-#include <mach/vm_prot.h>
+#include <sys/conf.h>
+#include <sys/disk.h>
+#include <sys/fsevents.h>
+#include <sys/kdebug.h>
+#include <sys/xattr.h>
+#include <sys/ubc_internal.h>
+#include <sys/uio_internal.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+
+#include <miscfs/specfs/specdev.h>
+
+
+
+static int vn_closefile(struct fileglob *fp, struct proc *p);
+static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, struct proc *p);
+static int vn_read(struct fileproc *fp, struct uio *uio,
+ kauth_cred_t cred, int flags, struct proc *p);
+static int vn_write(struct fileproc *fp, struct uio *uio,
+ kauth_cred_t cred, int flags, struct proc *p);
+static int vn_select( struct fileproc *fp, int which, void * wql, struct proc *p);
+static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, struct proc *p);
+#if 0
+static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, struct proc *p);
+#endif
struct fileops vnops =
- { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };
+ { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, 0 };
/*
* Common code for vnode open operations.
- * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
+ * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
+ *
+ * XXX the profusion of interfaces here is probably a bad thing.
+ */
+int
+vn_open(struct nameidata *ndp, int fmode, int cmode)
+{
+ return(vn_open_modflags(ndp, &fmode, cmode));
+}
+
+int
+vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, cmode);
+
+ return(vn_open_auth(ndp, fmodep, &va));
+}
+
+/*
+ * Open a file with authorization, updating the contents of the structures
+ * pointed to by ndp, fmodep, and vap as necessary to perform the requested
+ * operation. This function is used for both opens of existing files, and
+ * creation of new files.
+ *
+ * Parameters: ndp The nameidata pointer describing the
+ * file
+ * fmodep A pointer to an int containing the mode
+ * information to be used for the open
+ * vap A pointer to the vnode attribute
+ * descriptor to be used for the open
+ *
+ * Indirect: * Contents of the data structures pointed
+ * to by the parameters are modified as
+ * necessary to the requested operation.
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
+ *
+ * The contents of '*ndp' will be modified, based on the other
+ * arguments to this function, and to return file and directory
+ * data necessary to satisfy the requested operation.
+ *
+ * If the file does not exist and we are creating it, then the
+ * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
+ * caller that the file was not truncated.
+ *
+ * If the file exists and the O_EXCL flag was not specified, then
+ * the O_CREAT flag will be cleared in '*fmodep' to indicate to
+ * the caller that the existing file was merely opened rather
+ * than created.
+ *
+ * The contents of '*vap' will be modified as necessary to
+ * complete the operation, including setting of supported
+ * attributes, clearing of fields containing unsupported attributes
+ * in the request, if the request proceeds without them, etc..
+ *
+ * XXX: This function is too complicated in acting on its arguments
+ *
+ * XXX: We should enumerate the possible errno values here, and where
+ * in the code they originated.
*/
-vn_open(ndp, fmode, cmode)
- register struct nameidata *ndp;
- int fmode, cmode;
+int
+vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
{
- register struct vnode *vp;
- register struct proc *p = ndp->ni_cnd.cn_proc;
- register struct ucred *cred = p->p_ucred;
- struct vattr vat;
- struct vattr *vap = &vat;
+ struct vnode *vp;
+ struct vnode *dvp;
+ vfs_context_t ctx = ndp->ni_cnd.cn_context;
int error;
+ int fmode;
+ kauth_action_t action;
+again:
+ vp = NULL;
+ dvp = NULL;
+ fmode = *fmodep;
if (fmode & O_CREAT) {
+ if ( (fmode & O_DIRECTORY) ) {
+ error = EINVAL;
+ goto out;
+ }
ndp->ni_cnd.cn_nameiop = CREATE;
- ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
+
if ((fmode & O_EXCL) == 0)
ndp->ni_cnd.cn_flags |= FOLLOW;
- if (error = namei(ndp))
- return (error);
- if (ndp->ni_vp == NULL) {
- VATTR_NULL(vap);
- vap->va_type = VREG;
- vap->va_mode = cmode;
+ if ( (error = namei(ndp)) )
+ goto out;
+ dvp = ndp->ni_dvp;
+ vp = ndp->ni_vp;
+
+ /* not found, create */
+ if (vp == NULL) {
+ /* must have attributes for a new file */
+ if (vap == NULL) {
+ error = EINVAL;
+ goto badcreate;
+ }
+
+ /* authorize before creating */
+ if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto badcreate;
+
+ VATTR_SET(vap, va_type, VREG);
if (fmode & O_EXCL)
vap->va_vaflags |= VA_EXCLUSIVE;
- VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
- if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
- &ndp->ni_cnd, vap))
- return (error);
- fmode &= ~O_TRUNC;
+
+ if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0)
+ goto badcreate;
+
vp = ndp->ni_vp;
+
+ if (vp) {
+ int update_flags = 0;
+
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
+
+ if (need_fsevent(FSE_CREATE_FILE, vp)) {
+ add_fsevent(FSE_CREATE_FILE, ctx,
+ FSE_ARG_VNODE, vp,
+ FSE_ARG_DONE);
+ }
+ }
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * and clear the ni_dvp field, since it may need
+ * to release the fs_nodelock on the dvp
+ */
+badcreate:
+ nameidone(ndp);
+ ndp->ni_dvp = NULL;
+ vnode_put(dvp);
+
+ if (error) {
+ /*
+ * Check for a creation race.
+ */
+ if ((error == EEXIST) && !(fmode & O_EXCL)) {
+ goto again;
+ }
+ goto bad;
+ }
+ fmode &= ~O_TRUNC;
} else {
- VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
- if (ndp->ni_dvp == ndp->ni_vp)
- vrele(ndp->ni_dvp);
- else
- vput(ndp->ni_dvp);
+ nameidone(ndp);
ndp->ni_dvp = NULL;
- vp = ndp->ni_vp;
+ vnode_put(dvp);
+
if (fmode & O_EXCL) {
error = EEXIST;
goto bad;
}
} else {
ndp->ni_cnd.cn_nameiop = LOOKUP;
- ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
- if (error = namei(ndp))
- return (error);
+ ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | AUDITVNPATH1;
+ if ( (error = namei(ndp)) )
+ goto out;
vp = ndp->ni_vp;
+ nameidone(ndp);
+ ndp->ni_dvp = NULL;
+
+ if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
+ error = ENOTDIR;
+ goto bad;
+ }
}
- if (vp->v_type == VSOCK) {
- error = EOPNOTSUPP;
+ if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
+ error = EOPNOTSUPP; /* Operation not supported on socket */
goto bad;
}
+
+#if DIAGNOSTIC
+ if (UBCINFOMISSING(vp))
+ panic("vn_open: ubc_info_init");
+#endif /* DIAGNOSTIC */
+
+ /* authorize open of an existing file */
if ((fmode & O_CREAT) == 0) {
- if (fmode & FREAD && fmode & (FWRITE | O_TRUNC)) {
- int err = 0;
- if (vp->v_type == VDIR)
- err = EISDIR;
- else
- err = vn_writechk(vp);
- if (err && !(error = VOP_ACCESS(vp, VREAD, cred, p)))
- error = err;
- if (error || (error = VOP_ACCESS(vp, VREAD|VWRITE,
- cred, p)))
- goto bad;
- } else if (fmode & FREAD) {
- if ((error = VOP_ACCESS(vp, VREAD, cred, p)))
- goto bad;
- } else if (fmode & (FWRITE | O_TRUNC)) {
- if (vp->v_type == VDIR) {
- error = EISDIR;
- goto bad;
- }
- if ((error = vn_writechk(vp)) ||
- (error = VOP_ACCESS(vp, VWRITE, cred, p)))
- goto bad;
+
+ /* disallow write operations on directories */
+ if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
+ error = EISDIR;
+ goto bad;
}
- }
- if (fmode & O_TRUNC) {
- VOP_UNLOCK(vp, 0, p); /* XXX */
- VOP_LEASE(vp, p, cred, LEASE_WRITE);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
- VATTR_NULL(vap);
- vap->va_size = 0;
- if (error = VOP_SETATTR(vp, vap, cred, p))
+
+ /* compute action to be authorized */
+ action = 0;
+ if (fmode & FREAD)
+ action |= KAUTH_VNODE_READ_DATA;
+ if (fmode & (FWRITE | O_TRUNC))
+ action |= KAUTH_VNODE_WRITE_DATA;
+ if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
goto bad;
+
}
- if (error = VOP_OPEN(vp, fmode, cred, p))
+
+ if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
goto bad;
- if (UBCINFOMISSING(vp))
- panic("vn_open: ubc_info_init");
- if (UBCINFOEXISTS(vp) && !ubc_hold(vp))
- panic("vn_open: hold");
- if (fmode & FWRITE)
- if (++vp->v_writecount <= 0)
- panic("vn_open: v_writecount");
+ }
+ if ( (error = vnode_ref_ext(vp, fmode)) )
+ goto bad;
+
+ /* call out to allow 3rd party notification of open.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
+ (uintptr_t)vp, 0);
+
+ *fmodep = fmode;
return (0);
bad:
- vput(vp);
+ ndp->ni_vp = NULL;
+ if (vp) {
+ vnode_put(vp);
+ /*
+ * Check for a race against unlink. We had a vnode
+ * but according to vnode_authorize or VNOP_OPEN it
+ * no longer exists.
+ */
+ if ((error == ENOENT) && (*fmodep & O_CREAT)) {
+ goto again;
+ }
+ }
+out:
return (error);
}
/*
- * Check for write permissions on the specified vnode.
- * Prototype text segments cannot be written.
+ * Authorize an action against a vnode. This has been the canonical way to
+ * ensure that the credential/process/etc. referenced by a vfs_context
+ * is granted the rights called out in 'mode' against the vnode 'vp'.
+ *
+ * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
+ * to add support for more rights. As such, this interface will be deprecated
+ * and callers will use vnode_authorize instead.
*/
-vn_writechk(vp)
- register struct vnode *vp;
+#warning vn_access is deprecated
+int
+vn_access(vnode_t vp, int mode, vfs_context_t context)
{
-
- /*
- * If there's shared text associated with
- * the vnode, try to free it up once. If
- * we fail, we can't allow writing.
- */
-#if 0
- /* XXXXX Not sure we need this */
- if (vp->v_flag & VTEXT)
- return (ETXTBSY);
-#endif /* 0 */
- return (0);
+ kauth_action_t action;
+
+ action = 0;
+ if (mode & VREAD)
+ action |= KAUTH_VNODE_READ_DATA;
+ if (mode & VWRITE)
+ action |= KAUTH_VNODE_WRITE_DATA;
+ if (mode & VEXEC)
+ action |= KAUTH_VNODE_EXECUTE;
+
+ return(vnode_authorize(vp, NULL, action, context));
}
/*
* Vnode close call
*/
-vn_close(vp, flags, cred, p)
- register struct vnode *vp;
- int flags;
- struct ucred *cred;
- struct proc *p;
+int
+vn_close(struct vnode *vp, int flags, kauth_cred_t cred, struct proc *p)
{
+ struct vfs_context context;
int error;
- vm_map_t user_map;
- vm_offset_t addr, addr1;
- vm_size_t size, pageoff;
-
- if (flags & FWRITE)
- vp->v_writecount--;
- error = VOP_CLOSE(vp, flags, cred, p);
- ubc_rele(vp);
- vrele(vp);
+
+ context.vc_proc = p;
+ context.vc_ucred = cred;
+
+ if (flags & FWASWRITTEN) {
+ if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
+ add_fsevent(FSE_CONTENT_MODIFIED, &context,
+ FSE_ARG_VNODE, vp,
+ FSE_ARG_DONE);
+ }
+ }
+
+ error = VNOP_CLOSE(vp, flags, &context);
+ (void)vnode_rele_ext(vp, flags, 0);
+
return (error);
}
+static int
+vn_read_swapfile(
+ struct vnode *vp,
+ uio_t uio)
+{
+ static char *swap_read_zero_page = NULL;
+ int error;
+ off_t swap_count, this_count;
+ off_t file_end, read_end;
+ off_t prev_resid;
+
+ /*
+ * Reading from a swap file will get you all zeroes.
+ */
+ error = 0;
+ swap_count = uio_resid(uio);
+
+ file_end = ubc_getsize(vp);
+ read_end = uio->uio_offset + uio_resid(uio);
+ if (uio->uio_offset >= file_end) {
+ /* uio starts after end of file: nothing to read */
+ swap_count = 0;
+ } else if (read_end > file_end) {
+ /* uio extends beyond end of file: stop before that */
+ swap_count -= (read_end - file_end);
+ }
+
+ while (swap_count > 0) {
+ if (swap_read_zero_page == NULL) {
+ char *my_zero_page;
+ int funnel_state;
+
+ /*
+ * Take kernel funnel so that only one thread
+ * sets up "swap_read_zero_page".
+ */
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+ if (swap_read_zero_page == NULL) {
+ MALLOC(my_zero_page, char *, PAGE_SIZE,
+ M_TEMP, M_WAITOK);
+ memset(my_zero_page, '?', PAGE_SIZE);
+ /*
+ * Adding a newline character here
+ * and there prevents "less(1)", for
+ * example, from getting too confused
+ * about a file with one really really
+ * long line.
+ */
+ my_zero_page[PAGE_SIZE-1] = '\n';
+ if (swap_read_zero_page == NULL) {
+ swap_read_zero_page = my_zero_page;
+ } else {
+ FREE(my_zero_page, M_TEMP);
+ }
+ } else {
+ /*
+ * Someone else raced us here and won;
+ * just use their page.
+ */
+ }
+ thread_funnel_set(kernel_flock, funnel_state);
+ }
+
+ this_count = swap_count;
+ if (this_count > PAGE_SIZE) {
+ this_count = PAGE_SIZE;
+ }
+
+ prev_resid = uio_resid(uio);
+ error = uiomove((caddr_t) swap_read_zero_page,
+ this_count,
+ uio);
+ if (error) {
+ break;
+ }
+ swap_count -= (prev_resid - uio_resid(uio));
+ }
+
+ return error;
+}
/*
* Package up an I/O request on a vnode into a uio and do it.
*/
-vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
- enum uio_rw rw;
- struct vnode *vp;
- caddr_t base;
- int len;
- off_t offset;
- enum uio_seg segflg;
- int ioflg;
- struct ucred *cred;
- int *aresid;
- struct proc *p;
+int
+vn_rdwr(
+ enum uio_rw rw,
+ struct vnode *vp,
+ caddr_t base,
+ int len,
+ off_t offset,
+ enum uio_seg segflg,
+ int ioflg,
+ kauth_cred_t cred,
+ int *aresid,
+ struct proc *p)
{
- struct uio auio;
- struct iovec aiov;
+ return vn_rdwr_64(rw,
+ vp,
+ (uint64_t)(uintptr_t)base,
+ (int64_t)len,
+ offset,
+ segflg,
+ ioflg,
+ cred,
+ aresid,
+ p);
+}
+
+
+int
+vn_rdwr_64(
+ enum uio_rw rw,
+ struct vnode *vp,
+ uint64_t base,
+ int64_t len,
+ off_t offset,
+ enum uio_seg segflg,
+ int ioflg,
+ kauth_cred_t cred,
+ int *aresid,
+ struct proc *p)
+{
+ uio_t auio;
+ int spacetype;
+ struct vfs_context context;
int error=0;
+ char uio_buf[ UIO_SIZEOF(1) ];
+
+ context.vc_proc = p;
+ context.vc_ucred = cred;
- /* FIXME XXX */
- if ((ioflg & IO_NODELOCKED) == 0)
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- aiov.iov_base = base;
- aiov.iov_len = len;
- auio.uio_resid = len;
- auio.uio_offset = offset;
- auio.uio_segflg = segflg;
- auio.uio_rw = rw;
- auio.uio_procp = p;
-
- if (rw == UIO_READ)
- error = VOP_READ(vp, &auio, ioflg, cred);
- else
- error = VOP_WRITE(vp, &auio, ioflg, cred);
+ if (UIO_SEG_IS_USER_SPACE(segflg)) {
+ spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ }
+ else {
+ spacetype = UIO_SYSSPACE;
+ }
+ auio = uio_createwithbuffer(1, offset, spacetype, rw,
+ &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, base, len);
+
+ if (rw == UIO_READ) {
+ if (vp->v_flag & VSWAP) {
+ error = vn_read_swapfile(vp, auio);
+ } else {
+ error = VNOP_READ(vp, auio, ioflg, &context);
+ }
+ } else {
+ error = VNOP_WRITE(vp, auio, ioflg, &context);
+ }
if (aresid)
- *aresid = auio.uio_resid;
+ // LP64todo - fix this
+ *aresid = uio_resid(auio);
else
- if (auio.uio_resid && error == 0)
+ if (uio_resid(auio) && error == 0)
error = EIO;
- if ((ioflg & IO_NODELOCKED) == 0)
- VOP_UNLOCK(vp, 0, p);
return (error);
}
/*
* File table vnode read routine.
*/
-vn_read(fp, uio, cred)
- struct file *fp;
- struct uio *uio;
- struct ucred *cred;
+static int
+vn_read(struct fileproc *fp, struct uio *uio, kauth_cred_t cred,
+ int flags, struct proc *p)
{
- struct vnode *vp = (struct vnode *)fp->f_data;
- struct proc *p = uio->uio_procp;
- int error;
+ struct vnode *vp;
+ int error, ioflag;
off_t count;
+ struct vfs_context context;
- VOP_LEASE(vp, p, cred, LEASE_READ);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- uio->uio_offset = fp->f_offset;
- count = uio->uio_resid;
+ context.vc_proc = p;
+ context.vc_ucred = cred;
- error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
+ vp = (struct vnode *)fp->f_fglob->fg_data;
+ if ( (error = vnode_getwithref(vp)) ) {
+ return(error);
+ }
+ ioflag = 0;
+ if (fp->f_fglob->fg_flag & FNONBLOCK)
+ ioflag |= IO_NDELAY;
+
+ if ((flags & FOF_OFFSET) == 0)
+ uio->uio_offset = fp->f_fglob->fg_offset;
+ count = uio_resid(uio);
- fp->f_offset += count - uio->uio_resid;
- VOP_UNLOCK(vp, 0, p);
+ if (vp->v_flag & VSWAP) {
+ /* special case for swap files */
+ error = vn_read_swapfile(vp, uio);
+ } else {
+ error = VNOP_READ(vp, uio, ioflag, &context);
+ }
+ if ((flags & FOF_OFFSET) == 0)
+ fp->f_fglob->fg_offset += count - uio_resid(uio);
+
+ (void)vnode_put(vp);
return (error);
}
/*
* File table vnode write routine.
*/
-vn_write(fp, uio, cred)
- struct file *fp;
- struct uio *uio;
- struct ucred *cred;
+static int
+vn_write(struct fileproc *fp, struct uio *uio, kauth_cred_t cred,
+ int flags, struct proc *p)
{
- struct vnode *vp = (struct vnode *)fp->f_data;
- struct proc *p = uio->uio_procp;
- int error, ioflag = IO_UNIT;
+ struct vnode *vp;
+ int error, ioflag;
off_t count;
+ struct vfs_context context;
- if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+ context.vc_proc = p;
+ context.vc_ucred = cred;
+ count = 0;
+ vp = (struct vnode *)fp->f_fglob->fg_data;
+ if ( (error = vnode_getwithref(vp)) ) {
+ return(error);
+ }
+ ioflag = IO_UNIT;
+ if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
ioflag |= IO_APPEND;
- if (fp->f_flag & FNONBLOCK)
+ if (fp->f_fglob->fg_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
- if ((fp->f_flag & O_FSYNC) ||
+ if ((fp->f_fglob->fg_flag & O_FSYNC) ||
(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
ioflag |= IO_SYNC;
- VOP_LEASE(vp, p, cred, LEASE_WRITE);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- uio->uio_offset = fp->f_offset;
- count = uio->uio_resid;
- error = VOP_WRITE(vp, uio, ioflag, cred);
+ if ((flags & FOF_OFFSET) == 0) {
+ uio->uio_offset = fp->f_fglob->fg_offset;
+ count = uio_resid(uio);
+ }
+ if (p && (vp->v_type == VREG) &&
+ (((uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
+ (uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
+ psignal(p, SIGXFSZ);
+ vnode_put(vp);
+ return (EFBIG);
+ }
+
+ error = VNOP_WRITE(vp, uio, ioflag, &context);
+
+ if ((flags & FOF_OFFSET) == 0) {
+ if (ioflag & IO_APPEND)
+ fp->f_fglob->fg_offset = uio->uio_offset;
+ else
+ fp->f_fglob->fg_offset += count - uio_resid(uio);
+ }
- if (ioflag & IO_APPEND)
- fp->f_offset = uio->uio_offset;
- else
- fp->f_offset += count - uio->uio_resid;
/*
* Set the credentials on successful writes
*/
if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
- ubc_setcred(vp, p);
+ /*
+ * When called from aio subsystem, we only have the proc from
+ * which to get the credential, at this point, so use that
+ * instead. This means aio functions are incompatible with
+ * per-thread credentials (aio operations are proxied). We
+ * can't easily correct the aio vs. settid race in this case
+ * anyway, so we disallow it.
+ */
+ if ((flags & FOF_PCRED) == 0) {
+ ubc_setthreadcred(vp, p, current_thread());
+ } else {
+ ubc_setcred(vp, p);
+ }
}
-
- VOP_UNLOCK(vp, 0, p);
+ (void)vnode_put(vp);
return (error);
}
/*
* File table vnode stat routine.
*/
-vn_stat(vp, sb, p)
- struct vnode *vp;
- register struct stat *sb;
- struct proc *p;
+int
+vn_stat_noauth(struct vnode *vp, struct stat *sb, kauth_filesec_t *xsec, vfs_context_t ctx)
{
- struct vattr vattr;
- register struct vattr *vap;
+ struct vnode_attr va;
int error;
u_short mode;
+ kauth_filesec_t fsec;
- vap = &vattr;
- error = VOP_GETATTR(vp, vap, p->p_ucred, p);
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_fsid);
+ VATTR_WANTED(&va, va_fileid);
+ VATTR_WANTED(&va, va_mode);
+ VATTR_WANTED(&va, va_type);
+ VATTR_WANTED(&va, va_nlink);
+ VATTR_WANTED(&va, va_uid);
+ VATTR_WANTED(&va, va_gid);
+ VATTR_WANTED(&va, va_rdev);
+ VATTR_WANTED(&va, va_data_size);
+ VATTR_WANTED(&va, va_access_time);
+ VATTR_WANTED(&va, va_modify_time);
+ VATTR_WANTED(&va, va_change_time);
+ VATTR_WANTED(&va, va_flags);
+ VATTR_WANTED(&va, va_gen);
+ VATTR_WANTED(&va, va_iosize);
+ /* lower layers will synthesise va_total_alloc from va_data_size if required */
+ VATTR_WANTED(&va, va_total_alloc);
+ if (xsec != NULL) {
+ VATTR_WANTED(&va, va_uuuid);
+ VATTR_WANTED(&va, va_guuid);
+ VATTR_WANTED(&va, va_acl);
+ }
+ error = vnode_getattr(vp, &va, ctx);
if (error)
- return (error);
+ goto out;
/*
* Copy from vattr table
*/
- sb->st_dev = vap->va_fsid;
- sb->st_ino = vap->va_fileid;
- mode = vap->va_mode;
+ sb->st_dev = va.va_fsid;
+ sb->st_ino = (ino_t)va.va_fileid;
+ mode = va.va_mode;
switch (vp->v_type) {
case VREG:
mode |= S_IFREG;
mode |= S_IFIFO;
break;
default:
- return (EBADF);
+ error = EBADF;
+ goto out;
};
sb->st_mode = mode;
- sb->st_nlink = vap->va_nlink;
- sb->st_uid = vap->va_uid;
- sb->st_gid = vap->va_gid;
- sb->st_rdev = vap->va_rdev;
- sb->st_size = vap->va_size;
- sb->st_atimespec = vap->va_atime;
- sb->st_mtimespec = vap->va_mtime;
- sb->st_ctimespec = vap->va_ctime;
- sb->st_blksize = vap->va_blocksize;
- sb->st_flags = vap->va_flags;
+ sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
+ sb->st_uid = va.va_uid;
+ sb->st_gid = va.va_gid;
+ sb->st_rdev = va.va_rdev;
+ sb->st_size = va.va_data_size;
+ sb->st_atimespec = va.va_access_time;
+ sb->st_mtimespec = va.va_modify_time;
+ sb->st_ctimespec = va.va_change_time;
+ sb->st_blksize = va.va_iosize;
+ sb->st_flags = va.va_flags;
+ sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
+
+ /* if we're interested in extended security data and we got an ACL */
+ if (xsec != NULL) {
+ if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
+ !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
+ !VATTR_IS_SUPPORTED(&va, va_guuid)) {
+ *xsec = KAUTH_FILESEC_NONE;
+ } else {
+
+ if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
+ fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
+ } else {
+ fsec = kauth_filesec_alloc(0);
+ }
+ if (fsec == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
+ if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
+ fsec->fsec_owner = va.va_uuuid;
+ } else {
+ fsec->fsec_owner = kauth_null_guid;
+ }
+ if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
+ fsec->fsec_group = va.va_guuid;
+ } else {
+ fsec->fsec_group = kauth_null_guid;
+ }
+ if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
+ bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
+ } else {
+ fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
+ }
+ *xsec = fsec;
+ }
+ }
+
/* Do not give the generation number out to unpriviledged users */
- if (suser(p->p_ucred, &p->p_acflag))
+ if (va.va_gen && !vfs_context_issuser(ctx))
sb->st_gen = 0;
else
- sb->st_gen = vap->va_gen;
- sb->st_blocks = vap->va_bytes / S_BLKSIZE;
- return (0);
+ sb->st_gen = va.va_gen;
+
+ error = 0;
+out:
+ if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
+ kauth_acl_free(va.va_acl);
+ return (error);
+}
+
+int
+vn_stat(struct vnode *vp, struct stat *sb, kauth_filesec_t *xsec, vfs_context_t ctx)
+{
+ int error;
+
+ /* authorize */
+ if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
+ return(error);
+
+ /* actual stat */
+ return(vn_stat_noauth(vp, sb, xsec, ctx));
}
+
/*
* File table vnode ioctl routine.
*/
+static int
vn_ioctl(fp, com, data, p)
- struct file *fp;
+ struct fileproc *fp;
u_long com;
caddr_t data;
struct proc *p;
{
- register struct vnode *vp = ((struct vnode *)fp->f_data);
- struct vattr vattr;
+ register struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
+ struct vfs_context context;
+ off_t file_size;
int error;
+ struct vnode *ttyvp;
+ int funnel_state;
+
+ if ( (error = vnode_getwithref(vp)) ) {
+ return(error);
+ }
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_proc_ref(p); /* XXX kauth_cred_get() ??? */
switch (vp->v_type) {
case VREG:
case VDIR:
if (com == FIONREAD) {
- if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
- return (error);
- *(int *)data = vattr.va_size - fp->f_offset;
- return (0);
+ if ((error = vnode_size(vp, &file_size, &context)) != 0)
+ goto out;
+ *(int *)data = file_size - fp->f_fglob->fg_offset;
+ goto out;
+ }
+ if (com == FIONBIO || com == FIOASYNC) { /* XXX */
+ goto out;
}
- if (com == FIONBIO || com == FIOASYNC) /* XXX */
- return (0); /* XXX */
/* fall into ... */
default:
- return (ENOTTY);
+ error = ENOTTY;
+ goto out;
case VFIFO:
case VCHR:
case VBLK:
- error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
+
+ /* Should not be able to set block size from user space */
+ if (com == DKIOCSETBLOCKSIZE) {
+ error = EPERM;
+ goto out;
+ }
+
+ if (com == FIODTYPE) {
+ if (vp->v_type == VBLK) {
+ if (major(vp->v_rdev) >= nblkdev) {
+ error = ENXIO;
+ goto out;
+ }
+ *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
+
+ } else if (vp->v_type == VCHR) {
+ if (major(vp->v_rdev) >= nchrdev) {
+ error = ENXIO;
+ goto out;
+ }
+ *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
+ } else {
+ error = ENOTTY;
+ goto out;
+ }
+ goto out;
+ }
+ error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, &context);
+
if (error == 0 && com == TIOCSCTTY) {
- if (p->p_session->s_ttyvp)
- vrele(p->p_session->s_ttyvp);
+ vnode_ref(vp);
+
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+ ttyvp = p->p_session->s_ttyvp;
p->p_session->s_ttyvp = vp;
- VREF(vp);
+ thread_funnel_set(kernel_flock, funnel_state);
+
+ if (ttyvp)
+ vnode_rele(ttyvp);
}
- return (error);
}
+out:
+ (void)vnode_put(vp);
+ kauth_cred_unref(&context.vc_ucred);
+ return(error);
}
/*
* File table vnode select routine.
*/
-vn_select(fp, which, p)
- struct file *fp;
+static int
+vn_select(fp, which, wql, p)
+ struct fileproc *fp;
int which;
+ void * wql;
struct proc *p;
{
+ int error;
+ struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
+ struct vfs_context context;
+
+ if ( (error = vnode_getwithref(vp)) == 0 ) {
+ context.vc_proc = p;
+ context.vc_ucred = fp->f_fglob->fg_cred;
- return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
- fp->f_cred, p));
+ error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, &context);
+
+ (void)vnode_put(vp);
+ }
+ return(error);
+
}
/*
* acquire requested lock.
*/
int
-vn_lock(vp, flags, p)
- struct vnode *vp;
- int flags;
+vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p)
+{
+ return (0);
+}
+
+/*
+ * File table vnode close routine.
+ */
+static int
+vn_closefile(fg, p)
+ struct fileglob *fg;
struct proc *p;
{
+ struct vnode *vp = (struct vnode *)fg->fg_data;
int error;
+
+ if ( (error = vnode_getwithref(vp)) == 0 ) {
+ error = vn_close(vp, fg->fg_flag, fg->fg_cred, p);
+
+ (void)vnode_put(vp);
+ }
+ return(error);
+}
+
+int
+vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx)
+{
+ int error = 0;
+
+ switch(name) {
+ case _PC_EXTENDED_SECURITY_NP:
+ *retval = vfs_extendedsecurity(vnode_mount(vp));
+ break;
+ case _PC_AUTH_OPAQUE_NP:
+ *retval = vfs_authopaque(vnode_mount(vp));
+ break;
+ default:
+ error = VNOP_PATHCONF(vp, name, retval, ctx);
+ break;
+ }
+
+ return (error);
+}
+
+static int
+vn_kqfilt_add(fp, kn, p)
+ struct fileproc *fp;
+ struct knote *kn;
+ struct proc *p;
+{
+ struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data;
+ struct vfs_context context;
+ int error;
+ int funnel_state;
- do {
- if ((flags & LK_INTERLOCK) == 0)
- simple_lock(&vp->v_interlock);
- if (vp->v_flag & VXLOCK) {
- while (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- simple_unlock(&vp->v_interlock);
- tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
- }
- error = ENOENT;
- } else {
- error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
- if (error == 0)
- return (error);
- }
- flags &= ~LK_INTERLOCK;
- } while (flags & LK_RETRY);
+ if ( (error = vnode_getwithref(vp)) == 0 ) {
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_proc_ref(p); /* XXX kauth_cred_get() ??? */
+
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+ error = VNOP_KQFILT_ADD(vp, kn, &context);
+ thread_funnel_set(kernel_flock, funnel_state);
+
+ (void)vnode_put(vp);
+ kauth_cred_unref(&context.vc_ucred);
+ }
return (error);
}
-/*
- * File table vnode close routine.
- */
-vn_closefile(fp, p)
- struct file *fp;
+#if 0
+/* No one calls this yet. */
+static int
+vn_kqfilt_remove(vp, ident, p)
+ struct vnode *vp;
+ uintptr_t ident;
struct proc *p;
{
+ struct vfs_context context;
+ int error;
+ int funnel_state;
+
+ if ( (error = vnode_getwithref(vp)) == 0 ) {
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_proc_ref(p); /* XXX kauth_cred_get() ??? */
- return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
- fp->f_cred, p));
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+ error = VNOP_KQFILT_REMOVE(vp, ident, &context);
+ thread_funnel_set(kernel_flock, funnel_state);
+
+ (void)vnode_put(vp);
+ kauth_cred_unref(&context.vc_ucred);
+ }
+ return (error);
}
+#endif