/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <vm/vm_map.h>
#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+
static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
vfs_context_t ctx);
static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
vfs_context_t ctx);
+static void filt_vndetach(struct knote *kn);
+static int filt_vnode(struct knote *kn, long hint);
+static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
#if 0
static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
vfs_context_t ctx);
struct fileops vnops =
{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
+/*
+ * Filter operations installed (through kn_fop) on every knote that
+ * vn_kqfilt_add() attaches to a vnode. No f_attach hook is supplied;
+ * vn_kqfilt_add() performs the attach itself.
+ */
+struct filterops vnode_filtops = {
+ .f_isfd = 1, /* NOTE(review): presumably "ident is a file descriptor" -- confirm against struct filterops */
+ .f_attach = NULL,
+ .f_detach = filt_vndetach,
+ .f_event = filt_vnode
+};
+
/*
* Common code for vnode open operations.
* Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
return(vn_open_auth(ndp, fmodep, &va));
}
+/*
+ * Final step of an open: take a reference on the vnode for the given open
+ * mode via vnode_ref_ext(), then notify kauth listeners that the file was
+ * opened.
+ *
+ * Returns 0 on success, or the error from vnode_ref_ext(); when the
+ * reference fails no notification is issued.
+ */
+static int
+vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
+{
+	int ref_error = vnode_ref_ext(vp, fmode, 0);
+
+	if (ref_error != 0)
+		return ref_error;
+
+	/*
+	 * Informational callout for 3rd party listeners; the result of
+	 * kauth_authorize_fileop() is deliberately ignored.
+	 */
+	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
+	    (uintptr_t)vp, 0);
+
+	return 0;
+}
+
+/*
+ * Create step of vn_open_auth(), used when the lookup found no vnode.
+ * Forces va_type to VREG, sets VA_EXCLUSIVE for O_EXCL, then creates either
+ * a named resource fork stream or a file via vn_create().
+ *
+ * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to
+ * determine whether that has happened.
+ *
+ * Parameters:
+ *   ndp         lookup state; ni_dvp is the parent directory and ni_vp
+ *               receives the resulting vnode
+ *   vap         attributes for the new file (modified here)
+ *   fmode       open flags, also passed through to vn_create()
+ *   did_create  out: TRUE if a file was created. Not written when
+ *               authorization or vnode_makenamedstream() fails, so the
+ *               caller must pre-initialize it.
+ *   did_open    out: TRUE only when a batched (compound) create succeeded;
+ *               FALSE otherwise
+ *   ctx         caller's VFS context
+ *
+ * Returns 0 or an errno. On EKEEPLOOKING the function returns early with
+ * ni_dvp still set and the dvp not released; on every other path the dvp is
+ * released with vnode_put() (if ni_dvp was set) and ni_dvp is cleared to
+ * NULLVP.
+ */
+static int
+vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
+{
+ uint32_t status = 0;
+ vnode_t dvp = ndp->ni_dvp;
+ int batched;
+ int error;
+ vnode_t vp;
+
+ /* Whether the parent directory's filesystem reports compound-open support */
+ batched = vnode_compound_open_available(ndp->ni_dvp);
+ *did_open = FALSE;
+
+ VATTR_SET(vap, va_type, VREG);
+ if (fmode & O_EXCL)
+ vap->va_vaflags |= VA_EXCLUSIVE;
+
+#if NAMEDRSRCFORK
+ if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
+ if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
+ goto out;
+ if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
+ goto out;
+ *did_create = TRUE;
+ } else {
+#endif
+ /*
+ * Only the non-compound path authorizes the create here.
+ * NOTE(review): presumably compound-capable filesystems are
+ * authorized further down inside vn_create() -- confirm.
+ */
+ if (!batched) {
+ if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
+ goto out;
+ }
+
+ error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
+ if (error != 0) {
+ /* Only a compound create reports, via status, whether a file was created despite the error */
+ if (batched) {
+ *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
+ } else {
+ *did_create = FALSE;
+ }
+
+ if (error == EKEEPLOOKING) {
+ if (*did_create) {
+ panic("EKEEPLOOKING, but we did a create?");
+ }
+ if (!batched) {
+ panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
+ }
+ if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING, but continue flag not set?");
+ }
+
+ /*
+ * Do NOT drop the dvp: we need everything to continue the lookup.
+ */
+ return error;
+ }
+ } else {
+ /* Compound success: the file is already open; status says whether it was newly created */
+ if (batched) {
+ *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
+ *did_open = TRUE;
+ } else {
+ *did_create = TRUE;
+ }
+ }
+#if NAMEDRSRCFORK
+ }
+#endif
+
+ /*
+ * Unlock the fsnode (if locked) here so that we are free
+ * to drop the dvp iocount and prevent deadlock in build_path().
+ * nameidone() will still do the right thing later.
+ */
+ vp = ndp->ni_vp;
+ namei_unlock_fsnode(ndp);
+
+ /*
+ * NOTE(review): this block is also reached when a batched vn_create()
+ * failed but reported DID_CREATE; vp is dereferenced below, so confirm
+ * ni_vp cannot be NULL in that case.
+ */
+ if (*did_create) {
+ int update_flags = 0;
+
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
+
+ /* Release the parent before posting the FSEvent; clearing ni_dvp keeps "out:" from releasing it twice */
+ vnode_put(dvp);
+ ndp->ni_dvp = NULLVP;
+
+#if CONFIG_FSE
+ if (need_fsevent(FSE_CREATE_FILE, vp)) {
+ add_fsevent(FSE_CREATE_FILE, ctx,
+ FSE_ARG_VNODE, vp,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+out:
+ /* Any path that has not already released the parent does so here */
+ if (ndp->ni_dvp != NULLVP) {
+ vnode_put(dvp);
+ ndp->ni_dvp = NULLVP;
+ }
+
+ return error;
+}
+
/*
* Open a file with authorization, updating the contents of the structures
* pointed to by ndp, fmodep, and vap as necessary to perform the requested
vfs_context_t ctx = ndp->ni_cnd.cn_context;
int error;
int fmode;
- kauth_action_t action;
+ uint32_t origcnflags;
+ boolean_t did_create;
+ boolean_t did_open;
+ boolean_t need_vnop_open;
+ boolean_t batched;
+ boolean_t ref_failed;
again:
vp = NULL;
dvp = NULL;
+ batched = FALSE;
+ did_create = FALSE;
+ need_vnop_open = TRUE;
+ ref_failed = FALSE;
fmode = *fmodep;
+ origcnflags = ndp->ni_cnd.cn_flags;
+
+ /*
+ * O_CREAT
+ */
if (fmode & O_CREAT) {
if ( (fmode & O_DIRECTORY) ) {
error = EINVAL;
goto out;
}
ndp->ni_cnd.cn_nameiop = CREATE;
- /* Inherit USEDVP flag only */
- ndp->ni_cnd.cn_flags &= USEDVP;
+#if CONFIG_TRIGGERS
+ ndp->ni_op = OP_LINK;
+#endif
+ /* Inherit USEDVP, vnode_open() supported flags only */
+ ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
+ ndp->ni_flag = NAMEI_COMPOUNDOPEN;
#if NAMEDRSRCFORK
/* open calls are allowed for resource forks. */
ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
- if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
+ if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
ndp->ni_cnd.cn_flags |= FOLLOW;
+
+continue_create_lookup:
if ( (error = namei(ndp)) )
goto out;
+
dvp = ndp->ni_dvp;
vp = ndp->ni_vp;
- /* not found, create */
- if (vp == NULL) {
- /* must have attributes for a new file */
- if (vap == NULL) {
- error = EINVAL;
- goto badcreate;
- }
-
- VATTR_SET(vap, va_type, VREG);
-#if CONFIG_MACF
- error = mac_vnode_check_create(ctx,
- dvp, &ndp->ni_cnd, vap);
- if (error)
- goto badcreate;
-#endif /* MAC */
+ batched = vnode_compound_open_available(dvp);
- /* authorize before creating */
- if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
- goto badcreate;
+ /* not found, create */
+ if (vp == NULL) {
+ /* must have attributes for a new file */
+ if (vap == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Attempt a create. For a system supporting compound VNOPs, we may
+ * find an existing file or create one; in either case, we will already
+ * have the file open and no VNOP_OPEN() will be needed.
+ */
+ error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
- if (fmode & O_EXCL)
- vap->va_vaflags |= VA_EXCLUSIVE;
-#if NAMEDRSRCFORK
- if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
- if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
- goto badcreate;
- } else
-#endif
- if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0)
- goto badcreate;
-
+ dvp = ndp->ni_dvp;
vp = ndp->ni_vp;
- if (vp) {
- int update_flags = 0;
-
- // Make sure the name & parent pointers are hooked up
- if (vp->v_name == NULL)
- update_flags |= VNODE_UPDATE_NAME;
- if (vp->v_parent == NULLVP)
- update_flags |= VNODE_UPDATE_PARENT;
-
- if (update_flags)
- vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
-
-#if CONFIG_FSE
- if (need_fsevent(FSE_CREATE_FILE, vp)) {
- add_fsevent(FSE_CREATE_FILE, ctx,
- FSE_ARG_VNODE, vp,
- FSE_ARG_DONE);
+ /*
+ * Detected a node that the filesystem couldn't handle. Don't call
+ * nameidone() yet, because we need that path buffer.
+ */
+ if (error == EKEEPLOOKING) {
+ if (!batched) {
+ panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
}
-#endif
-
+ goto continue_create_lookup;
}
- /*
- * nameidone has to happen before we vnode_put(dvp)
- * and clear the ni_dvp field, since it may need
- * to release the fs_nodelock on the dvp
- */
-badcreate:
+
nameidone(ndp);
- ndp->ni_dvp = NULL;
- vnode_put(dvp);
+ if (dvp) {
+ panic("Shouldn't have a dvp here.");
+ }
if (error) {
/*
- * Check for a creation race.
+ * Check for a creation or unlink race.
*/
- if ((error == EEXIST) && !(fmode & O_EXCL)) {
+ if (((error == EEXIST) && !(fmode & O_EXCL)) ||
+ ((error == ENOENT) && (fmode & O_CREAT))){
+ if (vp)
+ vnode_put(vp);
goto again;
}
goto bad;
}
- fmode &= ~O_TRUNC;
- } else {
+
+ need_vnop_open = !did_open;
+ }
+ else {
+ if (fmode & O_EXCL)
+ error = EEXIST;
+
+ /*
+ * We have a vnode. Use compound open if available
+ * or else fall through to "traditional" path. Note: can't
+ * do a compound open for root, because the parent belongs
+ * to a different FS.
+ */
+ if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
+ error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
+
+ if (error == 0) {
+ vp = ndp->ni_vp;
+ need_vnop_open = FALSE;
+ } else if (error == EKEEPLOOKING) {
+ if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING, but continue flag not set?");
+ }
+ goto continue_create_lookup;
+ }
+ }
nameidone(ndp);
- ndp->ni_dvp = NULL;
vnode_put(dvp);
+ ndp->ni_dvp = NULLVP;
- if (fmode & O_EXCL) {
- error = EEXIST;
+ if (error) {
goto bad;
}
+
fmode &= ~O_CREAT;
+
+ /* Fall through */
}
} else {
+ /*
+ * Not O_CREAT
+ */
ndp->ni_cnd.cn_nameiop = LOOKUP;
- /* Inherit USEDVP flag only */
- ndp->ni_cnd.cn_flags &= USEDVP;
- ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1;
+ /* Inherit USEDVP, vnode_open() supported flags only */
+ ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
+ ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
#if NAMEDRSRCFORK
/* open calls are allowed for resource forks. */
ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
- if (fmode & O_NOFOLLOW || fmode & O_SYMLINK) {
- ndp->ni_cnd.cn_flags &= ~FOLLOW;
+ ndp->ni_flag = NAMEI_COMPOUNDOPEN;
+
+ /* preserve NOFOLLOW from vnode_open() */
+ if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
+ ndp->ni_cnd.cn_flags &= ~FOLLOW;
}
- if ( (error = namei(ndp)) )
- goto out;
- vp = ndp->ni_vp;
+ /* Do a lookup, possibly going directly to filesystem for compound operation */
+ do {
+ if ( (error = namei(ndp)) )
+ goto out;
+ vp = ndp->ni_vp;
+ dvp = ndp->ni_dvp;
+
+ /* Check for batched lookup-open */
+ batched = vnode_compound_open_available(dvp);
+ if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
+ error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
+ vp = ndp->ni_vp;
+ if (error == 0) {
+ need_vnop_open = FALSE;
+ } else if (error == EKEEPLOOKING) {
+ if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING, but continue flag not set?");
+ }
+ }
+ }
+ } while (error == EKEEPLOOKING);
+
nameidone(ndp);
- ndp->ni_dvp = NULL;
+ vnode_put(dvp);
+ ndp->ni_dvp = NULLVP;
- if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
- error = ENOTDIR;
+ if (error) {
goto bad;
}
}
- if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
- error = EOPNOTSUPP; /* Operation not supported on socket */
- goto bad;
- }
-
- if (vp->v_type == VLNK && (fmode & O_NOFOLLOW) != 0) {
- error = ELOOP; /* O_NOFOLLOW was specified and the target is a symbolic link */
- goto bad;
+ /*
+ * By this point, nameidone() is called, dvp iocount is dropped,
+ * and dvp pointer is cleared.
+ */
+ if (ndp->ni_dvp != NULLVP) {
+ panic("Haven't cleaned up adequately in vn_open_auth()");
}
- /* authorize open of an existing file */
- if ((fmode & O_CREAT) == 0) {
-
- /* disallow write operations on directories */
- if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
- error = EISDIR;
- goto bad;
+ /*
+ * Expect to use this code for filesystems without compound VNOPs, for the root
+ * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
+ * and for shadow files, which do not live on the same filesystems as their "parents."
+ */
+ if (need_vnop_open) {
+ if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
+ panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
}
-#if CONFIG_MACF
- error = mac_vnode_check_open(ctx, vp, fmode);
- if (error)
- goto bad;
-#endif
-
- /* compute action to be authorized */
- action = 0;
- if (fmode & FREAD) {
- action |= KAUTH_VNODE_READ_DATA;
+ if (!did_create) {
+ error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
+ if (error) {
+ goto bad;
+ }
}
- if (fmode & (FWRITE | O_TRUNC)) {
- /*
- * If we are writing, appending, and not truncating,
- * indicate that we are appending so that if the
- * UF_APPEND or SF_APPEND bits are set, we do not deny
- * the open.
- */
- if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
- action |= KAUTH_VNODE_APPEND_DATA;
- } else {
- action |= KAUTH_VNODE_WRITE_DATA;
+
+#if CONFIG_PROTECT
+ /*
+ * Perform any content protection access checks prior to calling
+ * into the filesystem, if the raw encrypted mode was not
+ * requested.
+ *
+ * If the va_dataprotect_flags are NOT active, or if they are,
+ * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need
+ * to perform the checks.
+ */
+ if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
+ ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
+ error = cp_handle_open (vp, fmode);
+ if (error) {
+ goto bad;
}
}
- if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
- goto bad;
-
+#endif
- //
- // if the vnode is tagged VOPENEVT and the current process
- // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
- // flag to the open mode so that this open won't count against
- // the vnode when carbon delete() does a vnode_isinuse() to see
- // if a file is currently in use. this allows spotlight
- // importers to not interfere with carbon apps that depend on
- // the no-delete-if-busy semantics of carbon delete().
- //
- if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
- fmode |= O_EVTONLY;
+ error = VNOP_OPEN(vp, fmode, ctx);
+ if (error) {
+ goto bad;
}
+ need_vnop_open = FALSE;
+ }
+ // if the vnode is tagged VOPENEVT and the current process
+ // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
+ // flag to the open mode so that this open won't count against
+ // the vnode when carbon delete() does a vnode_isinuse() to see
+ // if a file is currently in use. this allows spotlight
+ // importers to not interfere with carbon apps that depend on
+ // the no-delete-if-busy semantics of carbon delete().
+ //
+ if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
+ fmode |= O_EVTONLY;
}
- if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
+ /*
+ * Grab reference, etc.
+ */
+ error = vn_open_auth_finish(vp, fmode, ctx);
+ if (error) {
+ ref_failed = TRUE;
goto bad;
}
- if ( (error = vnode_ref_ext(vp, fmode)) ) {
- goto bad2;
- }
- /* call out to allow 3rd party notification of open.
- * Ignore result of kauth_authorize_fileop call.
- */
- kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
- (uintptr_t)vp, 0);
+ /* Compound VNOP open is responsible for doing the truncate */
+ if (batched || did_create)
+ fmode &= ~O_TRUNC;
*fmodep = fmode;
return (0);
-bad2:
- VNOP_CLOSE(vp, fmode, ctx);
+
bad:
+ /* Opened either explicitly or by a batched create */
+ if (!need_vnop_open) {
+ VNOP_CLOSE(vp, fmode, ctx);
+ }
+
ndp->ni_vp = NULL;
if (vp) {
#if NAMEDRSRCFORK
- if ((vnode_isnamedstream(vp)) && (vp->v_parent != NULLVP) &&
- (vnode_isshadow (vp))) {
- vnode_recycle(vp);
+ /* Aggressively recycle shadow files if we error'd out during open() */
+ if ((vnode_isnamedstream(vp)) &&
+ (vp->v_parent != NULLVP) &&
+ (vnode_isshadow(vp))) {
+ vnode_recycle(vp);
}
#endif
vnode_put(vp);
*
* EREDRIVEOPEN: means that we were hit by the tty allocation race.
*/
- if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN)) {
+ if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
goto again;
}
}
+
out:
return (error);
}
{
int error;
-#if CONFIG_FSE
- if (flags & FWASWRITTEN) {
- if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
- add_fsevent(FSE_CONTENT_MODIFIED, ctx,
- FSE_ARG_VNODE, vp,
- FSE_ARG_DONE);
- }
- }
-#endif
-
#if NAMEDRSRCFORK
/* Sync data from resource fork shadow file if needed. */
if ((vp->v_flag & VISNAMEDSTREAM) &&
(vp->v_parent != NULLVP) &&
- (vnode_isshadow(vp))) {
+ vnode_isshadow(vp)) {
if (flags & FWASWRITTEN) {
(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
}
#endif
/* work around for foxhound */
- if (vp->v_type == VBLK)
+ if (vnode_isspec(vp))
(void)vnode_rele_ext(vp, flags, 0);
error = VNOP_CLOSE(vp, flags, ctx);
- if (vp->v_type != VBLK)
+#if CONFIG_FSE
+ if (flags & FWASWRITTEN) {
+ if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
+ add_fsevent(FSE_CONTENT_MODIFIED, ctx,
+ FSE_ARG_VNODE, vp,
+ FSE_ARG_DONE);
+ }
+ }
+#endif
+
+ if (!vnode_isspec(vp))
(void)vnode_rele_ext(vp, flags, 0);
return (error);
struct vnode *vp,
uio_t uio)
{
- static char *swap_read_zero_page = NULL;
int error;
off_t swap_count, this_count;
off_t file_end, read_end;
off_t prev_resid;
+ char *my_swap_page;
/*
- * Reading from a swap file will get you all zeroes.
+ * Reading from a swap file will get you zeroes.
*/
+
+ my_swap_page = NULL;
error = 0;
swap_count = uio_resid(uio);
}
while (swap_count > 0) {
- if (swap_read_zero_page == NULL) {
- char *my_zero_page;
- int funnel_state;
-
- /*
- * Take kernel funnel so that only one thread
- * sets up "swap_read_zero_page".
- */
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
- if (swap_read_zero_page == NULL) {
- MALLOC(my_zero_page, char *, PAGE_SIZE,
- M_TEMP, M_WAITOK);
- memset(my_zero_page, '?', PAGE_SIZE);
- /*
- * Adding a newline character here
- * and there prevents "less(1)", for
- * example, from getting too confused
- * about a file with one really really
- * long line.
- */
- my_zero_page[PAGE_SIZE-1] = '\n';
- if (swap_read_zero_page == NULL) {
- swap_read_zero_page = my_zero_page;
- } else {
- FREE(my_zero_page, M_TEMP);
- }
- } else {
- /*
- * Someone else raced us here and won;
- * just use their page.
- */
- }
- thread_funnel_set(kernel_flock, funnel_state);
+ if (my_swap_page == NULL) {
+ MALLOC(my_swap_page, char *, PAGE_SIZE,
+ M_TEMP, M_WAITOK);
+ memset(my_swap_page, '\0', PAGE_SIZE);
+ /* add an end-of-line to keep line counters happy */
+ my_swap_page[PAGE_SIZE-1] = '\n';
}
-
this_count = swap_count;
if (this_count > PAGE_SIZE) {
this_count = PAGE_SIZE;
}
prev_resid = uio_resid(uio);
- error = uiomove((caddr_t) swap_read_zero_page,
+ error = uiomove((caddr_t) my_swap_page,
this_count,
uio);
if (error) {
}
swap_count -= (prev_resid - uio_resid(uio));
}
+ if (my_swap_page != NULL) {
+ FREE(my_swap_page, M_TEMP);
+ my_swap_page = NULL;
+ }
return error;
}
int *aresid,
proc_t p)
{
- return vn_rdwr_64(rw,
+ int64_t resid;
+ int result;
+
+ result = vn_rdwr_64(rw,
vp,
(uint64_t)(uintptr_t)base,
(int64_t)len,
segflg,
ioflg,
cred,
- aresid,
+ &resid,
p);
+
+ /* "resid" should be bounded above by "len," which is an int */
+ if (aresid != NULL) {
+ *aresid = resid;
+ }
+
+ return result;
}
enum uio_seg segflg,
int ioflg,
kauth_cred_t cred,
- int *aresid,
+ int64_t *aresid,
proc_t p)
{
uio_t auio;
if (error == 0) {
if (rw == UIO_READ) {
- if (vp->v_flag & VSWAP) {
+ if (vnode_isswap(vp)) {
error = vn_read_swapfile(vp, auio);
} else {
error = VNOP_READ(vp, auio, ioflg, &context);
}
if (aresid)
- // LP64todo - fix this
*aresid = uio_resid(auio);
else
if (uio_resid(auio) && error == 0)
}
#endif
- ioflag = 0;
+ /* This signals to VNOP handlers that this read came from a file table read */
+ ioflag = IO_SYSCALL_DISPATCH;
+
if (fp->f_fglob->fg_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
- ioflag |= IO_NOCACHE;
+ ioflag |= IO_NOCACHE;
+ if (fp->f_fglob->fg_flag & FENCRYPTED) {
+ ioflag |= IO_ENCRYPTED;
+ }
if (fp->f_fglob->fg_flag & FNORDAHEAD)
- ioflag |= IO_RAOFF;
+ ioflag |= IO_RAOFF;
if ((flags & FOF_OFFSET) == 0)
uio->uio_offset = fp->f_fglob->fg_offset;
count = uio_resid(uio);
- if (vp->v_flag & VSWAP) {
+ if (vnode_isswap(vp)) {
/* special case for swap files */
error = vn_read_swapfile(vp, uio);
} else {
}
#endif
- ioflag = IO_UNIT;
+ /*
+ * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write originated
+ * from a file table write.
+ */
+ ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);
+
if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
ioflag |= IO_APPEND;
if (fp->f_fglob->fg_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
ioflag |= IO_NOCACHE;
- if ((fp->f_fglob->fg_flag & O_FSYNC) ||
- (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
+ if (fp->f_fglob->fg_flag & FNODIRECT)
+ ioflag |= IO_NODIRECT;
+ if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
+ ioflag |= IO_SINGLE_WRITER;
+
+ /*
+ * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
+ *
+ * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
+ * XXX the non-essential metadata without some additional VFS work;
+ * XXX the intent at this point is to plumb the interface for it.
+ */
+ if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
+ (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
ioflag |= IO_SYNC;
+ }
if ((flags & FOF_OFFSET) == 0) {
uio->uio_offset = fp->f_fglob->fg_offset;
}
if (((flags & FOF_OFFSET) == 0) &&
vfs_context_proc(ctx) && (vp->v_type == VREG) &&
- (((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
- ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
+ (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
+ ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
/*
* If the requested residual would cause us to go past the
* administrative limit, then we need to adjust the residual
* we can't do that (e.g. the residual is already 1 byte),
* then we fail the write with EFBIG.
*/
- residcount = uio_uio_resid(uio);
- if ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
- clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
- } else if ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
+ residcount = uio_resid(uio);
+ if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+ clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
+ } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
}
if (clippedsize >= residcount) {
return (EFBIG);
}
if (p && (vp->v_type == VREG) &&
- ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
+ ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
//Debugger("vn_bwrite:overstepping the bounds");
- residcount = uio_uio_resid(uio);
- clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
+ residcount = uio_resid(uio);
+ clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
partialwrite = 1;
uio_setresid(uio, residcount-clippedsize);
}
sb64 = (struct stat64 *)sbptr;
else
sb = (struct stat *)sbptr;
-
+ memset(&va, 0, sizeof(va));
VATTR_INIT(&va);
VATTR_WANTED(&va, va_fsid);
VATTR_WANTED(&va, va_fileid);
sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
}
- /* if we're interested in exended security data and we got an ACL */
+ /* if we're interested in extended security data and we got an ACL */
if (xsec != NULL) {
if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
!VATTR_IS_SUPPORTED(&va, va_uuuid) &&
error = ENXIO;
goto out;
}
- *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
+ *(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type;
} else if (vp->v_type == VCHR) {
if (major(vp->v_rdev) >= nchrdev) {
error = ENXIO;
goto out;
}
- *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
+ *(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type;
} else {
error = ENOTTY;
goto out;
error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
if (error == 0 && com == TIOCSCTTY) {
- vnode_ref(vp);
+ error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
+ if (error != 0) {
+ panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
+ }
funnel_state = thread_funnel_set(kernel_flock, TRUE);
sessp = proc_session(vfs_context_proc(ctx));
}
-/*
- * Check that the vnode is still valid, and if so
- * acquire requested lock.
- */
-int
-vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p)
-{
- return (0);
-}
-
/*
* File table vnode close routine.
*/
* VNOP_PATHCONF:???
*/
int
-vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx)
+vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
{
int error = 0;
+ struct vfs_attr vfa;
switch(name) {
case _PC_EXTENDED_SECURITY_NP:
case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
*retval = 0; /* [SIO] option is not supported */
break;
+ case _PC_XATTR_SIZE_BITS:
+ /* The number of bits used to store maximum extended
+ * attribute size in bytes. For example, if the maximum
+ * attribute size supported by a file system is 128K, the
+ * value returned will be 18. However a value 18 can mean
+ * that the maximum attribute size can be anywhere from
+ * (256KB - 1) to 128KB. As a special case, the resource
+ * fork can have much larger size, and some file system
+ * specific extended attributes can have smaller and preset
+ * size; for example, Finder Info is always 32 bytes.
+ */
+ memset(&vfa, 0, sizeof(vfa));
+ VFSATTR_INIT(&vfa);
+ VFSATTR_WANTED(&vfa, f_capabilities);
+ if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
+ (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
+ (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
+ (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
+ /* Supports native extended attributes */
+ error = VNOP_PATHCONF(vp, name, retval, ctx);
+ } else {
+ /* Number of bits used to represent the maximum size of
+ * extended attribute stored in an Apple Double file.
+ */
+ *retval = AD_XATTR_SIZE_BITS;
+ }
+ break;
default:
error = VNOP_PATHCONF(vp, name, retval, ctx);
break;
+/*
+ * File table kqueue-attach routine for vnodes (the vn_kqfilt_add entry of
+ * vnops).
+ *
+ * Gets the vnode with vget_internal() (refusing dead vnodes), checks that
+ * the requested filter is usable on this vnode type, and then attaches the
+ * knote to the vnode's v_knotes list with vnode_filtops as its filter
+ * operations. Every path after a successful vget_internal() ends with a
+ * matching vnode_put().
+ *
+ * Filter/type rules implemented below:
+ *   EVFILT_READ, EVFILT_WRITE
+ *       FIFO:          ENOTSUP unless v_fifoinfo has a fi_readsock
+ *       regular file:  accepted
+ *       char device:   offered to spec_kqfilter(); on success return 0
+ *                      without attaching here, on failure EINVAL
+ *       anything else: EINVAL
+ *   EVFILT_VNODE       accepted for any vnode type
+ *   other filters      EINVAL
+ *
+ * Returns 0 on success or an errno (including a failure from
+ * vget_internal() or, under CONFIG_MACF, mac_vnode_check_kqfilter()).
+ */
static int
vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
{
- struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data;
int error;
- int funnel_state;
+ struct vnode *vp;
- if ( (error = vnode_getwithref(vp)) == 0 ) {
+ vp = (struct vnode *)fp->f_fglob->fg_data;
+
+ /*
+ * Don't attach a knote to a dead vnode.
+ */
+ if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ case EVFILT_WRITE:
+ if (vnode_isfifo(vp)) {
+ /* We'll only watch FIFOs that use our fifofs */
+ if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
+ error = ENOTSUP;
+ }
+
+ } else if (!vnode_isreg(vp)) {
+ if (vnode_ischr(vp) &&
+ (error = spec_kqfilter(vp, kn)) == 0) {
+ /* claimed by a special device */
+ vnode_put(vp);
+ return 0;
+ }
+
+ /* Any error returned by spec_kqfilter() is replaced by EINVAL */
+ error = EINVAL;
+ }
+ break;
+ case EVFILT_VNODE:
+ break;
+ default:
+ error = EINVAL;
+ }
+
+ if (error) {
+ vnode_put(vp);
+ return error;
+ }
#if CONFIG_MACF
error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
if (error) {
- (void)vnode_put(vp);
- return (error);
+ vnode_put(vp);
+ return error;
}
#endif
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
- error = VNOP_KQFILT_ADD(vp, kn, ctx);
- thread_funnel_set(kernel_flock, funnel_state);
+ /* Remember the vnode and its vid so filt_vndetach()/filt_vnode() can revalidate it later */
+ kn->kn_hook = (void*)vp;
+ kn->kn_hookid = vnode_vid(vp);
+ kn->kn_fop = &vnode_filtops;
- (void)vnode_put(vp);
+ vnode_lock(vp);
+ KNOTE_ATTACH(&vp->v_knotes, kn);
+ vnode_unlock(vp);
+
+ /* Ask the filesystem to provide remove notifications, but ignore failure */
+ VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx);
+
+ vnode_put(vp);
}
+
return (error);
}
-#if 0
-/* No one calls this yet. */
-static int
-vn_kqfilt_remove(vp, ident, ctx)
- struct vnode *vp;
- uintptr_t ident;
- vfs_context_t ctx;
+/*
+ * f_detach hook of vnode_filtops: remove a knote from its vnode.
+ *
+ * The vnode is recovered from kn_hook and revalidated against the vid saved
+ * in kn_hookid; if vnode_getwithvid() fails the function returns without
+ * touching the vnode's knote list.
+ */
+static void
+filt_vndetach(struct knote *kn)
{
- int error;
- int funnel_state;
+ vfs_context_t ctx = vfs_context_current();
+ struct vnode *vp;
+ vp = (struct vnode *)kn->kn_hook;
+ if (vnode_getwithvid(vp, kn->kn_hookid))
+ return;
+
+ /* The knote list is modified under the vnode lock, mirroring the attach in vn_kqfilt_add() */
+ vnode_lock(vp);
+ KNOTE_DETACH(&vp->v_knotes, kn);
+ vnode_unlock(vp);
- if ( (error = vnode_getwithref(vp)) == 0 ) {
+ /*
+ * Tell a (generally networked) filesystem that we're no longer watching
+ * If the FS wants to track contexts, it should still be using the one from
+ * the VNODE_MONITOR_BEGIN.
+ */
+ VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
+ vnode_put(vp);
+}
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
- error = VNOP_KQFILT_REMOVE(vp, ident, ctx);
- thread_funnel_set(kernel_flock, funnel_state);
- (void)vnode_put(vp);
+/*
+ * Used for EVFILT_READ
+ *
+ * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case
+ * differently than the regular case for VREG files. If not in poll(),
+ * then we need to know current fileproc offset for VREG.
+ *
+ * Returns:
+ *   FIFO          the count reported by fifo_charcount(), or 0 if that
+ *                 call fails
+ *   VREG, ispoll  always 1
+ *   VREG, !ispoll ui_size minus current_offset, clamped to the intptr_t
+ *                 range; negative when current_offset is past ui_size
+ * Any other vnode type panics.
+ */
+static intptr_t
+vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
+{
+ if (vnode_isfifo(vp)) {
+ int cnt;
+ int err = fifo_charcount(vp, &cnt);
+ if (err == 0) {
+ return (intptr_t)cnt;
+ } else {
+ return (intptr_t)0;
+ }
+ } else if (vnode_isreg(vp)) {
+ if (ispoll) {
+ return (intptr_t)1;
+ }
+
+ off_t amount;
+ amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
+ /* off_t may be wider than intptr_t, so saturate instead of truncating */
+ if (amount > (off_t)INTPTR_MAX) {
+ return INTPTR_MAX;
+ } else if (amount < (off_t)INTPTR_MIN) {
+ return INTPTR_MIN;
+ } else {
+ return (intptr_t)amount;
+ }
+ } else {
+ panic("Should never have an EVFILT_READ except for reg or fifo.");
+ return 0;
}
- return (error);
}
-#endif
+
+/*
+ * Used for EVFILT_WRITE.
+ *
+ * For regular vnodes, we can always write (1). For named pipes,
+ * see how much space there is in the buffer. Nothing else is covered.
+ *
+ * Returns the FIFO's free space as reported by fifo_freespace() (0 if that
+ * call fails), or 1 for a regular file. Any other vnode type panics.
+ */
+static intptr_t
+vnode_writable_space_count(vnode_t vp)
+{
+	if (vnode_isfifo(vp)) {
+		long spc;
+		int err = fifo_freespace(vp, &spc);
+
+		/* A FIFO whose free space cannot be queried is reported as full */
+		return (err == 0) ? (intptr_t)spc : (intptr_t)0;
+	}
+
+	if (vnode_isreg(vp)) {
+		return (intptr_t)1;
+	}
+
+	/* This is the write-side helper, so the panic names EVFILT_WRITE */
+	panic("Should never have an EVFILT_WRITE except for reg or fifo.");
+	return 0;
+}
+
+/*
+ * Determine whether this knote should be active
+ *
+ * This is kind of subtle.
+ * --First, notice if the vnode has been revoked: if so, override hint
+ * --EVFILT_READ knotes are checked no matter what the hint is
+ * --Other knotes activate based on hint.
+ * --If hint is revoke, set special flags and activate
+ *
+ * Locking: with hint == 0 this function takes the vnode lock itself and
+ * tries to get an iocount validated against kn_hookid; whatever it acquired
+ * is released before returning. With a non-zero hint the vnode lock is
+ * asserted to be owned already and is left held.
+ *
+ * Returns non-zero if the knote should be activated, 0 otherwise.
+ */
+static int
+filt_vnode(struct knote *kn, long hint)
+{
+ vnode_t vp = (struct vnode *)kn->kn_hook;
+ int activate = 0;
+ long orig_hint = hint; /* hint may be rewritten to NOTE_REVOKE below; cleanup keys off the original */
+
+ if (0 == hint) {
+ vnode_lock(vp);
+
+ if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
+ /* Is recycled */
+ hint = NOTE_REVOKE;
+ }
+ } else {
+ lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
+ }
+
+ /* Special handling for vnodes that are in recycle or already gone */
+ if (NOTE_REVOKE == hint) {
+ kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+ activate = 1;
+
+ /* Only report NOTE_REVOKE in fflags if the EVFILT_VNODE watcher asked for it */
+ if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
+ kn->kn_fflags |= NOTE_REVOKE;
+ }
+ } else {
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL));
+
+ if (kn->kn_data != 0) {
+ activate = 1;
+ }
+ break;
+ case EVFILT_WRITE:
+ kn->kn_data = vnode_writable_space_count(vp);
+
+ if (kn->kn_data != 0) {
+ activate = 1;
+ }
+ break;
+ case EVFILT_VNODE:
+ /* Check events this note matches against the hint */
+ if (kn->kn_sfflags & hint) {
+ kn->kn_fflags |= hint; /* Set which event occurred */
+ }
+ /* fflags accumulate, so a previously recorded event also activates */
+ if (kn->kn_fflags != 0) {
+ activate = 1;
+ }
+ break;
+ default:
+ panic("Invalid knote filter on a vnode!\n");
+ }
+ }
+
+ if (orig_hint == 0) {
+ /*
+ * Definitely need to unlock, may need to put
+ */
+ /* hint is still 0 only if vnode_getiocount() succeeded above */
+ if (hint == 0) {
+ vnode_put_locked(vp);
+ }
+ vnode_unlock(vp);
+ }
+
+ return (activate);
+}