* are met.
*/
/*
- * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections. This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
*/
/*
#include <sys/sysproto.h>
#include <sys/proc_info.h>
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
#include <sys/kdebug.h>
#endif
-
/*
* interfaces to the outside world
*/
static int pipe_read(struct fileproc *fp, struct uio *uio,
- kauth_cred_t cred, int flags, struct proc *p);
+ int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
- kauth_cred_t cred, int flags, struct proc *p);
+ int flags, vfs_context_t ctx);
-static int pipe_close(struct fileglob *fg, struct proc *p);
-static int pipe_select(struct fileproc *fp, int which, void * wql, struct proc *p);
-static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
-static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p);
+
+static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
+static int pipe_select(struct fileproc *fp, int which, void * wql,
+ vfs_context_t ctx);
+static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
+ vfs_context_t ctx);
+static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
+ vfs_context_t ctx);
+static int pipe_drain(struct fileproc *fp, vfs_context_t ctx);
struct fileops pipeops =
pipe_select,
pipe_close,
pipe_kqfilter,
- 0 };
+ pipe_drain };
static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);
-static struct filterops pipe_rfiltops =
- { 1, NULL, filt_pipedetach, filt_piperead };
-static struct filterops pipe_wfiltops =
- { 1, NULL, filt_pipedetach, filt_pipewrite };
+static struct filterops pipe_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_pipedetach,
+ .f_event = filt_piperead,
+};
+static struct filterops pipe_wfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_pipedetach,
+ .f_event = filt_pipewrite,
+};
/*
* Default pipe buffer size(s), this can be kind-of large now because pipe
#if PIPE_SYSCTLS
SYSCTL_DECL(_kern_ipc);
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
&maxpipekva, 0, "Pipe KVA limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
&maxpipekvawired, 0, "Pipe KVA wired limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
&amountpipes, 0, "Current # of pipes");
-SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
&nbigpipe, 0, "Current # of big pipes");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
&amountpipekva, 0, "Pipe KVA usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
&amountpipekvawired, 0, "Pipe wired KVA usage");
#endif
-void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static zone_t pipe_zone;
+#define PIPE_GARBAGE_AGE_LIMIT 5000 /* In milliseconds */
+#define PIPE_GARBAGE_QUEUE_LIMIT 32000
+
+struct pipe_garbage {
+ struct pipe *pg_pipe;
+ struct pipe_garbage *pg_next;
+ uint64_t pg_timestamp;
+};
+
+static zone_t pipe_garbage_zone;
+static struct pipe_garbage *pipe_garbage_head = NULL;
+static struct pipe_garbage *pipe_garbage_tail = NULL;
+static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
+static int pipe_garbage_count = 0;
+static lck_mtx_t *pipe_garbage_lock;
+
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
void
-pipeinit(void *dummy __unused)
+pipeinit(void)
{
- pipe_zone = (zone_t)zinit(sizeof(struct pipe), 8192 * sizeof(struct pipe), 4096, "pipe zone");
+ vm_size_t zone_size;
+
+ zone_size = 8192 * sizeof(struct pipe);
+ pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");
/*
* allocate lock group attribute and group for pipe mutexes
* allocate the lock attribute for pipe mutexes
*/
pipe_mtx_attr = lck_attr_alloc_init();
+
+ /*
+ * Set up garbage collection for dead pipes
+ */
+ zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
+ sizeof(struct pipe_garbage);
+ pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
+ zone_size, 4096, "pipe garbage zone");
+ pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
+}
+
+/* Bitmap for things to touch in pipe_touch() */
+#define PIPE_ATIME 0x00000001 /* time of last access */
+#define PIPE_MTIME 0x00000002 /* time of last modification */
+#define PIPE_CTIME 0x00000004 /* time of last status change */
+
+static void
+pipe_touch(struct pipe *tpipe, int touch)
+{
+ struct timeval now;
+
+ microtime(&now);
+
+ if (touch & PIPE_ATIME) {
+ tpipe->st_atimespec.tv_sec = now.tv_sec;
+ tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000;
+ }
+
+ if (touch & PIPE_MTIME) {
+ tpipe->st_mtimespec.tv_sec = now.tv_sec;
+ tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000;
+ }
+
+ if (touch & PIPE_CTIME) {
+ tpipe->st_ctimespec.tv_sec = now.tv_sec;
+ tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000;
+ }
}
/* ARGSUSED */
int
-pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval)
+pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
struct fileproc *rf, *wf;
struct pipe *rpipe, *wpipe;
TAILQ_INIT(&rpipe->pipe_evlist);
TAILQ_INIT(&wpipe->pipe_evlist);
- error = falloc(p, &rf, &fd);
+ error = falloc(p, &rf, &fd, vfs_context_current());
if (error) {
goto freepipes;
}
rf->f_data = (caddr_t)rpipe;
rf->f_ops = &pipeops;
- error = falloc(p, &wf, &fd);
+ error = falloc(p, &wf, &fd, vfs_context_current());
if (error) {
fp_free(p, retval[0], rf);
goto freepipes;
wf->f_data = (caddr_t)wpipe;
wf->f_ops = &pipeops;
+ rpipe->pipe_peer = wpipe;
+ wpipe->pipe_peer = rpipe;
+ rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
+
retval[1] = fd;
-#ifdef MAC
+#if CONFIG_MACF
/*
* XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
*
* struct pipe represents a pipe endpoint. The MAC label is shared
- * between the connected endpoints. As a result mac_init_pipe() and
- * mac_create_pipe() should only be called on one of the endpoints
+ * between the connected endpoints. As a result mac_pipe_label_init() and
+ * mac_pipe_label_associate() should only be called on one of the endpoints
* after they have been connected.
*/
- mac_init_pipe(rpipe);
- mac_create_pipe(td->td_ucred, rpipe);
+ mac_pipe_label_init(rpipe);
+ mac_pipe_label_associate(kauth_cred_get(), rpipe);
+ wpipe->pipe_label = rpipe->pipe_label;
#endif
- proc_fdlock(p);
- *fdflags(p, retval[0]) &= ~UF_RESERVED;
- *fdflags(p, retval[1]) &= ~UF_RESERVED;
+ proc_fdlock_spin(p);
+ procfdtbl_releasefd(p, retval[0], NULL);
+ procfdtbl_releasefd(p, retval[1], NULL);
fp_drop(p, retval[0], rf, 1);
fp_drop(p, retval[1], wf, 1);
proc_fdunlock(p);
- rpipe->pipe_peer = wpipe;
- wpipe->pipe_peer = rpipe;
-
- rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
return (0);
return (error);
}
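
For reference, the userland contract the syscall above implements: fds[0] is the read end (retval[0]), fds[1] the write end (retval[1]), and read() returns 0 for EOF once the last write descriptor is gone. A minimal sketch, with error handling mostly elided:

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];
	char buf[64];
	ssize_t n;

	if (pipe(fds) == -1) {
		perror("pipe");
		return 1;
	}
	write(fds[1], "hello", 5);		/* write end: fds[1] (retval[1]) */
	n = read(fds[0], buf, sizeof(buf));	/* read end: fds[0] (retval[0]) */
	printf("read %zd bytes\n", n);

	close(fds[1]);				/* drop the only write descriptor */
	n = read(fds[0], buf, sizeof(buf));
	printf("read after close: %zd (0 == EOF)\n", n);
	close(fds[0]);
	return 0;
}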
-
int
-pipe_stat(struct pipe *cpipe, struct stat *ub)
+pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
{
-#ifdef MAC
+#if CONFIG_MACF
int error;
#endif
- struct timeval now;
+ int pipe_size = 0;
+ int pipe_count;
+ struct stat *sb = (struct stat *)0; /* warning avoidance; protected by isstat64 */
+ struct stat64 *sb64 = (struct stat64 *)0; /* warning avoidance; protected by isstat64 */
if (cpipe == NULL)
return (EBADF);
-#ifdef MAC
PIPE_LOCK(cpipe);
- error = mac_check_pipe_stat(active_cred, cpipe);
- PIPE_UNLOCK(cpipe);
- if (error)
+
+#if CONFIG_MACF
+ error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
+ if (error) {
+ PIPE_UNLOCK(cpipe);
return (error);
+ }
#endif
if (cpipe->pipe_buffer.buffer == 0) {
/*
* must be stat'ing the write fd
*/
- cpipe = cpipe->pipe_peer;
-
- if (cpipe == NULL)
- return (EBADF);
+ if (cpipe->pipe_peer) {
+ /*
+ * the peer still exists, use its info
+ */
+ pipe_size = cpipe->pipe_peer->pipe_buffer.size;
+ pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
+ } else {
+ pipe_count = 0;
+ }
+ } else {
+ pipe_size = cpipe->pipe_buffer.size;
+ pipe_count = cpipe->pipe_buffer.cnt;
}
- bzero(ub, sizeof(*ub));
- ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
- ub->st_blksize = cpipe->pipe_buffer.size;
- ub->st_size = cpipe->pipe_buffer.cnt;
- ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
- ub->st_nlink = 1;
- ub->st_uid = kauth_getuid();
- ub->st_gid = kauth_getgid();
- microtime(&now);
- ub->st_atimespec.tv_sec = now.tv_sec;
- ub->st_atimespec.tv_nsec = now.tv_usec * 1000;
- ub->st_mtimespec.tv_sec = now.tv_sec;
- ub->st_mtimespec.tv_nsec = now.tv_usec * 1000;
- ub->st_ctimespec.tv_sec = now.tv_sec;
- ub->st_ctimespec.tv_nsec = now.tv_usec * 1000;
+ /*
+ * since the peer's buffer is set up outside of the lock
+ * we might catch it in a transient state
+ */
+ if (pipe_size == 0)
+ pipe_size = PIPE_SIZE;
+
+ if (isstat64 != 0) {
+ sb64 = (struct stat64 *)ub;
+
+ bzero(sb64, sizeof(*sb64));
+ sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+ sb64->st_blksize = pipe_size;
+ sb64->st_size = pipe_count;
+ sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;
+
+ sb64->st_uid = kauth_getuid();
+ sb64->st_gid = kauth_getgid();
+
+ sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
+ sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
+
+ sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
+ sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
+
+ sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
+ sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
+
+ /*
+ * Return a relatively unique inode number based on the current
+ * address of this pipe's struct pipe. This number may be recycled
+ * relatively quickly.
+ */
+ sb64->st_ino = (ino64_t)((uintptr_t)cpipe);
+ } else {
+ sb = (struct stat *)ub;
+
+ bzero(sb, sizeof(*sb));
+ sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+ sb->st_blksize = pipe_size;
+ sb->st_size = pipe_count;
+ sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
+
+ sb->st_uid = kauth_getuid();
+ sb->st_gid = kauth_getgid();
+
+ sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
+ sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
+
+ sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
+ sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
+
+ sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
+ sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
+
+ /*
+ * Return a relatively unique inode number based on the current
+ * address of this pipe's struct pipe. This number may be recycled
+ * relatively quickly.
+ */
+ sb->st_ino = (ino_t)(uintptr_t)cpipe;
+ }
+ PIPE_UNLOCK(cpipe);
/*
- * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
- * XXX (st_dev, st_ino) should be unique.
+ * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
+ * st_uid, st_gid.
+ *
+ * XXX (st_dev) should be unique, but there is no device driver that
+ * XXX is associated with pipes, since they are implemented via a
+ * XXX struct fileops indirection rather than as FS objects.
*/
return (0);
}
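
For reference, fstat(2) on either descriptor lands in pipe_stat() above. With this change the timestamps come from the values maintained by pipe_touch() rather than being fabricated at stat time, and st_ino is only "relatively unique", as the comment warns. A small sketch to inspect the fields (illustrative only):

#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];
	struct stat st;

	if (pipe(fds) == -1 || fstat(fds[0], &st) == -1)
		return 1;
	printf("mode=%o blksize=%d size=%lld ino=%llu\n",
	    (unsigned)st.st_mode, (int)st.st_blksize,
	    (long long)st.st_size, (unsigned long long)st.st_ino);
	close(fds[0]);
	close(fds[1]);
	return 0;
}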
cpipe->pipe_buffer.out = 0;
cpipe->pipe_buffer.cnt = 0;
- OSAddAtomic(1, (SInt32 *)&amountpipes);
- OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva);
+ OSAddAtomic(1, &amountpipes);
+ OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
return (0);
}
*/
bzero(cpipe, sizeof *cpipe);
+ /* Initial times are all the time of creation of the pipe */
+ pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
+
return (0);
}
/*
* lock a pipe for I/O, blocking other access
*/
-static __inline int
-pipelock(cpipe, catch)
- struct pipe *cpipe;
- int catch;
+static inline int
+pipelock(struct pipe *cpipe, int catch)
{
int error;
/*
* unlock a pipe I/O lock
*/
-static __inline void
-pipeunlock(cpipe)
- struct pipe *cpipe;
+static inline void
+pipeunlock(struct pipe *cpipe)
{
-
cpipe->pipe_state &= ~PIPE_LOCKFL;
if (cpipe->pipe_state & PIPE_LWANT) {
}
static void
-pipeselwakeup(cpipe, spipe)
- struct pipe *cpipe;
- struct pipe *spipe;
+pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
{
-
if (cpipe->pipe_state & PIPE_SEL) {
cpipe->pipe_state &= ~PIPE_SEL;
selwakeup(&cpipe->pipe_sel);
postpipeevent(cpipe, EV_RWBYTES);
if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
- struct proc *p;
-
if (spipe->pipe_pgid < 0)
gsignal(-spipe->pipe_pgid, SIGIO);
- else if ((p = pfind(spipe->pipe_pgid)) != (struct proc *)0)
- psignal(p, SIGIO);
+ else
+ proc_signal(spipe->pipe_pgid, SIGIO);
}
}
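
The proc_signal() above is the tail end of async notification on pipes: a process that has claimed ownership of the descriptor (F_SETOWN, which reaches pipe_pgid through pipe_ioctl()) and enabled O_ASYNC (PIPE_ASYNC via FIOASYNC) receives SIGIO when the pipe becomes active. A hedged userland sketch; that F_SETOWN/O_ASYNC behave on pipe descriptors as they do on sockets is an assumption here, not shown in this diff:

#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t got_sigio;

static void
on_sigio(int sig)
{
	(void)sig;
	got_sigio = 1;
}

int
main(void)
{
	int fds[2];
	sigset_t mask, omask;

	if (pipe(fds) == -1)
		return 1;
	signal(SIGIO, on_sigio);
	fcntl(fds[0], F_SETOWN, getpid());	/* assumed to end up in pipe_pgid */
	fcntl(fds[0], F_SETFL, fcntl(fds[0], F_GETFL, 0) | O_ASYNC);

	sigemptyset(&mask);
	sigaddset(&mask, SIGIO);
	sigprocmask(SIG_BLOCK, &mask, &omask);	/* avoid the wakeup race */
	write(fds[1], "x", 1);			/* data arrives: kernel sends SIGIO */
	while (!got_sigio)
		sigsuspend(&omask);
	printf("got SIGIO\n");
	return 0;
}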
/* ARGSUSED */
static int
-pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p)
+pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
+ __unused vfs_context_t ctx)
{
struct pipe *rpipe = (struct pipe *)fp->f_data;
int error;
if (error)
goto unlocked_error;
-#ifdef MAC
- error = mac_check_pipe_read(active_cred, rpipe);
+#if CONFIG_MACF
+ error = mac_pipe_check_read(kauth_cred_get(), rpipe);
if (error)
goto locked_error;
#endif
* detect EOF condition
* read returns 0 on EOF, no need to set error
*/
- if (rpipe->pipe_state & PIPE_EOF)
+ if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
break;
+ }
/*
* If the "write-side" has been blocked, wake it up now.
goto unlocked_error;
}
}
-#ifdef MAC
+#if CONFIG_MACF
locked_error:
#endif
pipeunlock(rpipe);
if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
pipeselwakeup(rpipe, rpipe->pipe_peer);
+ /* update last read time */
+ pipe_touch(rpipe, PIPE_ATIME);
+
PIPE_UNLOCK(rpipe);
return (error);
PRIBIO | PCATCH, "pipdww", 0);
if (error)
goto error1;
- if (wpipe->pipe_state & PIPE_EOF) {
+ if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
error = EPIPE;
goto error1;
}
PRIBIO | PCATCH, "pipdwc", 0);
if (error)
goto error1;
- if (wpipe->pipe_state & PIPE_EOF) {
+ if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
error = EPIPE;
goto error1;
}
error = 0;
while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
- if (wpipe->pipe_state & PIPE_EOF) {
+ if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
pipelock(wpipe, 0);
PIPE_UNLOCK(wpipe);
pipe_destroy_write_buffer(wpipe);
static int
-pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p)
+pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
+ __unused vfs_context_t ctx)
{
int error = 0;
int orig_resid;
/*
* detect loss of pipe read side, issue SIGPIPE if lost.
*/
- if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) {
+ if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
PIPE_UNLOCK(rpipe);
return (EPIPE);
}
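
From userland, that EPIPE surfaces as SIGPIPE unless the signal is ignored, in which case write(2) simply fails. A minimal sketch:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];

	if (pipe(fds) == -1)
		return 1;
	signal(SIGPIPE, SIG_IGN);	/* take the error, not the signal */
	close(fds[0]);			/* lose the read side */

	if (write(fds[1], "x", 1) == -1 && errno == EPIPE)
		printf("write failed with EPIPE as expected\n");
	return 0;
}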
-#ifdef MAC
- error = mac_check_pipe_write(active_cred, wpipe);
+#if CONFIG_MACF
+ error = mac_pipe_check_write(kauth_cred_get(), wpipe);
if (error) {
PIPE_UNLOCK(rpipe);
return (error);
if ((error = pipelock(wpipe, 1)) == 0) {
PIPE_UNLOCK(wpipe);
if (pipespace(wpipe, pipe_size) == 0)
- OSAddAtomic(1, (SInt32 *)&nbigpipe);
+ OSAddAtomic(1, &nbigpipe);
PIPE_LOCK(wpipe);
pipeunlock(wpipe);
*/
if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
(fp->f_flag & FNONBLOCK) == 0 &&
- amountpipekvawired + uio->uio_resid < maxpipekvawired) {
+ amountpipekvawired + uio_resid(uio) < maxpipekvawired) {
error = pipe_direct_write(wpipe, uio);
if (error)
break;
}
error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0);
- if (wpipe->pipe_state & PIPE_EOF)
+ if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))
break;
if (error)
break;
int size; /* Transfer size */
int segsize; /* first segment to transfer */
- if (wpipe->pipe_state & PIPE_EOF) {
+ if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
pipeunlock(wpipe);
error = EPIPE;
break;
error = EAGAIN;
break;
}
+
+ /*
+ * If read side wants to go away, we just issue a signal
+ * to ourselves.
+ */
+ if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
+ error = EPIPE;
+ break;
+ }
+
/*
* We have no more space and have something to offer,
* wake up select/poll.
if (error != 0)
break;
- /*
- * If read side wants to go away, we just issue a signal
- * to ourselves.
- */
- if (wpipe->pipe_state & PIPE_EOF) {
- error = EPIPE;
- break;
- }
}
}
--wpipe->pipe_busy;
*/
pipeselwakeup(wpipe, wpipe);
}
+
+ /* Update modification, status change (# of bytes in pipe) times */
+ pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
+ pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
PIPE_UNLOCK(rpipe);
return (error);
*/
/* ARGSUSED 3 */
static int
-pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, __unused struct proc *p)
+pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
+ __unused vfs_context_t ctx)
{
struct pipe *mpipe = (struct pipe *)fp->f_data;
-#ifdef MAC
+#if CONFIG_MACF
int error;
#endif
PIPE_LOCK(mpipe);
-#ifdef MAC
- error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
+#if CONFIG_MACF
+ error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
if (error) {
PIPE_UNLOCK(mpipe);
static int
-pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p)
+pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
struct pipe *rpipe = (struct pipe *)fp->f_data;
struct pipe *wpipe;
wpipe = rpipe->pipe_peer;
+#if CONFIG_MACF
+ /*
+ * XXX We should use a per thread credential here; minimally, the
+ * XXX process credential should have a persistent reference on it
+ * XXX before being passed in here.
+ */
+ if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
+ PIPE_UNLOCK(rpipe);
+ return (0);
+ }
+#endif
switch (which) {
case FREAD:
if ((rpipe->pipe_state & PIPE_DIRECTW) ||
(rpipe->pipe_buffer.cnt > 0) ||
- (rpipe->pipe_state & PIPE_EOF)) {
+ (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
retnum = 1;
} else {
rpipe->pipe_state |= PIPE_SEL;
- selrecord(p, &rpipe->pipe_sel, wql);
+ selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
}
break;
case FWRITE:
- if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
+ if (wpipe)
+ wpipe->pipe_state |= PIPE_WSELECT;
+ if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
(((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
retnum = 1;
} else {
wpipe->pipe_state |= PIPE_SEL;
- selrecord(p, &wpipe->pipe_sel, wql);
+ selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
}
break;
case 0:
rpipe->pipe_state |= PIPE_SEL;
- selrecord(p, &rpipe->pipe_sel, wql);
+ selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
break;
}
PIPE_UNLOCK(rpipe);
/* ARGSUSED 1 */
static int
-pipe_close(struct fileglob *fg, __unused struct proc *p)
+pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
struct pipe *cpipe;
- proc_fdlock(p);
+ proc_fdlock_spin(vfs_context_proc(ctx));
cpipe = (struct pipe *)fg->fg_data;
fg->fg_data = NULL;
- proc_fdunlock(p);
+ proc_fdunlock(vfs_context_proc(ctx));
if (cpipe)
pipeclose(cpipe);
if (cpipe->pipe_buffer.buffer != NULL) {
if (cpipe->pipe_buffer.size > PIPE_SIZE)
- OSAddAtomic(-1, (SInt32 *)&nbigpipe);
- OSAddAtomic(-(cpipe->pipe_buffer.size), (SInt32 *)&amountpipekva);
- OSAddAtomic(-1, (SInt32 *)&amountpipes);
+ OSAddAtomic(-1, &nbigpipe);
+ OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
+ OSAddAtomic(-1, &amountpipes);
kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer,
cpipe->pipe_buffer.size);
#endif
}
+/*
+ * When a thread sets a write-select on a pipe, it creates an implicit,
+ * untracked dependency between that thread and the peer of the pipe
+ * on which the select is set. If the peer pipe is closed and freed
+ * before the select()ing thread wakes up, the system will panic as
+ * it attempts to unwind the dangling select(). To avoid that panic,
+ * we notice whenever a dangerous select() is set on a pipe, and
+ * defer the final deletion of the pipe until all such select()s are
+ * resolved. Since we can't currently detect exactly when that
+ * resolution happens, we use a simple garbage collection queue to
+ * reap the at-risk pipes 'later'.
+ */
+static void
+pipe_garbage_collect(struct pipe *cpipe)
+{
+ uint64_t old, now;
+ struct pipe_garbage *pgp;
+
+ /* Convert msecs to nsecs and then to abstime */
+ old = pipe_garbage_age_limit * 1000000;
+ nanoseconds_to_absolutetime(old, &old);
+
+ lck_mtx_lock(pipe_garbage_lock);
+
+ /* Free anything that's been on the queue for <mumble> seconds */
+ now = mach_absolute_time();
+ old = now - old;
+ while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
+ pipe_garbage_head = pgp->pg_next;
+ if (pipe_garbage_head == NULL)
+ pipe_garbage_tail = NULL;
+ pipe_garbage_count--;
+ zfree(pipe_zone, pgp->pg_pipe);
+ zfree(pipe_garbage_zone, pgp);
+ }
+
+ /* Add the new pipe (if any) to the tail of the garbage queue */
+ if (cpipe) {
+ cpipe->pipe_state = PIPE_DEAD;
+ pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
+ if (pgp == NULL) {
+ /*
+ * We're too low on memory to garbage collect the
+ * pipe. Freeing it runs the risk of panicking the
+ * system. All we can do is leak it and leave
+ * a breadcrumb behind. The good news, such as it
+ * is, is that this will probably never happen.
+ * We will probably hit the panic below first.
+ */
+ printf("Leaking pipe %p - no room left in the queue\n",
+ cpipe);
+ lck_mtx_unlock(pipe_garbage_lock);
+ return;
+ }
+
+ pgp->pg_pipe = cpipe;
+ pgp->pg_timestamp = now;
+ pgp->pg_next = NULL;
+
+ if (pipe_garbage_tail)
+ pipe_garbage_tail->pg_next = pgp;
+ pipe_garbage_tail = pgp;
+ if (pipe_garbage_head == NULL)
+ pipe_garbage_head = pipe_garbage_tail;
+
+ if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
+ panic("Length of pipe garbage queue exceeded %d",
+ PIPE_GARBAGE_QUEUE_LIMIT);
+ }
+ lck_mtx_unlock(pipe_garbage_lock);
+}
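
For context, the "dangerous select()" in question is the ordinary act of select()ing a pipe's write descriptor, which is what sets PIPE_WSELECT in pipe_select(). A userland sketch of the scenario the queue defends against; nothing special is visible from userland, since the deferred zfree() above is precisely what keeps the kernel side safe:

#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <unistd.h>

static int fds[2];

static void *
closer(void *arg)
{
	(void)arg;
	usleep(100000);		/* let the main thread park in select() */
	close(fds[0]);		/* tear down the read end */
	return NULL;
}

int
main(void)
{
	pthread_t t;
	fd_set wset;
	char junk[1024];

	if (pipe(fds) == -1)
		return 1;
	memset(junk, 0, sizeof(junk));
	fcntl(fds[1], F_SETFL, fcntl(fds[1], F_GETFL, 0) | O_NONBLOCK);
	while (write(fds[1], junk, sizeof(junk)) > 0)
		;		/* fill the buffer so FWRITE blocks */

	pthread_create(&t, NULL, closer, NULL);
	FD_ZERO(&wset);
	FD_SET(fds[1], &wset);
	select(fds[1] + 1, NULL, &wset, NULL, NULL);	/* sets PIPE_WSELECT */
	printf("write side woke after peer close\n");
	pthread_join(t, NULL);
	return 0;
}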
+
/*
* shutdown the pipe
*/
if (PIPE_MTX(cpipe) != NULL)
PIPE_LOCK(cpipe);
- pipeselwakeup(cpipe, cpipe);
/*
* If the other side is blocked, wake it up saying that
* we want to close it down.
*/
+ cpipe->pipe_state &= ~PIPE_DRAIN;
+ cpipe->pipe_state |= PIPE_EOF;
+ pipeselwakeup(cpipe, cpipe);
+
while (cpipe->pipe_busy) {
- cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
+ cpipe->pipe_state |= PIPE_WANT;
wakeup(cpipe);
-
msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
}
-#ifdef MAC
+#if CONFIG_MACF
+ /*
+ * Free the shared pipe label only after the two ends are disconnected.
+ */
if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
- mac_destroy_pipe(cpipe);
+ mac_pipe_label_destroy(cpipe);
#endif
/*
*/
if ((ppipe = cpipe->pipe_peer) != NULL) {
+ ppipe->pipe_state &= ~(PIPE_DRAIN);
ppipe->pipe_state |= PIPE_EOF;
pipeselwakeup(ppipe, ppipe);
}
}
pipe_free_kmem(cpipe);
-
- zfree(pipe_zone, cpipe);
+ if (cpipe->pipe_state & PIPE_WSELECT) {
+ pipe_garbage_collect(cpipe);
+ } else {
+ zfree(pipe_zone, cpipe);
+ pipe_garbage_collect(NULL);
+ }
}
-
/*ARGSUSED*/
static int
-pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
+pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
{
struct pipe *cpipe;
cpipe = (struct pipe *)kn->kn_fp->f_data;
PIPE_LOCK(cpipe);
+#if CONFIG_MACF
+ /*
+ * XXX We should use a per thread credential here; minimally, the
+ * XXX process credential should have a persistent reference on it
+ * XXX before being passed in here.
+ */
+ if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
+ PIPE_UNLOCK(cpipe);
+ return (1);
+ }
+#endif
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &pipe_rfiltops;
+
break;
case EVFILT_WRITE:
kn->kn_fop = &pipe_wfiltops;
PIPE_UNLOCK(cpipe);
return (EPIPE);
}
+ if (cpipe->pipe_peer)
cpipe = cpipe->pipe_peer;
break;
default:
if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
kn->kn_data = rpipe->pipe_map.cnt;
#endif
- if ((rpipe->pipe_state & PIPE_EOF) ||
- (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
+ if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
+ (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
kn->kn_flags |= EV_EOF;
retval = 1;
- } else
- retval = (kn->kn_sfflags & NOTE_LOWAT) ?
- (kn->kn_data >= kn->kn_sdata) : (kn->kn_data > 0);
+ } else {
+ int64_t lowwat = 1;
+ if (kn->kn_sfflags & NOTE_LOWAT) {
+ if (rpipe->pipe_buffer.size && kn->kn_sdata > rpipe->pipe_buffer.size)
+ lowwat = rpipe->pipe_buffer.size;
+ else if (kn->kn_sdata > lowwat)
+ lowwat = kn->kn_sdata;
+ }
+ retval = kn->kn_data >= lowwat;
+ }
if (hint == 0)
PIPE_UNLOCK(rpipe);
wpipe = rpipe->pipe_peer;
- if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
+ if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
kn->kn_data = 0;
kn->kn_flags |= EV_EOF;
return (1);
}
kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
+ if (!kn->kn_data && wpipe->pipe_buffer.size == 0)
+ kn->kn_data = PIPE_BUF; /* unwritten pipe is ready for write */
#ifndef PIPE_NODIRECT
if (wpipe->pipe_state & PIPE_DIRECTW)
kn->kn_data = 0;
#endif
+ int64_t lowwat = PIPE_BUF;
+ if (kn->kn_sfflags & NOTE_LOWAT) {
+ if (wpipe->pipe_buffer.size && kn->kn_sdata > wpipe->pipe_buffer.size)
+ lowwat = wpipe->pipe_buffer.size;
+ else if (kn->kn_sdata > lowwat)
+ lowwat = kn->kn_sdata;
+ }
+
if (hint == 0)
PIPE_UNLOCK(rpipe);
- return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
- kn->kn_sdata : PIPE_BUF));
+ return (kn->kn_data >= lowwat);
}
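
The clamping above changes NOTE_LOWAT semantics for both filters: a watermark larger than the pipe's buffer is now satisfied once the buffer fills, rather than never firing. A sketch of a reader registering a low watermark (illustrative values, error handling elided):

#include <stdio.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

int
main(void)
{
	int fds[2], kq;
	struct kevent kev;

	if (pipe(fds) == -1 || (kq = kqueue()) == -1)
		return 1;
	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, NOTE_LOWAT, 8, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);	/* register: fire at >= 8 bytes */

	write(fds[1], "1234", 4);		/* below the watermark: no event */
	write(fds[1], "5678", 4);		/* now kn_data >= lowwat */

	if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1)
		printf("readable, %lld bytes pending\n", (long long)kev.data);
	close(fds[0]);
	close(fds[1]);
	close(kq);
	return 0;
}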
int
fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
{
-#ifdef MAC
+#if CONFIG_MACF
int error;
#endif
struct timeval now;
- struct stat * ub;
+ struct vinfo_stat * ub;
+ int pipe_size = 0;
+ int pipe_count;
if (cpipe == NULL)
return (EBADF);
-#ifdef MAC
PIPE_LOCK(cpipe);
- error = mac_check_pipe_stat(active_cred, cpipe);
- PIPE_UNLOCK(cpipe);
- if (error)
+
+#if CONFIG_MACF
+ error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
+ if (error) {
+ PIPE_UNLOCK(cpipe);
return (error);
+ }
#endif
if (cpipe->pipe_buffer.buffer == 0) {
/*
* must be stat'ing the write fd
*/
- cpipe = cpipe->pipe_peer;
-
- if (cpipe == NULL)
- return (EBADF);
+ if (cpipe->pipe_peer) {
+ /*
+ * the peer still exists, use its info
+ */
+ pipe_size = cpipe->pipe_peer->pipe_buffer.size;
+ pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
+ } else {
+ pipe_count = 0;
+ }
+ } else {
+ pipe_size = cpipe->pipe_buffer.size;
+ pipe_count = cpipe->pipe_buffer.cnt;
}
+ /*
+ * since the peer's buffer is set up outside of the lock
+ * we might catch it in a transient state
+ */
+ if (pipe_size == 0)
+ pipe_size = PIPE_SIZE;
ub = &pinfo->pipe_stat;
bzero(ub, sizeof(*ub));
- ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
- ub->st_blksize = cpipe->pipe_buffer.size;
- ub->st_size = cpipe->pipe_buffer.cnt;
- if (ub->st_blksize != 0);
- ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
- ub->st_nlink = 1;
- ub->st_uid = kauth_getuid();
- ub->st_gid = kauth_getgid();
+ ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+ ub->vst_blksize = pipe_size;
+ ub->vst_size = pipe_count;
+ if (ub->vst_blksize != 0)
+ ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
+ ub->vst_nlink = 1;
+ ub->vst_uid = kauth_getuid();
+ ub->vst_gid = kauth_getgid();
microtime(&now);
- ub->st_atimespec.tv_sec = now.tv_sec;
- ub->st_atimespec.tv_nsec = now.tv_usec * 1000;
- ub->st_mtimespec.tv_sec = now.tv_sec;
- ub->st_mtimespec.tv_nsec = now.tv_usec * 1000;
- ub->st_ctimespec.tv_sec = now.tv_sec;
- ub->st_ctimespec.tv_nsec = now.tv_usec * 1000;
+ ub->vst_atime = now.tv_sec;
+ ub->vst_atimensec = now.tv_usec * 1000;
+ ub->vst_mtime = now.tv_sec;
+ ub->vst_mtimensec = now.tv_usec * 1000;
+ ub->vst_ctime = now.tv_sec;
+ ub->vst_ctimensec = now.tv_usec * 1000;
/*
* Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe);
pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer));
pinfo->pipe_status = cpipe->pipe_state;
+
+ PIPE_UNLOCK(cpipe);
+
return (0);
}
+
+static int
+pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
+{
+
+ /* Note: fdlock already held */
+ struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);
+
+ if (cpipe) {
+ PIPE_LOCK(cpipe);
+ cpipe->pipe_state |= PIPE_DRAIN;
+ cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
+ wakeup(cpipe);
+
+ /* Must wake up peer: a writer sleeps on the read side */
+ if ((ppipe = cpipe->pipe_peer)) {
+ ppipe->pipe_state |= PIPE_DRAIN;
+ ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
+ wakeup(ppipe);
+ }
+
+ PIPE_UNLOCK(cpipe);
+ return 0;
+ }
+
+ return 1;
+}