X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5d5c5d0d5b79ade9a973d55186ffda2638ba2b6e..ebb1b9f42b62218f29061826217bb0f71cd375a6:/bsd/kern/sys_pipe.c

diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c
index 29f6a7202..27f2461b4 100644
--- a/bsd/kern/sys_pipe.c
+++ b/bsd/kern/sys_pipe.c
@@ -17,33 +17,37 @@
  * are met.
  */
 /*
- * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the
- * License may not be used to create, or enable the creation or
- * redistribution of, unlawful or unlicensed copies of an Apple operating
- * system, or to circumvent, violate, or enable the circumvention or
- * violation of, any terms of an Apple operating system software license
- * agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
  * limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
 /*
@@ -115,7 +119,7 @@
 #include
 #include
-#include
+#include
 #include
@@ -154,23 +158,27 @@
 #endif
-
 /*
  * interfaces to the outside world
  */
 static int pipe_read(struct fileproc *fp, struct uio *uio,
-		kauth_cred_t cred, int flags, struct proc *p);
+		int flags, vfs_context_t ctx);
 
 static int pipe_write(struct fileproc *fp, struct uio *uio,
-		kauth_cred_t cred, int flags, struct proc *p);
+		int flags, vfs_context_t ctx);
 
-static int pipe_close(struct fileglob *fg, struct proc *p);
+static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
 
-static int pipe_select(struct fileproc *fp, int which, void * wql, struct proc *p);
+static int pipe_select(struct fileproc *fp, int which, void * wql,
+		vfs_context_t ctx);
 
-static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
+static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
+		vfs_context_t ctx);
 
-static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p);
+static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
+		vfs_context_t ctx);
+
+static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);
 
 
 struct fileops pipeops =
@@ -180,17 +188,23 @@ struct fileops pipeops =
 	pipe_select,
 	pipe_close,
 	pipe_kqfilter,
-	0 };
+	pipe_drain };
 
 static void filt_pipedetach(struct knote *kn);
 static int filt_piperead(struct knote *kn, long hint);
 static int filt_pipewrite(struct knote *kn, long hint);
 
-static struct filterops pipe_rfiltops =
-	{ 1, NULL, filt_pipedetach, filt_piperead };
-static struct filterops pipe_wfiltops =
-	{ 1, NULL, filt_pipedetach, filt_pipewrite };
+static struct filterops pipe_rfiltops = {
+	.f_isfd = 1,
+	.f_detach = filt_pipedetach,
+	.f_event = filt_piperead,
+};
+static struct filterops pipe_wfiltops = {
+	.f_isfd = 1,
+	.f_detach = filt_pipedetach,
+	.f_event = filt_pipewrite,
+};
 
 /*
  * Default pipe buffer size(s), this can be kind-of large now because pipe
@@ -217,21 +231,20 @@ int maxpipekva = 1024 * 1024 * 16;
 
 #if PIPE_SYSCTLS
 SYSCTL_DECL(_kern_ipc);
 
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &maxpipekva, 0, "Pipe KVA limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
 	   &maxpipekvawired, 0, "Pipe KVA wired limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &amountpipes, 0, "Current # of pipes");
-SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &nbigpipe, 0, "Current # of big pipes");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &amountpipekva, 0, "Pipe KVA usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &amountpipekvawired, 0, "Pipe wired KVA usage");
 #endif
 
-void pipeinit(void *dummy __unused);
 static void pipeclose(struct pipe *cpipe);
 static void pipe_free_kmem(struct pipe *cpipe);
 static int pipe_create(struct pipe **cpipep);
@@ -258,12 +271,31 @@ static lck_grp_attr_t *pipe_mtx_grp_attr;
 
 static zone_t pipe_zone;
 
+#define PIPE_GARBAGE_AGE_LIMIT		5000	/* In milliseconds */
+#define PIPE_GARBAGE_QUEUE_LIMIT	32000
+
+struct pipe_garbage {
+	struct pipe		*pg_pipe;
+	struct pipe_garbage	*pg_next;
+	uint64_t		pg_timestamp;
+};
+
+static zone_t
pipe_garbage_zone; +static struct pipe_garbage *pipe_garbage_head = NULL; +static struct pipe_garbage *pipe_garbage_tail = NULL; +static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT; +static int pipe_garbage_count = 0; +static lck_mtx_t *pipe_garbage_lock; + SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); void -pipeinit(void *dummy __unused) +pipeinit(void) { - pipe_zone = (zone_t)zinit(sizeof(struct pipe), 8192 * sizeof(struct pipe), 4096, "pipe zone"); + vm_size_t zone_size; + + zone_size = 8192 * sizeof(struct pipe); + pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone"); /* * allocate lock group attribute and group for pipe mutexes @@ -275,6 +307,43 @@ pipeinit(void *dummy __unused) * allocate the lock attribute for pipe mutexes */ pipe_mtx_attr = lck_attr_alloc_init(); + + /* + * Set up garbage collection for dead pipes + */ + zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) * + sizeof(struct pipe_garbage); + pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage), + zone_size, 4096, "pipe garbage zone"); + pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr); +} + +/* Bitmap for things to touch in pipe_touch() */ +#define PIPE_ATIME 0x00000001 /* time of last access */ +#define PIPE_MTIME 0x00000002 /* time of last modification */ +#define PIPE_CTIME 0x00000004 /* time of last status change */ + +static void +pipe_touch(struct pipe *tpipe, int touch) +{ + struct timeval now; + + microtime(&now); + + if (touch & PIPE_ATIME) { + tpipe->st_atimespec.tv_sec = now.tv_sec; + tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000; + } + + if (touch & PIPE_MTIME) { + tpipe->st_mtimespec.tv_sec = now.tv_sec; + tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000; + } + + if (touch & PIPE_CTIME) { + tpipe->st_ctimespec.tv_sec = now.tv_sec; + tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000; + } } @@ -285,7 +354,7 @@ pipeinit(void *dummy __unused) /* ARGSUSED */ int -pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval) +pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval) { struct fileproc *rf, *wf; struct pipe *rpipe, *wpipe; @@ -322,7 +391,7 @@ pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval) TAILQ_INIT(&rpipe->pipe_evlist); TAILQ_INIT(&wpipe->pipe_evlist); - error = falloc(p, &rf, &fd); + error = falloc(p, &rf, &fd, vfs_context_current()); if (error) { goto freepipes; } @@ -338,7 +407,7 @@ pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval) rf->f_data = (caddr_t)rpipe; rf->f_ops = &pipeops; - error = falloc(p, &wf, &fd); + error = falloc(p, &wf, &fd, vfs_context_current()); if (error) { fp_free(p, retval[0], rf); goto freepipes; @@ -348,30 +417,31 @@ pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval) wf->f_data = (caddr_t)wpipe; wf->f_ops = &pipeops; + rpipe->pipe_peer = wpipe; + wpipe->pipe_peer = rpipe; + rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; + retval[1] = fd; -#ifdef MAC +#if CONFIG_MACF /* * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX * * struct pipe represents a pipe endpoint. The MAC label is shared - * between the connected endpoints. As a result mac_init_pipe() and - * mac_create_pipe() should only be called on one of the endpoints + * between the connected endpoints. As a result mac_pipe_label_init() and + * mac_pipe_label_associate() should only be called on one of the endpoints * after they have been connected. 
*/ - mac_init_pipe(rpipe); - mac_create_pipe(td->td_ucred, rpipe); + mac_pipe_label_init(rpipe); + mac_pipe_label_associate(kauth_cred_get(), rpipe); + wpipe->pipe_label = rpipe->pipe_label; #endif - proc_fdlock(p); - *fdflags(p, retval[0]) &= ~UF_RESERVED; - *fdflags(p, retval[1]) &= ~UF_RESERVED; + proc_fdlock_spin(p); + procfdtbl_releasefd(p, retval[0], NULL); + procfdtbl_releasefd(p, retval[1], NULL); fp_drop(p, retval[0], rf, 1); fp_drop(p, retval[1], wf, 1); proc_fdunlock(p); - rpipe->pipe_peer = wpipe; - wpipe->pipe_peer = rpipe; - - rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; return (0); @@ -383,56 +453,116 @@ freepipes: return (error); } - int -pipe_stat(struct pipe *cpipe, struct stat *ub) +pipe_stat(struct pipe *cpipe, void *ub, int isstat64) { -#ifdef MAC +#if CONFIG_MACF int error; #endif - struct timeval now; + int pipe_size = 0; + int pipe_count; + struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ + struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ if (cpipe == NULL) return (EBADF); -#ifdef MAC PIPE_LOCK(cpipe); - error = mac_check_pipe_stat(active_cred, cpipe); - PIPE_UNLOCK(cpipe); - if (error) + +#if CONFIG_MACF + error = mac_pipe_check_stat(kauth_cred_get(), cpipe); + if (error) { + PIPE_UNLOCK(cpipe); return (error); + } #endif if (cpipe->pipe_buffer.buffer == 0) { /* * must be stat'ing the write fd */ - cpipe = cpipe->pipe_peer; - - if (cpipe == NULL) - return (EBADF); + if (cpipe->pipe_peer) { + /* + * the peer still exists, use it's info + */ + pipe_size = cpipe->pipe_peer->pipe_buffer.size; + pipe_count = cpipe->pipe_peer->pipe_buffer.cnt; + } else { + pipe_count = 0; + } + } else { + pipe_size = cpipe->pipe_buffer.size; + pipe_count = cpipe->pipe_buffer.cnt; } - bzero(ub, sizeof(*ub)); - ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; - ub->st_blksize = cpipe->pipe_buffer.size; - ub->st_size = cpipe->pipe_buffer.cnt; - ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; - ub->st_nlink = 1; + /* + * since peer's buffer is setup ouside of lock + * we might catch it in transient state + */ + if (pipe_size == 0) + pipe_size = PIPE_SIZE; - ub->st_uid = kauth_getuid(); - ub->st_gid = kauth_getgid(); + if (isstat64 != 0) { + sb64 = (struct stat64 *)ub; - microtime(&now); - ub->st_atimespec.tv_sec = now.tv_sec; - ub->st_atimespec.tv_nsec = now.tv_usec * 1000; + bzero(sb64, sizeof(*sb64)); + sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + sb64->st_blksize = pipe_size; + sb64->st_size = pipe_count; + sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize; + + sb64->st_uid = kauth_getuid(); + sb64->st_gid = kauth_getgid(); + + sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec; + sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec; + + sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec; + sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec; + + sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec; + sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec; + + /* + * Return a relatively unique inode number based on the current + * address of this pipe's struct pipe. This number may be recycled + * relatively quickly. 
+ */ + sb64->st_ino = (ino64_t)((uintptr_t)cpipe); + } else { + sb = (struct stat *)ub; + + bzero(sb, sizeof(*sb)); + sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + sb->st_blksize = pipe_size; + sb->st_size = pipe_count; + sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize; + + sb->st_uid = kauth_getuid(); + sb->st_gid = kauth_getgid(); + + sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec; + sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec; + + sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec; + sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec; - ub->st_mtimespec.tv_sec = now.tv_sec; - ub->st_mtimespec.tv_nsec = now.tv_usec * 1000; + sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec; + sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec; - ub->st_ctimespec.tv_sec = now.tv_sec; - ub->st_ctimespec.tv_nsec = now.tv_usec * 1000; + /* + * Return a relatively unique inode number based on the current + * address of this pipe's struct pipe. This number may be recycled + * relatively quickly. + */ + sb->st_ino = (ino_t)(uintptr_t)cpipe; + } + PIPE_UNLOCK(cpipe); /* - * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid. - * XXX (st_dev, st_ino) should be unique. + * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen, + * st_uid, st_gid. + * + * XXX (st_dev) should be unique, but there is no device driver that + * XXX is associated with pipes, since they are implemented via a + * XXX struct fileops indirection rather than as FS objects. */ return (0); } @@ -462,8 +592,8 @@ pipespace(struct pipe *cpipe, int size) cpipe->pipe_buffer.out = 0; cpipe->pipe_buffer.cnt = 0; - OSAddAtomic(1, (SInt32 *)&amountpipes); - OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva); + OSAddAtomic(1, &amountpipes); + OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva); return (0); } @@ -487,6 +617,9 @@ pipe_create(struct pipe **cpipep) */ bzero(cpipe, sizeof *cpipe); + /* Initial times are all the time of creation of the pipe */ + pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME); + return (0); } @@ -494,10 +627,8 @@ pipe_create(struct pipe **cpipep) /* * lock a pipe for I/O, blocking other access */ -static __inline int -pipelock(cpipe, catch) - struct pipe *cpipe; - int catch; +static inline int +pipelock(struct pipe *cpipe, int catch) { int error; @@ -517,11 +648,9 @@ pipelock(cpipe, catch) /* * unlock a pipe I/O lock */ -static __inline void -pipeunlock(cpipe) - struct pipe *cpipe; +static inline void +pipeunlock(struct pipe *cpipe) { - cpipe->pipe_state &= ~PIPE_LOCKFL; if (cpipe->pipe_state & PIPE_LWANT) { @@ -531,11 +660,8 @@ pipeunlock(cpipe) } static void -pipeselwakeup(cpipe, spipe) - struct pipe *cpipe; - struct pipe *spipe; +pipeselwakeup(struct pipe *cpipe, struct pipe *spipe) { - if (cpipe->pipe_state & PIPE_SEL) { cpipe->pipe_state &= ~PIPE_SEL; selwakeup(&cpipe->pipe_sel); @@ -546,18 +672,17 @@ pipeselwakeup(cpipe, spipe) postpipeevent(cpipe, EV_RWBYTES); if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) { - struct proc *p; - if (spipe->pipe_pgid < 0) gsignal(-spipe->pipe_pgid, SIGIO); - else if ((p = pfind(spipe->pipe_pgid)) != (struct proc *)0) - psignal(p, SIGIO); + else + proc_signal(spipe->pipe_pgid, SIGIO); } } /* ARGSUSED */ static int -pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p) +pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags, + __unused vfs_context_t ctx) { 
struct pipe *rpipe = (struct pipe *)fp->f_data; int error; @@ -571,8 +696,8 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cre if (error) goto unlocked_error; -#ifdef MAC - error = mac_check_pipe_read(active_cred, rpipe); +#if CONFIG_MACF + error = mac_pipe_check_read(kauth_cred_get(), rpipe); if (error) goto locked_error; #endif @@ -644,8 +769,9 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cre * detect EOF condition * read returns 0 on EOF, no need to set error */ - if (rpipe->pipe_state & PIPE_EOF) + if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { break; + } /* * If the "write-side" has been blocked, wake it up now. @@ -686,7 +812,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cre goto unlocked_error; } } -#ifdef MAC +#if CONFIG_MACF locked_error: #endif pipeunlock(rpipe); @@ -713,6 +839,9 @@ unlocked_error: if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) pipeselwakeup(rpipe, rpipe->pipe_peer); + /* update last read time */ + pipe_touch(rpipe, PIPE_ATIME); + PIPE_UNLOCK(rpipe); return (error); @@ -882,7 +1011,7 @@ retry: PRIBIO | PCATCH, "pipdww", 0); if (error) goto error1; - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { error = EPIPE; goto error1; } @@ -899,7 +1028,7 @@ retry: PRIBIO | PCATCH, "pipdwc", 0); if (error) goto error1; - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { error = EPIPE; goto error1; } @@ -920,7 +1049,7 @@ retry: error = 0; while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { pipelock(wpipe, 0); PIPE_UNLOCK(wpipe); pipe_destroy_write_buffer(wpipe); @@ -963,7 +1092,8 @@ error1: static int -pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p) +pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, + __unused vfs_context_t ctx) { int error = 0; int orig_resid; @@ -978,12 +1108,12 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr /* * detect loss of pipe read side, issue SIGPIPE if lost. 
*/ - if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) { + if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { PIPE_UNLOCK(rpipe); return (EPIPE); } -#ifdef MAC - error = mac_check_pipe_write(active_cred, wpipe); +#if CONFIG_MACF + error = mac_pipe_check_write(kauth_cred_get(), wpipe); if (error) { PIPE_UNLOCK(rpipe); return (error); @@ -1031,7 +1161,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr if ((error = pipelock(wpipe, 1)) == 0) { PIPE_UNLOCK(wpipe); if (pipespace(wpipe, pipe_size) == 0) - OSAddAtomic(1, (SInt32 *)&nbigpipe); + OSAddAtomic(1, &nbigpipe); PIPE_LOCK(wpipe); pipeunlock(wpipe); @@ -1075,7 +1205,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr */ if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && - amountpipekvawired + uio->uio_resid < maxpipekvawired) { + amountpipekvawired + uio_resid(uio) < maxpipekvawired) { error = pipe_direct_write(wpipe, uio); if (error) break; @@ -1097,7 +1227,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr } error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0); - if (wpipe->pipe_state & PIPE_EOF) + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) break; if (error) break; @@ -1119,7 +1249,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr int size; /* Transfer size */ int segsize; /* first segment to transfer */ - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { pipeunlock(wpipe); error = EPIPE; break; @@ -1230,6 +1360,16 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr error = EAGAIN; break; } + + /* + * If read side wants to go away, we just issue a signal + * to ourselves. + */ + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { + error = EPIPE; + break; + } + /* * We have no more space and have something to offer, * wake up select/poll. @@ -1242,14 +1382,6 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr if (error != 0) break; - /* - * If read side wants to go away, we just issue a signal - * to ourselves. 
- */ - if (wpipe->pipe_state & PIPE_EOF) { - error = EPIPE; - break; - } } } --wpipe->pipe_busy; @@ -1272,6 +1404,10 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr */ pipeselwakeup(wpipe, wpipe); } + + /* Update modification, status change (# of bytes in pipe) times */ + pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME); + pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME); PIPE_UNLOCK(rpipe); return (error); @@ -1282,17 +1418,18 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cr */ /* ARGSUSED 3 */ static int -pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, __unused struct proc *p) +pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, + __unused vfs_context_t ctx) { struct pipe *mpipe = (struct pipe *)fp->f_data; -#ifdef MAC +#if CONFIG_MACF int error; #endif PIPE_LOCK(mpipe); -#ifdef MAC - error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); +#if CONFIG_MACF + error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd); if (error) { PIPE_UNLOCK(mpipe); @@ -1344,7 +1481,7 @@ pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, __unused struct proc * static int -pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p) +pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) { struct pipe *rpipe = (struct pipe *)fp->f_data; struct pipe *wpipe; @@ -1357,34 +1494,47 @@ pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p) wpipe = rpipe->pipe_peer; +#if CONFIG_MACF + /* + * XXX We should use a per thread credential here; minimally, the + * XXX process credential should have a persistent reference on it + * XXX before being passed in here. + */ + if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) { + PIPE_UNLOCK(rpipe); + return (0); + } +#endif switch (which) { case FREAD: if ((rpipe->pipe_state & PIPE_DIRECTW) || (rpipe->pipe_buffer.cnt > 0) || - (rpipe->pipe_state & PIPE_EOF)) { + (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { retnum = 1; } else { rpipe->pipe_state |= PIPE_SEL; - selrecord(p, &rpipe->pipe_sel, wql); + selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql); } break; case FWRITE: - if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || + if (wpipe) + wpipe->pipe_state |= PIPE_WSELECT; + if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) { retnum = 1; } else { wpipe->pipe_state |= PIPE_SEL; - selrecord(p, &wpipe->pipe_sel, wql); + selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql); } break; case 0: rpipe->pipe_state |= PIPE_SEL; - selrecord(p, &rpipe->pipe_sel, wql); + selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql); break; } PIPE_UNLOCK(rpipe); @@ -1395,14 +1545,14 @@ pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p) /* ARGSUSED 1 */ static int -pipe_close(struct fileglob *fg, __unused struct proc *p) +pipe_close(struct fileglob *fg, __unused vfs_context_t ctx) { struct pipe *cpipe; - proc_fdlock(p); + proc_fdlock_spin(vfs_context_proc(ctx)); cpipe = (struct pipe *)fg->fg_data; fg->fg_data = NULL; - proc_fdunlock(p); + proc_fdunlock(vfs_context_proc(ctx)); if (cpipe) pipeclose(cpipe); @@ -1416,9 +1566,9 @@ pipe_free_kmem(struct pipe *cpipe) if (cpipe->pipe_buffer.buffer != NULL) { if (cpipe->pipe_buffer.size > PIPE_SIZE) - OSAddAtomic(-1, (SInt32 *)&nbigpipe); - OSAddAtomic(-(cpipe->pipe_buffer.size), (SInt32 *)&amountpipekva); - OSAddAtomic(-1, (SInt32 *)&amountpipes); 
+ OSAddAtomic(-1, &nbigpipe); + OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva); + OSAddAtomic(-1, &amountpipes); kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size); @@ -1439,6 +1589,78 @@ pipe_free_kmem(struct pipe *cpipe) #endif } +/* + * When a thread sets a write-select on a pipe, it creates an implicit, + * untracked dependency between that thread and the peer of the pipe + * on which the select is set. If the peer pipe is closed and freed + * before the select()ing thread wakes up, the system will panic as + * it attempts to unwind the dangling select(). To avoid that panic, + * we notice whenever a dangerous select() is set on a pipe, and + * defer the final deletion of the pipe until that select()s are all + * resolved. Since we can't currently detect exactly when that + * resolution happens, we use a simple garbage collection queue to + * reap the at-risk pipes 'later'. + */ +static void +pipe_garbage_collect(struct pipe *cpipe) +{ + uint64_t old, now; + struct pipe_garbage *pgp; + + /* Convert msecs to nsecs and then to abstime */ + old = pipe_garbage_age_limit * 1000000; + nanoseconds_to_absolutetime(old, &old); + + lck_mtx_lock(pipe_garbage_lock); + + /* Free anything that's been on the queue for seconds */ + now = mach_absolute_time(); + old = now - old; + while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) { + pipe_garbage_head = pgp->pg_next; + if (pipe_garbage_head == NULL) + pipe_garbage_tail = NULL; + pipe_garbage_count--; + zfree(pipe_zone, pgp->pg_pipe); + zfree(pipe_garbage_zone, pgp); + } + + /* Add the new pipe (if any) to the tail of the garbage queue */ + if (cpipe) { + cpipe->pipe_state = PIPE_DEAD; + pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone); + if (pgp == NULL) { + /* + * We're too low on memory to garbage collect the + * pipe. Freeing it runs the risk of panicing the + * system. All we can do is leak it and leave + * a breadcrumb behind. The good news, such as it + * is, is that this will probably never happen. + * We will probably hit the panic below first. + */ + printf("Leaking pipe %p - no room left in the queue", + cpipe); + lck_mtx_unlock(pipe_garbage_lock); + return; + } + + pgp->pg_pipe = cpipe; + pgp->pg_timestamp = now; + pgp->pg_next = NULL; + + if (pipe_garbage_tail) + pipe_garbage_tail->pg_next = pgp; + pipe_garbage_tail = pgp; + if (pipe_garbage_head == NULL) + pipe_garbage_head = pipe_garbage_tail; + + if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT) + panic("Length of pipe garbage queue exceeded %d", + PIPE_GARBAGE_QUEUE_LIMIT); + } + lck_mtx_unlock(pipe_garbage_lock); +} + /* * shutdown the pipe */ @@ -1454,23 +1676,28 @@ pipeclose(struct pipe *cpipe) if (PIPE_MTX(cpipe) != NULL) PIPE_LOCK(cpipe); - pipeselwakeup(cpipe, cpipe); /* * If the other side is blocked, wake it up saying that * we want to close it down. */ + cpipe->pipe_state &= ~PIPE_DRAIN; + cpipe->pipe_state |= PIPE_EOF; + pipeselwakeup(cpipe, cpipe); + while (cpipe->pipe_busy) { - cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; + cpipe->pipe_state |= PIPE_WANT; wakeup(cpipe); - msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); } -#ifdef MAC +#if CONFIG_MACF + /* + * Free the shared pipe label only after the two ends are disconnected. 
+ */ if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) - mac_destroy_pipe(cpipe); + mac_pipe_label_destroy(cpipe); #endif /* @@ -1478,6 +1705,7 @@ pipeclose(struct pipe *cpipe) */ if ((ppipe = cpipe->pipe_peer) != NULL) { + ppipe->pipe_state &= ~(PIPE_DRAIN); ppipe->pipe_state |= PIPE_EOF; pipeselwakeup(ppipe, ppipe); @@ -1511,24 +1739,39 @@ pipeclose(struct pipe *cpipe) } } pipe_free_kmem(cpipe); - - zfree(pipe_zone, cpipe); + if (cpipe->pipe_state & PIPE_WSELECT) { + pipe_garbage_collect(cpipe); + } else { + zfree(pipe_zone, cpipe); + pipe_garbage_collect(NULL); + } } - /*ARGSUSED*/ static int -pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p) +pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx) { struct pipe *cpipe; cpipe = (struct pipe *)kn->kn_fp->f_data; PIPE_LOCK(cpipe); +#if CONFIG_MACF + /* + * XXX We should use a per thread credential here; minimally, the + * XXX process credential should have a persistent reference on it + * XXX before being passed in here. + */ + if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) { + PIPE_UNLOCK(cpipe); + return (1); + } +#endif switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &pipe_rfiltops; + break; case EVFILT_WRITE: kn->kn_fop = &pipe_wfiltops; @@ -1540,6 +1783,7 @@ pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct pr PIPE_UNLOCK(cpipe); return (EPIPE); } + if (cpipe->pipe_peer) cpipe = cpipe->pipe_peer; break; default: @@ -1599,13 +1843,20 @@ filt_piperead(struct knote *kn, long hint) if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) kn->kn_data = rpipe->pipe_map.cnt; #endif - if ((rpipe->pipe_state & PIPE_EOF) || - (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { + if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || + (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { kn->kn_flags |= EV_EOF; retval = 1; - } else - retval = (kn->kn_sfflags & NOTE_LOWAT) ? - (kn->kn_data >= kn->kn_sdata) : (kn->kn_data > 0); + } else { + int64_t lowwat = 1; + if (kn->kn_sfflags & NOTE_LOWAT) { + if (rpipe->pipe_buffer.size && kn->kn_sdata > rpipe->pipe_buffer.size) + lowwat = rpipe->pipe_buffer.size; + else if (kn->kn_sdata > lowwat) + lowwat = kn->kn_sdata; + } + retval = kn->kn_data >= lowwat; + } if (hint == 0) PIPE_UNLOCK(rpipe); @@ -1631,7 +1882,7 @@ filt_pipewrite(struct knote *kn, long hint) wpipe = rpipe->pipe_peer; - if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { + if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { kn->kn_data = 0; kn->kn_flags |= EV_EOF; @@ -1640,68 +1891,95 @@ filt_pipewrite(struct knote *kn, long hint) return (1); } kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; + if (!kn->kn_data && wpipe->pipe_buffer.size == 0) + kn->kn_data = PIPE_BUF; /* unwritten pipe is ready for write */ #ifndef PIPE_NODIRECT if (wpipe->pipe_state & PIPE_DIRECTW) kn->kn_data = 0; #endif + int64_t lowwat = PIPE_BUF; + if (kn->kn_sfflags & NOTE_LOWAT) { + if (wpipe->pipe_buffer.size && kn->kn_sdata > wpipe->pipe_buffer.size) + lowwat = wpipe->pipe_buffer.size; + else if (kn->kn_sdata > lowwat) + lowwat = kn->kn_sdata; + } + if (hint == 0) PIPE_UNLOCK(rpipe); - return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? 
- kn->kn_sdata : PIPE_BUF)); + return (kn->kn_data >= lowwat); } int fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) { -#ifdef MAC +#if CONFIG_MACF int error; #endif struct timeval now; - struct stat * ub; + struct vinfo_stat * ub; + int pipe_size = 0; + int pipe_count; if (cpipe == NULL) return (EBADF); -#ifdef MAC PIPE_LOCK(cpipe); - error = mac_check_pipe_stat(active_cred, cpipe); - PIPE_UNLOCK(cpipe); - if (error) + +#if CONFIG_MACF + error = mac_pipe_check_stat(kauth_cred_get(), cpipe); + if (error) { + PIPE_UNLOCK(cpipe); return (error); + } #endif if (cpipe->pipe_buffer.buffer == 0) { /* * must be stat'ing the write fd */ - cpipe = cpipe->pipe_peer; - - if (cpipe == NULL) - return (EBADF); + if (cpipe->pipe_peer) { + /* + * the peer still exists, use it's info + */ + pipe_size = cpipe->pipe_peer->pipe_buffer.size; + pipe_count = cpipe->pipe_peer->pipe_buffer.cnt; + } else { + pipe_count = 0; + } + } else { + pipe_size = cpipe->pipe_buffer.size; + pipe_count = cpipe->pipe_buffer.cnt; } + /* + * since peer's buffer is setup ouside of lock + * we might catch it in transient state + */ + if (pipe_size == 0) + pipe_size = PIPE_SIZE; ub = &pinfo->pipe_stat; bzero(ub, sizeof(*ub)); - ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; - ub->st_blksize = cpipe->pipe_buffer.size; - ub->st_size = cpipe->pipe_buffer.cnt; - if (ub->st_blksize != 0); - ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; - ub->st_nlink = 1; + ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + ub->vst_blksize = pipe_size; + ub->vst_size = pipe_count; + if (ub->vst_blksize != 0) + ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize; + ub->vst_nlink = 1; - ub->st_uid = kauth_getuid(); - ub->st_gid = kauth_getgid(); + ub->vst_uid = kauth_getuid(); + ub->vst_gid = kauth_getgid(); microtime(&now); - ub->st_atimespec.tv_sec = now.tv_sec; - ub->st_atimespec.tv_nsec = now.tv_usec * 1000; + ub->vst_atime = now.tv_sec; + ub->vst_atimensec = now.tv_usec * 1000; - ub->st_mtimespec.tv_sec = now.tv_sec; - ub->st_mtimespec.tv_nsec = now.tv_usec * 1000; + ub->vst_mtime = now.tv_sec; + ub->vst_mtimensec = now.tv_usec * 1000; - ub->st_ctimespec.tv_sec = now.tv_sec; - ub->st_ctimespec.tv_nsec = now.tv_usec * 1000; + ub->vst_ctime = now.tv_sec; + ub->vst_ctimensec = now.tv_usec * 1000; /* * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid. @@ -1711,6 +1989,41 @@ fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe); pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer)); pinfo->pipe_status = cpipe->pipe_state; + + PIPE_UNLOCK(cpipe); + return (0); } + +static int +pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx) +{ + + /* Note: fdlock already held */ + struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data); + + if (cpipe) { + PIPE_LOCK(cpipe); + cpipe->pipe_state |= PIPE_DRAIN; + cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); + wakeup(cpipe); + + /* Must wake up peer: a writer sleeps on the read side */ + if ((ppipe = cpipe->pipe_peer)) { + ppipe->pipe_state |= PIPE_DRAIN; + ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); + wakeup(ppipe); + } + + PIPE_UNLOCK(cpipe); + return 0; + } + + return 1; +} + + + + +
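The reworked pipe_stat() reports the peer's buffer when the write end is stat'ed, substitutes PIPE_SIZE while a buffer is still being set up, and returns the timestamps kept by pipe_touch() rather than the current time. A minimal user-space sketch of the side of this that is visible through fstat(2); the five-byte payload is only an illustrative assumption, not a value taken from the diff.

#include <sys/stat.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];
	struct stat st;

	if (pipe(fds) == -1) {
		perror("pipe");
		return 1;
	}

	/* Queue a few bytes so the pipe has something to report. */
	const char msg[] = "hello";
	write(fds[1], msg, strlen(msg));

	/* st_size is the byte count in the pipe, st_blksize its buffer size. */
	if (fstat(fds[0], &st) == 0)
		printf("pending=%lld blksize=%d atime=%ld\n",
		    (long long)st.st_size, (int)st.st_blksize,
		    (long)st.st_atime);

	close(fds[0]);
	close(fds[1]);
	return 0;
}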
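pipe_write() treats a draining or closed read side (PIPE_DRAIN | PIPE_EOF) as loss of the reader and fails with EPIPE, which ordinarily also raises SIGPIPE in the writer. A brief sketch of that user-visible behaviour, with SIGPIPE ignored so the error shows up as a return value instead of a signal.

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];

	if (pipe(fds) == -1) {
		perror("pipe");
		return 1;
	}

	/* Ignore SIGPIPE so the failed write is reported as EPIPE. */
	signal(SIGPIPE, SIG_IGN);

	/* Losing the read side puts the writer on the EPIPE path. */
	close(fds[0]);

	if (write(fds[1], "x", 1) == -1 && errno == EPIPE)
		printf("write failed with EPIPE as expected\n");

	close(fds[1]);
	return 0;
}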
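The rewritten filt_piperead()/filt_pipewrite() no longer compare kn_data against the raw NOTE_LOWAT value; they clamp the requested low-water mark to the pipe's buffer size before deciding readiness. A user-space sketch of how that can be observed through kevent(2); the 64 KB request, the default 16 KB pipe buffer, and the one-second timeout are illustrative assumptions rather than values from the diff.

#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fds[2];
	char c = 'x';

	if (pipe(fds) == -1) {
		perror("pipe");
		return 1;
	}

	/* Force allocation of the pipe's buffer (deferred until the first write). */
	write(fds[1], &c, 1);
	read(fds[0], &c, 1);

	int kq = kqueue();
	if (kq == -1) {
		perror("kqueue");
		return 1;
	}

	/*
	 * Request a 64 KB low-water mark.  With the filter logic above, the
	 * kernel clamps this to the pipe's buffer size, so an empty default
	 * pipe can still report itself writable.
	 */
	struct kevent kev, out;
	EV_SET(&kev, fds[1], EVFILT_WRITE, EV_ADD, NOTE_LOWAT, 64 * 1024, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1) {
		perror("kevent (register)");
		return 1;
	}

	/* Bounded wait so the sketch never hangs if behaviour differs. */
	struct timespec timeout = { 1, 0 };
	int n = kevent(kq, NULL, 0, &out, 1, &timeout);
	if (n == 1)
		printf("pipe fd %d writable, %ld bytes of space\n",
		    (int)out.ident, (long)out.data);
	else
		printf("no write event within 1s\n");

	close(fds[0]);
	close(fds[1]);
	close(kq);
	return 0;
}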