]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_pipe.c
xnu-2050.24.15.tar.gz
[apple/xnu.git] / bsd / kern / sys_pipe.c
CommitLineData
91447636
A
1/*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19/*
2d21ac55 20 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
91447636 21 *
2d21ac55 22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 23 *
2d21ac55
A
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the License
28 * may not be used to create, or enable the creation or redistribution of,
29 * unlawful or unlicensed copies of an Apple operating system, or to
30 * circumvent, violate, or enable the circumvention or violation of, any
31 * terms of an Apple operating system software license agreement.
8f6c56a5 32 *
2d21ac55
A
33 * Please obtain a copy of the License at
34 * http://www.opensource.apple.com/apsl/ and read it before using this file.
35 *
36 * The Original Code and all software distributed under the License are
37 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
38 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
40 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41 * Please see the License for the specific language governing rights and
42 * limitations under the License.
8f6c56a5 43 *
2d21ac55
A
44 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45 */
46/*
47 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
48 * support for mandatory and extensible security protections. This notice
49 * is included in support of clause 2.2 (b) of the Apple Public License,
50 * Version 2.0.
91447636
A
51 */
52
53/*
54 * This file contains a high-performance replacement for the socket-based
55 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
56 * all features of sockets, but does do everything that pipes normally
57 * do.
316670eb
A
58 *
59 * Pipes are implemented as circular buffers. Following are the valid states in pipes operations
60 *
61 * _________________________________
62 * 1. |_________________________________| r=w, c=0
63 *
64 * _________________________________
65 * 2. |__r:::::wc_______________________| r <= w , c > 0
66 *
67 * _________________________________
68 * 3. |::::wc_____r:::::::::::::::::::::| r>w , c > 0
69 *
70 * _________________________________
71 * 4. |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
72 *
73 *
74 * Nomenclature:-
75 * a-z define the steps in a program flow
76 * 1-4 are the states as defined above
77 * Action: is what file operation is done on the pipe
78 *
79 * Current:None Action: initialize with size M=200
80 * a. State 1 ( r=0, w=0, c=0)
81 *
82 * Current: a Action: write(100) (w < M)
83 * b. State 2 (r=0, w=100, c=100)
84 *
85 * Current: b Action: write(100) (w = M-w)
86 * c. State 4 (r=0,w=0,c=200)
87 *
88 * Current: b Action: read(70) ( r < c )
89 * d. State 2(r=70,w=100,c=30)
90 *
91 * Current: d Action: write(75) ( w < (m-w))
92 * e. State 2 (r=70,w=175,c=105)
93 *
94 * Current: d Action: write(110) ( w > (m-w))
95 * f. State 3 (r=70,w=10,c=140)
96 *
97 * Current: d Action: read(30) (r >= c )
98 * g. State 1 (r=100,w=100,c=0)
99 *
91447636
A
100 */
101
102/*
316670eb
A
103 * This code creates half duplex pipe buffers for facilitating file like
104 * operations on pipes. The initial buffer is very small, but this can
105 * dynamically change to larger sizes based on usage. The buffer size is never
106 * reduced. The total amount of kernel memory used is governed by maxpipekva.
107 * If the dynamic expansion limit is reached, the output thread is blocked
108 * until the pipe buffer empties enough to continue.
91447636
A
109 *
110 * In order to limit the resource use of pipes, two sysctls exist:
111 *
112 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
316670eb 113 * address space available to us in pipe_map.
91447636
A
114 *
115 * Memory usage may be monitored through the sysctls
316670eb 116 * kern.ipc.pipes, kern.ipc.pipekva.
91447636
A
117 *
118 */
119
120#include <sys/param.h>
121#include <sys/systm.h>
122#include <sys/filedesc.h>
123#include <sys/kernel.h>
124#include <sys/vnode.h>
125#include <sys/proc_internal.h>
126#include <sys/kauth.h>
127#include <sys/file_internal.h>
128#include <sys/stat.h>
129#include <sys/ioctl.h>
130#include <sys/fcntl.h>
131#include <sys/malloc.h>
132#include <sys/syslog.h>
133#include <sys/unistd.h>
134#include <sys/resourcevar.h>
135#include <sys/aio_kern.h>
136#include <sys/signalvar.h>
137#include <sys/pipe.h>
138#include <sys/sysproto.h>
0c530ab8 139#include <sys/proc_info.h>
91447636 140
b0d623f7 141#include <security/audit/audit.h>
91447636
A
142
143#include <sys/kdebug.h>
144
145#include <kern/zalloc.h>
316670eb 146#include <kern/kalloc.h>
91447636
A
147#include <vm/vm_kern.h>
148#include <libkern/OSAtomic.h>
149
150#define f_flag f_fglob->fg_flag
151#define f_type f_fglob->fg_type
152#define f_msgcount f_fglob->fg_msgcount
153#define f_cred f_fglob->fg_cred
154#define f_ops f_fglob->fg_ops
155#define f_offset f_fglob->fg_offset
156#define f_data f_fglob->fg_data
91447636 157
91447636 158/*
316670eb 159 * interfaces to the outside world exported through file operations
91447636
A
160 */
161static int pipe_read(struct fileproc *fp, struct uio *uio,
2d21ac55 162 int flags, vfs_context_t ctx);
91447636 163static int pipe_write(struct fileproc *fp, struct uio *uio,
2d21ac55 164 int flags, vfs_context_t ctx);
2d21ac55 165static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
2d21ac55
A
166static int pipe_select(struct fileproc *fp, int which, void * wql,
167 vfs_context_t ctx);
2d21ac55
A
168static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
169 vfs_context_t ctx);
2d21ac55
A
170static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
171 vfs_context_t ctx);
b0d623f7
A
172static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);
173
91447636
A
174struct fileops pipeops =
175 { pipe_read,
176 pipe_write,
177 pipe_ioctl,
178 pipe_select,
179 pipe_close,
180 pipe_kqfilter,
b0d623f7 181 pipe_drain };
91447636 182
91447636
A
183static void filt_pipedetach(struct knote *kn);
184static int filt_piperead(struct knote *kn, long hint);
185static int filt_pipewrite(struct knote *kn, long hint);
186
b0d623f7
A
187static struct filterops pipe_rfiltops = {
188 .f_isfd = 1,
189 .f_detach = filt_pipedetach,
190 .f_event = filt_piperead,
191};
316670eb 192
b0d623f7
A
193static struct filterops pipe_wfiltops = {
194 .f_isfd = 1,
195 .f_detach = filt_pipedetach,
196 .f_event = filt_pipewrite,
197};
91447636 198
316670eb
A
199static int nbigpipe; /* for compatibility sake. no longer used */
200static int amountpipes; /* total number of pipes in system */
201static int amountpipekva; /* total memory used by pipes */
91447636 202
316670eb 203int maxpipekva = PIPE_KVAMAX; /* allowing 16MB max. */
91447636
A
204
205#if PIPE_SYSCTLS
206SYSCTL_DECL(_kern_ipc);
207
6d2010ae 208SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 209 &maxpipekva, 0, "Pipe KVA limit");
6d2010ae 210SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
91447636 211 &maxpipekvawired, 0, "Pipe KVA wired limit");
6d2010ae 212SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 213 &amountpipes, 0, "Current # of pipes");
6d2010ae 214SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 215 &nbigpipe, 0, "Current # of big pipes");
6d2010ae 216SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 217 &amountpipekva, 0, "Pipe KVA usage");
6d2010ae 218SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636
A
219 &amountpipekvawired, 0, "Pipe wired KVA usage");
220#endif
221
91447636
A
222static void pipeclose(struct pipe *cpipe);
223static void pipe_free_kmem(struct pipe *cpipe);
224static int pipe_create(struct pipe **cpipep);
316670eb
A
225static int pipespace(struct pipe *cpipe, int size);
226static int choose_pipespace(unsigned long current, unsigned long expected);
227static int expand_pipespace(struct pipe *p, int target_size);
91447636 228static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
316670eb
A
229static __inline int pipeio_lock(struct pipe *cpipe, int catch);
230static __inline void pipeio_unlock(struct pipe *cpipe);
91447636
A
231
232extern int postpipeevent(struct pipe *, int);
233extern void evpipefree(struct pipe *cpipe);
234
91447636
A
235static lck_grp_t *pipe_mtx_grp;
236static lck_attr_t *pipe_mtx_attr;
237static lck_grp_attr_t *pipe_mtx_grp_attr;
238
239static zone_t pipe_zone;
240
316670eb
A
241#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )
242
ebb1b9f4
A
243#define PIPE_GARBAGE_AGE_LIMIT 5000 /* In milliseconds */
244#define PIPE_GARBAGE_QUEUE_LIMIT 32000
245
246struct pipe_garbage {
247 struct pipe *pg_pipe;
248 struct pipe_garbage *pg_next;
249 uint64_t pg_timestamp;
250};
251
252static zone_t pipe_garbage_zone;
253static struct pipe_garbage *pipe_garbage_head = NULL;
254static struct pipe_garbage *pipe_garbage_tail = NULL;
255static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
256static int pipe_garbage_count = 0;
257static lck_mtx_t *pipe_garbage_lock;
316670eb 258static void pipe_garbage_collect(struct pipe *cpipe);
ebb1b9f4 259
91447636
A
260SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
261
316670eb 262/* initial setup done at time of sysinit */
91447636 263void
2d21ac55 264pipeinit(void)
91447636 265{
316670eb 266 nbigpipe=0;
ebb1b9f4 267 vm_size_t zone_size;
316670eb 268
ebb1b9f4
A
269 zone_size = 8192 * sizeof(struct pipe);
270 pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");
91447636 271
316670eb
A
272
273 /* allocate lock group attribute and group for pipe mutexes */
91447636 274 pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
91447636
A
275 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
276
316670eb 277 /* allocate the lock attribute for pipe mutexes */
91447636 278 pipe_mtx_attr = lck_attr_alloc_init();
ebb1b9f4
A
279
280 /*
281 * Set up garbage collection for dead pipes
282 */
283 zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
284 sizeof(struct pipe_garbage);
285 pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
286 zone_size, 4096, "pipe garbage zone");
287 pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
316670eb 288
91447636
A
289}
290
2d21ac55
A
291/* Bitmap for things to touch in pipe_touch() */
292#define PIPE_ATIME 0x00000001 /* time of last access */
293#define PIPE_MTIME 0x00000002 /* time of last modification */
294#define PIPE_CTIME 0x00000004 /* time of last status change */
295
296static void
297pipe_touch(struct pipe *tpipe, int touch)
298{
299 struct timeval now;
300
301 microtime(&now);
302
303 if (touch & PIPE_ATIME) {
304 tpipe->st_atimespec.tv_sec = now.tv_sec;
305 tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000;
306 }
307
308 if (touch & PIPE_MTIME) {
309 tpipe->st_mtimespec.tv_sec = now.tv_sec;
310 tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000;
311 }
312
313 if (touch & PIPE_CTIME) {
314 tpipe->st_ctimespec.tv_sec = now.tv_sec;
315 tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000;
316 }
317}
318
316670eb
A
319static const unsigned int pipesize_blocks[] = {128,256,1024,2048,PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE , PIPE_SIZE * 4 };
320
321/*
322 * finds the right size from possible sizes in pipesize_blocks
323 * returns the size which matches max(current,expected)
324 */
325static int
326choose_pipespace(unsigned long current, unsigned long expected)
327{
328 int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1;
329 unsigned long target;
330
331 if (expected > current)
332 target = expected;
333 else
334 target = current;
335
336 while ( i >0 && pipesize_blocks[i-1] > target) {
337 i=i-1;
338
339 }
340
341 return pipesize_blocks[i];
342}
91447636
A
343
344
316670eb
A
345/*
346 * expand the size of pipe while there is data to be read,
347 * and then free the old buffer once the current buffered
348 * data has been transferred to new storage.
349 * Required: PIPE_LOCK and io lock to be held by caller.
350 * returns 0 on success or no expansion possible
351 */
352static int
353expand_pipespace(struct pipe *p, int target_size)
354{
355 struct pipe tmp, oldpipe;
356 int error;
357 tmp.pipe_buffer.buffer = 0;
358
359 if (p->pipe_buffer.size >= (unsigned) target_size) {
360 return 0; /* the existing buffer is max size possible */
361 }
362
363 /* create enough space in the target */
364 error = pipespace(&tmp, target_size);
365 if (error != 0)
366 return (error);
367
368 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
369 oldpipe.pipe_buffer.size = p->pipe_buffer.size;
370
371 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
372 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out ){
373 /* we are in State 3 and need extra copying for read to be consistent */
374 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
375 p->pipe_buffer.in += p->pipe_buffer.size;
376 }
377
378 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
379 p->pipe_buffer.size = tmp.pipe_buffer.size;
380
381
382 pipe_free_kmem(&oldpipe);
383 return 0;
384}
385
91447636
A
386/*
387 * The pipe system call for the DTYPE_PIPE type of pipes
316670eb
A
388 *
389 * returns:
390 * FREAD | fd0 | -->[struct rpipe] --> |~~buffer~~| \
391 * (pipe_mutex)
392 * FWRITE | fd1 | -->[struct wpipe] --X /
91447636
A
393 */
394
395/* ARGSUSED */
396int
b0d623f7 397pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
91447636
A
398{
399 struct fileproc *rf, *wf;
400 struct pipe *rpipe, *wpipe;
401 lck_mtx_t *pmtx;
402 int fd, error;
403
404 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
405 return (ENOMEM);
406
407 rpipe = wpipe = NULL;
408 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
409 error = ENFILE;
410 goto freepipes;
411 }
412 /*
413 * allocate the space for the normal I/O direction up
414 * front... we'll delay the allocation for the other
316670eb 415 * direction until a write actually occurs (most likely it won't)...
91447636 416 */
316670eb 417 error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
91447636
A
418 if (error)
419 goto freepipes;
420
91447636
A
421 TAILQ_INIT(&rpipe->pipe_evlist);
422 TAILQ_INIT(&wpipe->pipe_evlist);
423
2d21ac55 424 error = falloc(p, &rf, &fd, vfs_context_current());
91447636
A
425 if (error) {
426 goto freepipes;
427 }
428 retval[0] = fd;
429
430 /*
316670eb
A
431 * for now we'll create half-duplex pipes(refer returns section above).
432 * this is what we've always supported..
91447636
A
433 */
434 rf->f_flag = FREAD;
435 rf->f_type = DTYPE_PIPE;
436 rf->f_data = (caddr_t)rpipe;
437 rf->f_ops = &pipeops;
438
2d21ac55 439 error = falloc(p, &wf, &fd, vfs_context_current());
91447636
A
440 if (error) {
441 fp_free(p, retval[0], rf);
442 goto freepipes;
443 }
444 wf->f_flag = FWRITE;
445 wf->f_type = DTYPE_PIPE;
446 wf->f_data = (caddr_t)wpipe;
447 wf->f_ops = &pipeops;
448
6601e61a
A
449 rpipe->pipe_peer = wpipe;
450 wpipe->pipe_peer = rpipe;
316670eb
A
451 /* both structures share the same mutex */
452 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
2d21ac55 453
91447636 454 retval[1] = fd;
2d21ac55 455#if CONFIG_MACF
91447636
A
456 /*
457 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
458 *
459 * struct pipe represents a pipe endpoint. The MAC label is shared
2d21ac55
A
460 * between the connected endpoints. As a result mac_pipe_label_init() and
461 * mac_pipe_label_associate() should only be called on one of the endpoints
91447636
A
462 * after they have been connected.
463 */
2d21ac55
A
464 mac_pipe_label_init(rpipe);
465 mac_pipe_label_associate(kauth_cred_get(), rpipe);
466 wpipe->pipe_label = rpipe->pipe_label;
91447636 467#endif
2d21ac55 468 proc_fdlock_spin(p);
6601e61a
A
469 procfdtbl_releasefd(p, retval[0], NULL);
470 procfdtbl_releasefd(p, retval[1], NULL);
91447636
A
471 fp_drop(p, retval[0], rf, 1);
472 fp_drop(p, retval[1], wf, 1);
473 proc_fdunlock(p);
474
91447636
A
475
476 return (0);
477
478freepipes:
479 pipeclose(rpipe);
480 pipeclose(wpipe);
481 lck_mtx_free(pmtx, pipe_mtx_grp);
482
483 return (error);
484}
485
91447636 486int
2d21ac55 487pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
91447636 488{
2d21ac55 489#if CONFIG_MACF
91447636
A
490 int error;
491#endif
2d21ac55
A
492 int pipe_size = 0;
493 int pipe_count;
494 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
495 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
91447636
A
496
497 if (cpipe == NULL)
498 return (EBADF);
91447636 499 PIPE_LOCK(cpipe);
2d21ac55
A
500
501#if CONFIG_MACF
502 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
503 if (error) {
504 PIPE_UNLOCK(cpipe);
91447636 505 return (error);
2d21ac55 506 }
91447636
A
507#endif
508 if (cpipe->pipe_buffer.buffer == 0) {
316670eb 509 /* must be stat'ing the write fd */
2d21ac55 510 if (cpipe->pipe_peer) {
316670eb
A
511 /* the peer still exists, use it's info */
512 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
2d21ac55
A
513 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
514 } else {
515 pipe_count = 0;
516 }
517 } else {
316670eb 518 pipe_size = MAX_PIPESIZE(cpipe);
2d21ac55 519 pipe_count = cpipe->pipe_buffer.cnt;
91447636 520 }
2d21ac55
A
521 /*
522 * since peer's buffer is setup ouside of lock
523 * we might catch it in transient state
524 */
525 if (pipe_size == 0)
316670eb 526 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);
91447636 527
2d21ac55
A
528 if (isstat64 != 0) {
529 sb64 = (struct stat64 *)ub;
91447636 530
2d21ac55
A
531 bzero(sb64, sizeof(*sb64));
532 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
533 sb64->st_blksize = pipe_size;
534 sb64->st_size = pipe_count;
535 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;
536
537 sb64->st_uid = kauth_getuid();
538 sb64->st_gid = kauth_getgid();
539
540 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
541 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
542
543 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
544 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
91447636 545
2d21ac55
A
546 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
547 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
91447636 548
2d21ac55
A
549 /*
550 * Return a relatively unique inode number based on the current
551 * address of this pipe's struct pipe. This number may be recycled
552 * relatively quickly.
553 */
316670eb 554 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
2d21ac55
A
555 } else {
556 sb = (struct stat *)ub;
557
558 bzero(sb, sizeof(*sb));
559 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
560 sb->st_blksize = pipe_size;
561 sb->st_size = pipe_count;
562 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
563
564 sb->st_uid = kauth_getuid();
565 sb->st_gid = kauth_getgid();
566
567 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
568 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
569
570 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
571 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
572
573 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
574 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
575
576 /*
577 * Return a relatively unique inode number based on the current
578 * address of this pipe's struct pipe. This number may be recycled
579 * relatively quickly.
580 */
316670eb 581 sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
2d21ac55
A
582 }
583 PIPE_UNLOCK(cpipe);
91447636
A
584
585 /*
2d21ac55
A
586 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
587 * st_uid, st_gid.
588 *
589 * XXX (st_dev) should be unique, but there is no device driver that
590 * XXX is associated with pipes, since they are implemented via a
591 * XXX struct fileops indirection rather than as FS objects.
91447636
A
592 */
593 return (0);
594}
595
596
597/*
598 * Allocate kva for pipe circular buffer, the space is pageable
599 * This routine will 'realloc' the size of a pipe safely, if it fails
600 * it will retain the old buffer.
601 * If it fails it will return ENOMEM.
602 */
603static int
604pipespace(struct pipe *cpipe, int size)
605{
606 vm_offset_t buffer;
607
316670eb
A
608 if (size <= 0)
609 return(EINVAL);
91447636 610
316670eb
A
611 if ((buffer = (vm_offset_t)kalloc(size)) == 0 )
612 return(ENOMEM);
91447636
A
613
614 /* free old resources if we're resizing */
615 pipe_free_kmem(cpipe);
616 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
617 cpipe->pipe_buffer.size = size;
618 cpipe->pipe_buffer.in = 0;
619 cpipe->pipe_buffer.out = 0;
620 cpipe->pipe_buffer.cnt = 0;
621
b0d623f7
A
622 OSAddAtomic(1, &amountpipes);
623 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
91447636
A
624
625 return (0);
626}
627
628/*
629 * initialize and allocate VM and memory for pipe
630 */
631static int
632pipe_create(struct pipe **cpipep)
633{
634 struct pipe *cpipe;
91447636
A
635 cpipe = (struct pipe *)zalloc(pipe_zone);
636
637 if ((*cpipep = cpipe) == NULL)
638 return (ENOMEM);
639
640 /*
641 * protect so pipespace or pipeclose don't follow a junk pointer
642 * if pipespace() fails.
643 */
644 bzero(cpipe, sizeof *cpipe);
645
2d21ac55
A
646 /* Initial times are all the time of creation of the pipe */
647 pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
91447636
A
648 return (0);
649}
650
651
652/*
653 * lock a pipe for I/O, blocking other access
654 */
2d21ac55 655static inline int
316670eb 656pipeio_lock(struct pipe *cpipe, int catch)
91447636
A
657{
658 int error;
91447636
A
659 while (cpipe->pipe_state & PIPE_LOCKFL) {
660 cpipe->pipe_state |= PIPE_LWANT;
91447636
A
661 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
662 "pipelk", 0);
663 if (error != 0)
664 return (error);
665 }
666 cpipe->pipe_state |= PIPE_LOCKFL;
91447636
A
667 return (0);
668}
669
670/*
671 * unlock a pipe I/O lock
672 */
2d21ac55 673static inline void
316670eb 674pipeio_unlock(struct pipe *cpipe)
91447636 675{
91447636 676 cpipe->pipe_state &= ~PIPE_LOCKFL;
91447636
A
677 if (cpipe->pipe_state & PIPE_LWANT) {
678 cpipe->pipe_state &= ~PIPE_LWANT;
679 wakeup(cpipe);
680 }
681}
682
316670eb
A
683/*
684 * wakeup anyone whos blocked in select
685 */
91447636 686static void
2d21ac55 687pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
91447636 688{
91447636
A
689 if (cpipe->pipe_state & PIPE_SEL) {
690 cpipe->pipe_state &= ~PIPE_SEL;
691 selwakeup(&cpipe->pipe_sel);
692 }
693 if (cpipe->pipe_state & PIPE_KNOTE)
694 KNOTE(&cpipe->pipe_sel.si_note, 1);
695
696 postpipeevent(cpipe, EV_RWBYTES);
697
698 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
91447636
A
699 if (spipe->pipe_pgid < 0)
700 gsignal(-spipe->pipe_pgid, SIGIO);
2d21ac55
A
701 else
702 proc_signal(spipe->pipe_pgid, SIGIO);
91447636
A
703 }
704}
705
316670eb
A
706/*
707 * Read n bytes from the buffer. Semantics are similar to file read.
708 * returns: number of bytes read from the buffer
709 */
91447636
A
710/* ARGSUSED */
711static int
2d21ac55
A
712pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
713 __unused vfs_context_t ctx)
91447636
A
714{
715 struct pipe *rpipe = (struct pipe *)fp->f_data;
716 int error;
717 int nread = 0;
718 u_int size;
719
720 PIPE_LOCK(rpipe);
721 ++rpipe->pipe_busy;
722
316670eb 723 error = pipeio_lock(rpipe, 1);
91447636
A
724 if (error)
725 goto unlocked_error;
726
2d21ac55
A
727#if CONFIG_MACF
728 error = mac_pipe_check_read(kauth_cred_get(), rpipe);
91447636
A
729 if (error)
730 goto locked_error;
731#endif
732
316670eb 733
91447636
A
734 while (uio_resid(uio)) {
735 /*
736 * normal pipe buffer receive
737 */
738 if (rpipe->pipe_buffer.cnt > 0) {
316670eb
A
739 /*
740 * # bytes to read is min( bytes from read pointer until end of buffer,
741 * total unread bytes,
742 * user requested byte count)
743 */
91447636
A
744 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
745 if (size > rpipe->pipe_buffer.cnt)
746 size = rpipe->pipe_buffer.cnt;
747 // LP64todo - fix this!
748 if (size > (u_int) uio_resid(uio))
749 size = (u_int) uio_resid(uio);
750
316670eb 751 PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
91447636
A
752 error = uiomove(
753 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
754 size, uio);
755 PIPE_LOCK(rpipe);
756 if (error)
757 break;
758
759 rpipe->pipe_buffer.out += size;
760 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
761 rpipe->pipe_buffer.out = 0;
762
763 rpipe->pipe_buffer.cnt -= size;
316670eb 764
91447636
A
765 /*
766 * If there is no more to read in the pipe, reset
767 * its pointers to the beginning. This improves
768 * cache hit stats.
769 */
770 if (rpipe->pipe_buffer.cnt == 0) {
771 rpipe->pipe_buffer.in = 0;
772 rpipe->pipe_buffer.out = 0;
773 }
774 nread += size;
91447636
A
775 } else {
776 /*
777 * detect EOF condition
778 * read returns 0 on EOF, no need to set error
779 */
b0d623f7 780 if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
91447636 781 break;
b0d623f7 782 }
91447636
A
783
784 /*
785 * If the "write-side" has been blocked, wake it up now.
786 */
787 if (rpipe->pipe_state & PIPE_WANTW) {
788 rpipe->pipe_state &= ~PIPE_WANTW;
789 wakeup(rpipe);
790 }
791
792 /*
316670eb 793 * Break if some data was read in previous iteration.
91447636
A
794 */
795 if (nread > 0)
796 break;
797
798 /*
799 * Unlock the pipe buffer for our remaining processing.
800 * We will either break out with an error or we will
801 * sleep and relock to loop.
802 */
316670eb 803 pipeio_unlock(rpipe);
91447636
A
804
805 /*
806 * Handle non-blocking mode operation or
807 * wait for more data.
808 */
809 if (fp->f_flag & FNONBLOCK) {
810 error = EAGAIN;
811 } else {
812 rpipe->pipe_state |= PIPE_WANTR;
91447636 813 error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
91447636 814 if (error == 0)
316670eb 815 error = pipeio_lock(rpipe, 1);
91447636
A
816 }
817 if (error)
818 goto unlocked_error;
819 }
820 }
2d21ac55 821#if CONFIG_MACF
91447636
A
822locked_error:
823#endif
316670eb 824 pipeio_unlock(rpipe);
91447636
A
825
826unlocked_error:
827 --rpipe->pipe_busy;
828
829 /*
830 * PIPE_WANT processing only makes sense if pipe_busy is 0.
831 */
832 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
833 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
834 wakeup(rpipe);
316670eb 835 } else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
91447636
A
836 /*
837 * Handle write blocking hysteresis.
838 */
839 if (rpipe->pipe_state & PIPE_WANTW) {
840 rpipe->pipe_state &= ~PIPE_WANTW;
841 wakeup(rpipe);
842 }
843 }
844
316670eb 845 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
91447636
A
846 pipeselwakeup(rpipe, rpipe->pipe_peer);
847
2d21ac55
A
848 /* update last read time */
849 pipe_touch(rpipe, PIPE_ATIME);
850
91447636
A
851 PIPE_UNLOCK(rpipe);
852
853 return (error);
854}
855
91447636 856/*
316670eb
A
857 * perform a write of n bytes into the read side of buffer. Since
858 * pipes are unidirectional a write is meant to be read by the otherside only.
91447636 859 */
91447636 860static int
2d21ac55
A
861pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
862 __unused vfs_context_t ctx)
91447636
A
863{
864 int error = 0;
865 int orig_resid;
866 int pipe_size;
867 struct pipe *wpipe, *rpipe;
316670eb
A
868 // LP64todo - fix this!
869 orig_resid = uio_resid(uio);
870 int space;
91447636
A
871
872 rpipe = (struct pipe *)fp->f_data;
873
874 PIPE_LOCK(rpipe);
875 wpipe = rpipe->pipe_peer;
876
877 /*
878 * detect loss of pipe read side, issue SIGPIPE if lost.
879 */
b0d623f7 880 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
881 PIPE_UNLOCK(rpipe);
882 return (EPIPE);
883 }
2d21ac55
A
884#if CONFIG_MACF
885 error = mac_pipe_check_write(kauth_cred_get(), wpipe);
91447636
A
886 if (error) {
887 PIPE_UNLOCK(rpipe);
888 return (error);
889 }
890#endif
891 ++wpipe->pipe_busy;
892
893 pipe_size = 0;
894
91447636 895 /*
316670eb
A
896 * need to allocate some storage... we delay the allocation
897 * until the first write on fd[0] to avoid allocating storage for both
898 * 'pipe ends'... most pipes are half-duplex with the writes targeting
899 * fd[1], so allocating space for both ends is a waste...
91447636 900 */
91447636 901
316670eb
A
902 if ( wpipe->pipe_buffer.buffer == 0 || (
903 (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
904 amountpipekva < maxpipekva ) ) {
91447636 905
316670eb 906 pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
91447636
A
907 }
908 if (pipe_size) {
909 /*
910 * need to do initial allocation or resizing of pipe
316670eb 911 * holding both structure and io locks.
91447636 912 */
316670eb
A
913 if ((error = pipeio_lock(wpipe, 1)) == 0) {
914 if (wpipe->pipe_buffer.cnt == 0)
915 error = pipespace(wpipe, pipe_size);
916 else
917 error = expand_pipespace(wpipe, pipe_size);
918
919 pipeio_unlock(wpipe);
920
921 /* allocation failed */
922 if (wpipe->pipe_buffer.buffer == 0)
91447636 923 error = ENOMEM;
91447636
A
924 }
925 if (error) {
926 /*
927 * If an error occurred unbusy and return, waking up any pending
928 * readers.
929 */
930 --wpipe->pipe_busy;
931 if ((wpipe->pipe_busy == 0) &&
932 (wpipe->pipe_state & PIPE_WANT)) {
933 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
934 wakeup(wpipe);
935 }
936 PIPE_UNLOCK(rpipe);
937 return(error);
938 }
939 }
91447636
A
940
941 while (uio_resid(uio)) {
91447636 942
91447636 943 retrywrite:
91447636
A
944 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
945
316670eb 946 /* Writes of size <= PIPE_BUF must be atomic. */
91447636
A
947 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
948 space = 0;
949
950 if (space > 0) {
951
316670eb 952 if ((error = pipeio_lock(wpipe,1)) == 0) {
91447636
A
953 int size; /* Transfer size */
954 int segsize; /* first segment to transfer */
955
b0d623f7 956 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
316670eb 957 pipeio_unlock(wpipe);
91447636
A
958 error = EPIPE;
959 break;
960 }
91447636 961 /*
316670eb 962 * If a process blocked in pipeio_lock, our
91447636
A
963 * value for space might be bad... the mutex
964 * is dropped while we're blocked
965 */
966 if (space > (int)(wpipe->pipe_buffer.size -
967 wpipe->pipe_buffer.cnt)) {
316670eb 968 pipeio_unlock(wpipe);
91447636
A
969 goto retrywrite;
970 }
971
972 /*
973 * Transfer size is minimum of uio transfer
974 * and free space in pipe buffer.
975 */
976 // LP64todo - fix this!
977 if (space > uio_resid(uio))
978 size = uio_resid(uio);
979 else
980 size = space;
981 /*
982 * First segment to transfer is minimum of
983 * transfer size and contiguous space in
984 * pipe buffer. If first segment to transfer
985 * is less than the transfer size, we've got
986 * a wraparound in the buffer.
987 */
988 segsize = wpipe->pipe_buffer.size -
989 wpipe->pipe_buffer.in;
990 if (segsize > size)
991 segsize = size;
992
993 /* Transfer first segment */
994
995 PIPE_UNLOCK(rpipe);
996 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
997 segsize, uio);
998 PIPE_LOCK(rpipe);
999
1000 if (error == 0 && segsize < size) {
1001 /*
1002 * Transfer remaining part now, to
1003 * support atomic writes. Wraparound
316670eb 1004 * happened. (State 3)
91447636
A
1005 */
1006 if (wpipe->pipe_buffer.in + segsize !=
1007 wpipe->pipe_buffer.size)
1008 panic("Expected pipe buffer "
1009 "wraparound disappeared");
1010
1011 PIPE_UNLOCK(rpipe);
1012 error = uiomove(
1013 &wpipe->pipe_buffer.buffer[0],
1014 size - segsize, uio);
1015 PIPE_LOCK(rpipe);
1016 }
316670eb
A
1017 /*
1018 * readers never know to read until count is updated.
1019 */
91447636
A
1020 if (error == 0) {
1021 wpipe->pipe_buffer.in += size;
316670eb 1022 if (wpipe->pipe_buffer.in >
91447636
A
1023 wpipe->pipe_buffer.size) {
1024 if (wpipe->pipe_buffer.in !=
1025 size - segsize +
1026 wpipe->pipe_buffer.size)
1027 panic("Expected "
1028 "wraparound bad");
1029 wpipe->pipe_buffer.in = size -
1030 segsize;
1031 }
1032
1033 wpipe->pipe_buffer.cnt += size;
1034 if (wpipe->pipe_buffer.cnt >
1035 wpipe->pipe_buffer.size)
1036 panic("Pipe buffer overflow");
1037
1038 }
316670eb 1039 pipeio_unlock(wpipe);
91447636
A
1040 }
1041 if (error)
1042 break;
1043
1044 } else {
1045 /*
1046 * If the "read-side" has been blocked, wake it up now.
1047 */
1048 if (wpipe->pipe_state & PIPE_WANTR) {
1049 wpipe->pipe_state &= ~PIPE_WANTR;
1050 wakeup(wpipe);
1051 }
1052 /*
1053 * don't block on non-blocking I/O
1054 * we'll do the pipeselwakeup on the way out
1055 */
1056 if (fp->f_flag & FNONBLOCK) {
1057 error = EAGAIN;
1058 break;
1059 }
6d2010ae
A
1060
1061 /*
1062 * If read side wants to go away, we just issue a signal
1063 * to ourselves.
1064 */
1065 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
1066 error = EPIPE;
1067 break;
1068 }
1069
91447636
A
1070 /*
1071 * We have no more space and have something to offer,
1072 * wake up select/poll.
1073 */
1074 pipeselwakeup(wpipe, wpipe);
1075
1076 wpipe->pipe_state |= PIPE_WANTW;
1077
1078 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);
1079
1080 if (error != 0)
1081 break;
91447636
A
1082 }
1083 }
1084 --wpipe->pipe_busy;
1085
1086 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1087 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1088 wakeup(wpipe);
1089 }
1090 if (wpipe->pipe_buffer.cnt > 0) {
1091 /*
1092 * If there are any characters in the buffer, we wake up
1093 * the reader if it was blocked waiting for data.
1094 */
1095 if (wpipe->pipe_state & PIPE_WANTR) {
1096 wpipe->pipe_state &= ~PIPE_WANTR;
1097 wakeup(wpipe);
1098 }
1099 /*
1100 * wake up thread blocked in select/poll or post the notification
1101 */
1102 pipeselwakeup(wpipe, wpipe);
1103 }
2d21ac55
A
1104
1105 /* Update modification, status change (# of bytes in pipe) times */
1106 pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
1107 pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
91447636
A
1108 PIPE_UNLOCK(rpipe);
1109
1110 return (error);
1111}
1112
1113/*
1114 * we implement a very minimal set of ioctls for compatibility with sockets.
1115 */
1116/* ARGSUSED 3 */
1117static int
2d21ac55
A
1118pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
1119 __unused vfs_context_t ctx)
91447636
A
1120{
1121 struct pipe *mpipe = (struct pipe *)fp->f_data;
2d21ac55 1122#if CONFIG_MACF
91447636
A
1123 int error;
1124#endif
1125
1126 PIPE_LOCK(mpipe);
1127
2d21ac55
A
1128#if CONFIG_MACF
1129 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
91447636
A
1130 if (error) {
1131 PIPE_UNLOCK(mpipe);
1132
1133 return (error);
1134 }
1135#endif
1136
1137 switch (cmd) {
1138
1139 case FIONBIO:
1140 PIPE_UNLOCK(mpipe);
1141 return (0);
1142
1143 case FIOASYNC:
1144 if (*(int *)data) {
1145 mpipe->pipe_state |= PIPE_ASYNC;
1146 } else {
1147 mpipe->pipe_state &= ~PIPE_ASYNC;
1148 }
1149 PIPE_UNLOCK(mpipe);
1150 return (0);
1151
1152 case FIONREAD:
316670eb 1153 *(int *)data = mpipe->pipe_buffer.cnt;
91447636
A
1154 PIPE_UNLOCK(mpipe);
1155 return (0);
1156
1157 case TIOCSPGRP:
1158 mpipe->pipe_pgid = *(int *)data;
1159
1160 PIPE_UNLOCK(mpipe);
1161 return (0);
1162
1163 case TIOCGPGRP:
1164 *(int *)data = mpipe->pipe_pgid;
1165
1166 PIPE_UNLOCK(mpipe);
1167 return (0);
1168
1169 }
1170 PIPE_UNLOCK(mpipe);
1171 return (ENOTTY);
1172}
1173
1174
1175static int
2d21ac55 1176pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
91447636
A
1177{
1178 struct pipe *rpipe = (struct pipe *)fp->f_data;
1179 struct pipe *wpipe;
1180 int retnum = 0;
1181
1182 if (rpipe == NULL || rpipe == (struct pipe *)-1)
1183 return (retnum);
1184
1185 PIPE_LOCK(rpipe);
1186
1187 wpipe = rpipe->pipe_peer;
316670eb 1188
91447636 1189
2d21ac55
A
1190#if CONFIG_MACF
1191 /*
1192 * XXX We should use a per thread credential here; minimally, the
1193 * XXX process credential should have a persistent reference on it
1194 * XXX before being passed in here.
1195 */
1196 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
1197 PIPE_UNLOCK(rpipe);
1198 return (0);
1199 }
1200#endif
91447636
A
1201 switch (which) {
1202
1203 case FREAD:
1204 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1205 (rpipe->pipe_buffer.cnt > 0) ||
b0d623f7 1206 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
1207
1208 retnum = 1;
1209 } else {
1210 rpipe->pipe_state |= PIPE_SEL;
2d21ac55 1211 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
91447636
A
1212 }
1213 break;
1214
1215 case FWRITE:
ebb1b9f4
A
1216 if (wpipe)
1217 wpipe->pipe_state |= PIPE_WSELECT;
b0d623f7 1218 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
91447636 1219 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
316670eb 1220 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) > 0)) {
91447636
A
1221
1222 retnum = 1;
1223 } else {
1224 wpipe->pipe_state |= PIPE_SEL;
2d21ac55 1225 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
91447636
A
1226 }
1227 break;
1228 case 0:
1229 rpipe->pipe_state |= PIPE_SEL;
2d21ac55 1230 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
91447636
A
1231 break;
1232 }
1233 PIPE_UNLOCK(rpipe);
1234
1235 return (retnum);
1236}
1237
1238
1239/* ARGSUSED 1 */
1240static int
2d21ac55 1241pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
91447636
A
1242{
1243 struct pipe *cpipe;
1244
2d21ac55 1245 proc_fdlock_spin(vfs_context_proc(ctx));
91447636
A
1246 cpipe = (struct pipe *)fg->fg_data;
1247 fg->fg_data = NULL;
2d21ac55 1248 proc_fdunlock(vfs_context_proc(ctx));
91447636
A
1249 if (cpipe)
1250 pipeclose(cpipe);
1251
1252 return (0);
1253}
1254
1255static void
1256pipe_free_kmem(struct pipe *cpipe)
1257{
91447636 1258 if (cpipe->pipe_buffer.buffer != NULL) {
b0d623f7
A
1259 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
1260 OSAddAtomic(-1, &amountpipes);
316670eb 1261 kfree((void *)cpipe->pipe_buffer.buffer,
91447636
A
1262 cpipe->pipe_buffer.size);
1263 cpipe->pipe_buffer.buffer = NULL;
316670eb 1264 cpipe->pipe_buffer.size = 0;
91447636 1265 }
ebb1b9f4
A
1266}
1267
91447636
A
1268/*
1269 * shutdown the pipe
1270 */
1271static void
1272pipeclose(struct pipe *cpipe)
1273{
1274 struct pipe *ppipe;
1275
1276 if (cpipe == NULL)
1277 return;
91447636
A
1278 /* partially created pipes won't have a valid mutex. */
1279 if (PIPE_MTX(cpipe) != NULL)
1280 PIPE_LOCK(cpipe);
1281
91447636
A
1282
1283 /*
1284 * If the other side is blocked, wake it up saying that
1285 * we want to close it down.
1286 */
b0d623f7 1287 cpipe->pipe_state &= ~PIPE_DRAIN;
2d21ac55
A
1288 cpipe->pipe_state |= PIPE_EOF;
1289 pipeselwakeup(cpipe, cpipe);
1290
91447636 1291 while (cpipe->pipe_busy) {
2d21ac55 1292 cpipe->pipe_state |= PIPE_WANT;
91447636
A
1293
1294 wakeup(cpipe);
91447636
A
1295 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1296 }
1297
2d21ac55
A
1298#if CONFIG_MACF
1299 /*
1300 * Free the shared pipe label only after the two ends are disconnected.
1301 */
91447636 1302 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
2d21ac55 1303 mac_pipe_label_destroy(cpipe);
91447636
A
1304#endif
1305
1306 /*
1307 * Disconnect from peer
1308 */
1309 if ((ppipe = cpipe->pipe_peer) != NULL) {
1310
b0d623f7 1311 ppipe->pipe_state &= ~(PIPE_DRAIN);
91447636
A
1312 ppipe->pipe_state |= PIPE_EOF;
1313
1314 pipeselwakeup(ppipe, ppipe);
1315 wakeup(ppipe);
1316
1317 if (cpipe->pipe_state & PIPE_KNOTE)
1318 KNOTE(&ppipe->pipe_sel.si_note, 1);
1319
1320 postpipeevent(ppipe, EV_RCLOSED);
1321
1322 ppipe->pipe_peer = NULL;
1323 }
1324 evpipefree(cpipe);
1325
1326 /*
1327 * free resources
1328 */
1329 if (PIPE_MTX(cpipe) != NULL) {
1330 if (ppipe != NULL) {
1331 /*
1332 * since the mutex is shared and the peer is still
1333 * alive, we need to release the mutex, not free it
1334 */
1335 PIPE_UNLOCK(cpipe);
1336 } else {
1337 /*
1338 * peer is gone, so we're the sole party left with
1339 * interest in this mutex... we can just free it
1340 */
1341 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
1342 }
1343 }
1344 pipe_free_kmem(cpipe);
ebb1b9f4
A
1345 if (cpipe->pipe_state & PIPE_WSELECT) {
1346 pipe_garbage_collect(cpipe);
1347 } else {
1348 zfree(pipe_zone, cpipe);
1349 pipe_garbage_collect(NULL);
1350 }
316670eb 1351
91447636
A
1352}
1353
91447636
A
1354/*ARGSUSED*/
1355static int
2d21ac55 1356pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
91447636
A
1357{
1358 struct pipe *cpipe;
1359
1360 cpipe = (struct pipe *)kn->kn_fp->f_data;
1361
1362 PIPE_LOCK(cpipe);
2d21ac55
A
1363#if CONFIG_MACF
1364 /*
1365 * XXX We should use a per thread credential here; minimally, the
1366 * XXX process credential should have a persistent reference on it
1367 * XXX before being passed in here.
1368 */
1369 if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
1370 PIPE_UNLOCK(cpipe);
1371 return (1);
1372 }
1373#endif
91447636
A
1374
1375 switch (kn->kn_filter) {
1376 case EVFILT_READ:
1377 kn->kn_fop = &pipe_rfiltops;
2d21ac55 1378
91447636
A
1379 break;
1380 case EVFILT_WRITE:
1381 kn->kn_fop = &pipe_wfiltops;
1382
1383 if (cpipe->pipe_peer == NULL) {
1384 /*
1385 * other end of pipe has been closed
1386 */
1387 PIPE_UNLOCK(cpipe);
1388 return (EPIPE);
1389 }
2d21ac55 1390 if (cpipe->pipe_peer)
91447636
A
1391 cpipe = cpipe->pipe_peer;
1392 break;
1393 default:
1394 PIPE_UNLOCK(cpipe);
1395 return (1);
1396 }
1397
1398 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
1399 cpipe->pipe_state |= PIPE_KNOTE;
1400
1401 PIPE_UNLOCK(cpipe);
1402 return (0);
1403}
1404
1405static void
1406filt_pipedetach(struct knote *kn)
1407{
1408 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1409
1410 PIPE_LOCK(cpipe);
1411
1412 if (kn->kn_filter == EVFILT_WRITE) {
1413 if (cpipe->pipe_peer == NULL) {
1414 PIPE_UNLOCK(cpipe);
1415 return;
1416 }
1417 cpipe = cpipe->pipe_peer;
1418 }
1419 if (cpipe->pipe_state & PIPE_KNOTE) {
1420 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
1421 cpipe->pipe_state &= ~PIPE_KNOTE;
1422 }
1423 PIPE_UNLOCK(cpipe);
1424}
1425
1426/*ARGSUSED*/
1427static int
1428filt_piperead(struct knote *kn, long hint)
1429{
1430 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1431 struct pipe *wpipe;
1432 int retval;
1433
1434 /*
1435 * if hint == 0, then we've been called from the kevent
1436 * world directly and do not currently hold the pipe mutex...
1437 * if hint == 1, we're being called back via the KNOTE post
1438 * we made in pipeselwakeup, and we already hold the mutex...
1439 */
1440 if (hint == 0)
1441 PIPE_LOCK(rpipe);
1442
1443 wpipe = rpipe->pipe_peer;
1444 kn->kn_data = rpipe->pipe_buffer.cnt;
b0d623f7
A
1445 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1446 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
1447 kn->kn_flags |= EV_EOF;
1448 retval = 1;
2d21ac55 1449 } else {
6d2010ae
A
1450 int64_t lowwat = 1;
1451 if (kn->kn_sfflags & NOTE_LOWAT) {
316670eb
A
1452 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
1453 lowwat = MAX_PIPESIZE(rpipe);
6d2010ae
A
1454 else if (kn->kn_sdata > lowwat)
1455 lowwat = kn->kn_sdata;
1456 }
1457 retval = kn->kn_data >= lowwat;
2d21ac55 1458 }
91447636
A
1459
1460 if (hint == 0)
1461 PIPE_UNLOCK(rpipe);
1462
1463 return (retval);
1464}
1465
1466/*ARGSUSED*/
1467static int
1468filt_pipewrite(struct knote *kn, long hint)
1469{
1470 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1471 struct pipe *wpipe;
1472
1473 /*
1474 * if hint == 0, then we've been called from the kevent
1475 * world directly and do not currently hold the pipe mutex...
1476 * if hint == 1, we're being called back via the KNOTE post
1477 * we made in pipeselwakeup, and we already hold the mutex...
1478 */
1479 if (hint == 0)
1480 PIPE_LOCK(rpipe);
1481
1482 wpipe = rpipe->pipe_peer;
1483
b0d623f7 1484 if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
1485 kn->kn_data = 0;
1486 kn->kn_flags |= EV_EOF;
1487
1488 if (hint == 0)
1489 PIPE_UNLOCK(rpipe);
1490 return (1);
1491 }
316670eb 1492 kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;
91447636 1493
6d2010ae
A
1494 int64_t lowwat = PIPE_BUF;
1495 if (kn->kn_sfflags & NOTE_LOWAT) {
316670eb
A
1496 if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
1497 lowwat = MAX_PIPESIZE(wpipe);
6d2010ae
A
1498 else if (kn->kn_sdata > lowwat)
1499 lowwat = kn->kn_sdata;
1500 }
1501
91447636
A
1502 if (hint == 0)
1503 PIPE_UNLOCK(rpipe);
1504
6d2010ae 1505 return (kn->kn_data >= lowwat);
91447636 1506}
0c530ab8
A
1507
1508int
1509fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
1510{
2d21ac55 1511#if CONFIG_MACF
0c530ab8
A
1512 int error;
1513#endif
1514 struct timeval now;
2d21ac55
A
1515 struct vinfo_stat * ub;
1516 int pipe_size = 0;
1517 int pipe_count;
0c530ab8
A
1518
1519 if (cpipe == NULL)
1520 return (EBADF);
0c530ab8 1521 PIPE_LOCK(cpipe);
2d21ac55
A
1522
1523#if CONFIG_MACF
1524 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
1525 if (error) {
1526 PIPE_UNLOCK(cpipe);
0c530ab8 1527 return (error);
2d21ac55 1528 }
0c530ab8
A
1529#endif
1530 if (cpipe->pipe_buffer.buffer == 0) {
1531 /*
1532 * must be stat'ing the write fd
1533 */
2d21ac55
A
1534 if (cpipe->pipe_peer) {
1535 /*
1536 * the peer still exists, use it's info
1537 */
316670eb 1538 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
2d21ac55
A
1539 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
1540 } else {
1541 pipe_count = 0;
1542 }
1543 } else {
316670eb 1544 pipe_size = MAX_PIPESIZE(cpipe);
2d21ac55 1545 pipe_count = cpipe->pipe_buffer.cnt;
0c530ab8 1546 }
2d21ac55
A
1547 /*
1548 * since peer's buffer is setup ouside of lock
1549 * we might catch it in transient state
1550 */
1551 if (pipe_size == 0)
1552 pipe_size = PIPE_SIZE;
0c530ab8
A
1553
1554 ub = &pinfo->pipe_stat;
1555
1556 bzero(ub, sizeof(*ub));
2d21ac55
A
1557 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
1558 ub->vst_blksize = pipe_size;
1559 ub->vst_size = pipe_count;
1560 if (ub->vst_blksize != 0)
1561 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
1562 ub->vst_nlink = 1;
0c530ab8 1563
2d21ac55
A
1564 ub->vst_uid = kauth_getuid();
1565 ub->vst_gid = kauth_getgid();
0c530ab8
A
1566
1567 microtime(&now);
2d21ac55
A
1568 ub->vst_atime = now.tv_sec;
1569 ub->vst_atimensec = now.tv_usec * 1000;
0c530ab8 1570
2d21ac55
A
1571 ub->vst_mtime = now.tv_sec;
1572 ub->vst_mtimensec = now.tv_usec * 1000;
0c530ab8 1573
2d21ac55
A
1574 ub->vst_ctime = now.tv_sec;
1575 ub->vst_ctimensec = now.tv_usec * 1000;
0c530ab8
A
1576
1577 /*
1578 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
1579 * XXX (st_dev, st_ino) should be unique.
1580 */
1581
1582 pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe);
1583 pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer));
1584 pinfo->pipe_status = cpipe->pipe_state;
2d21ac55
A
1585
1586 PIPE_UNLOCK(cpipe);
1587
0c530ab8
A
1588 return (0);
1589}
b0d623f7
A
1590
1591
1592static int
1593pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
1594{
1595
1596 /* Note: fdlock already held */
1597 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);
1598
1599 if (cpipe) {
1600 PIPE_LOCK(cpipe);
1601 cpipe->pipe_state |= PIPE_DRAIN;
1602 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1603 wakeup(cpipe);
1604
1605 /* Must wake up peer: a writer sleeps on the read side */
1606 if ((ppipe = cpipe->pipe_peer)) {
1607 ppipe->pipe_state |= PIPE_DRAIN;
1608 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1609 wakeup(ppipe);
1610 }
1611
1612 PIPE_UNLOCK(cpipe);
1613 return 0;
1614 }
1615
1616 return 1;
1617}
1618
1619
316670eb
A
1620 /*
1621 * When a thread sets a write-select on a pipe, it creates an implicit,
1622 * untracked dependency between that thread and the peer of the pipe
1623 * on which the select is set. If the peer pipe is closed and freed
1624 * before the select()ing thread wakes up, the system will panic as
1625 * it attempts to unwind the dangling select(). To avoid that panic,
1626 * we notice whenever a dangerous select() is set on a pipe, and
1627 * defer the final deletion of the pipe until those select()s are all
1628 * resolved. Since we can't currently detect exactly when that
1629 * resolution happens, we use a simple garbage collection queue to
1630 * reap the at-risk pipes 'later'.
1631 */
1632static void
1633pipe_garbage_collect(struct pipe *cpipe)
1634{
1635 uint64_t old, now;
1636 struct pipe_garbage *pgp;
1637
1638 /* Convert msecs to nsecs and then to abstime */
1639 old = pipe_garbage_age_limit * 1000000;
1640 nanoseconds_to_absolutetime(old, &old);
1641
1642 lck_mtx_lock(pipe_garbage_lock);
1643
1644 /* Free anything that's been on the queue for <mumble> seconds */
1645 now = mach_absolute_time();
1646 old = now - old;
1647 while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
1648 pipe_garbage_head = pgp->pg_next;
1649 if (pipe_garbage_head == NULL)
1650 pipe_garbage_tail = NULL;
1651 pipe_garbage_count--;
1652 zfree(pipe_zone, pgp->pg_pipe);
1653 zfree(pipe_garbage_zone, pgp);
1654 }
1655
1656 /* Add the new pipe (if any) to the tail of the garbage queue */
1657 if (cpipe) {
1658 cpipe->pipe_state = PIPE_DEAD;
1659 pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
1660 if (pgp == NULL) {
1661 /*
1662 * We're too low on memory to garbage collect the
1663 * pipe. Freeing it runs the risk of panicing the
1664 * system. All we can do is leak it and leave
1665 * a breadcrumb behind. The good news, such as it
1666 * is, is that this will probably never happen.
1667 * We will probably hit the panic below first.
1668 */
1669 printf("Leaking pipe %p - no room left in the queue",
1670 cpipe);
1671 lck_mtx_unlock(pipe_garbage_lock);
1672 return;
1673 }
1674
1675 pgp->pg_pipe = cpipe;
1676 pgp->pg_timestamp = now;
1677 pgp->pg_next = NULL;
b0d623f7 1678
316670eb
A
1679 if (pipe_garbage_tail)
1680 pipe_garbage_tail->pg_next = pgp;
1681 pipe_garbage_tail = pgp;
1682 if (pipe_garbage_head == NULL)
1683 pipe_garbage_head = pipe_garbage_tail;
b0d623f7 1684
316670eb
A
1685 if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
1686 panic("Length of pipe garbage queue exceeded %d",
1687 PIPE_GARBAGE_QUEUE_LIMIT);
1688 }
1689 lck_mtx_unlock(pipe_garbage_lock);
1690}
b0d623f7 1691