]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_pipe.c
xnu-2422.115.4.tar.gz
[apple/xnu.git] / bsd / kern / sys_pipe.c
CommitLineData
91447636
A
1/*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19/*
2d21ac55 20 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
91447636 21 *
2d21ac55 22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
91447636 23 *
2d21ac55
A
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the License
28 * may not be used to create, or enable the creation or redistribution of,
29 * unlawful or unlicensed copies of an Apple operating system, or to
30 * circumvent, violate, or enable the circumvention or violation of, any
31 * terms of an Apple operating system software license agreement.
8f6c56a5 32 *
2d21ac55
A
33 * Please obtain a copy of the License at
34 * http://www.opensource.apple.com/apsl/ and read it before using this file.
35 *
36 * The Original Code and all software distributed under the License are
37 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
38 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
40 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41 * Please see the License for the specific language governing rights and
42 * limitations under the License.
8f6c56a5 43 *
2d21ac55
A
44 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45 */
46/*
47 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
48 * support for mandatory and extensible security protections. This notice
49 * is included in support of clause 2.2 (b) of the Apple Public License,
50 * Version 2.0.
91447636
A
51 */
52
/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers.  The valid states of a pipe
 * buffer are shown below (r = read offset, w = write offset, c = byte
 * count):
 *
 *      _________________________________
 * 1.  |_________________________________|  r=w, c=0
 *
 *      _________________________________
 * 2.  |__r:::::wc_______________________|  r <= w, c > 0
 *
 *      _________________________________
 * 3.  |::::wc_____r:::::::::::::::::::::|  r > w, c > 0
 *
 *      _________________________________
 * 4.  |:::::::wrc:::::::::::::::::::::::|  w=r, c = Max size
 *
 *
 * Nomenclature:
 *	a-z	 are the steps in a program flow
 *	1-4	 are the states as defined above
 *	Action:	 is the file operation performed on the pipe
 *
 * Current: None	Action: initialize with size M=200
 *	a. State 1 (r=0, w=0, c=0)
 *
 * Current: a		Action: write(100) (w < M)
 *	b. State 2 (r=0, w=100, c=100)
 *
 * Current: b		Action: write(100) (w = M-w)
 *	c. State 4 (r=0, w=0, c=200)
 *
 * Current: b		Action: read(70) (r < c)
 *	d. State 2 (r=70, w=100, c=30)
 *
 * Current: d		Action: write(75) (w < (M-w))
 *	e. State 2 (r=70, w=175, c=105)
 *
 * Current: d		Action: write(110) (w > (M-w))
 *	f. State 3 (r=70, w=10, c=140)
 *
 * Current: d		Action: read(30) (r >= c)
 *	g. State 1 (r=100, w=100, c=0)
 *
 */
101
/*
 * This code creates half-duplex pipe buffers to support file-like
 * operations on pipes.  The initial buffer is very small, but it can
 * grow dynamically to larger sizes based on usage.  The buffer size is
 * never reduced.  The total amount of kernel memory used is governed by
 * maxpipekva.  If the dynamic expansion limit is reached, the writing
 * thread is blocked until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, the following sysctl exists:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva.
 *
 */
119
120#include <sys/param.h>
121#include <sys/systm.h>
122#include <sys/filedesc.h>
123#include <sys/kernel.h>
124#include <sys/vnode.h>
125#include <sys/proc_internal.h>
126#include <sys/kauth.h>
127#include <sys/file_internal.h>
128#include <sys/stat.h>
129#include <sys/ioctl.h>
130#include <sys/fcntl.h>
131#include <sys/malloc.h>
132#include <sys/syslog.h>
133#include <sys/unistd.h>
134#include <sys/resourcevar.h>
135#include <sys/aio_kern.h>
136#include <sys/signalvar.h>
137#include <sys/pipe.h>
138#include <sys/sysproto.h>
0c530ab8 139#include <sys/proc_info.h>
91447636 140
b0d623f7 141#include <security/audit/audit.h>
91447636
A
142
143#include <sys/kdebug.h>
144
145#include <kern/zalloc.h>
316670eb 146#include <kern/kalloc.h>
91447636
A
147#include <vm/vm_kern.h>
148#include <libkern/OSAtomic.h>
149
/*
 * Shorthand accessors: each f_* name expands to the matching fg_* field
 * reached through f_fglob (used below as fp->f_flag, fp->f_data, ...).
 */
#define f_flag		f_fglob->fg_flag
#define f_msgcount	f_fglob->fg_msgcount
#define f_cred		f_fglob->fg_cred
#define f_ops		f_fglob->fg_ops
#define f_offset	f_fglob->fg_offset
#define f_data		f_fglob->fg_data
91447636 156
91447636 157/*
316670eb 158 * interfaces to the outside world exported through file operations
91447636
A
159 */
160static int pipe_read(struct fileproc *fp, struct uio *uio,
2d21ac55 161 int flags, vfs_context_t ctx);
91447636 162static int pipe_write(struct fileproc *fp, struct uio *uio,
2d21ac55 163 int flags, vfs_context_t ctx);
2d21ac55 164static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
2d21ac55
A
165static int pipe_select(struct fileproc *fp, int which, void * wql,
166 vfs_context_t ctx);
2d21ac55
A
167static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
168 vfs_context_t ctx);
2d21ac55
A
169static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
170 vfs_context_t ctx);
b0d623f7
A
171static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);
172
39236c6e
A
173static const struct fileops pipeops = {
174 DTYPE_PIPE,
175 pipe_read,
176 pipe_write,
177 pipe_ioctl,
178 pipe_select,
179 pipe_close,
180 pipe_kqfilter,
181 pipe_drain
182};
91447636 183
91447636
A
184static void filt_pipedetach(struct knote *kn);
185static int filt_piperead(struct knote *kn, long hint);
186static int filt_pipewrite(struct knote *kn, long hint);
187
b0d623f7
A
188static struct filterops pipe_rfiltops = {
189 .f_isfd = 1,
190 .f_detach = filt_pipedetach,
191 .f_event = filt_piperead,
192};
316670eb 193
b0d623f7
A
194static struct filterops pipe_wfiltops = {
195 .f_isfd = 1,
196 .f_detach = filt_pipedetach,
197 .f_event = filt_pipewrite,
198};
91447636 199
316670eb
A
200static int nbigpipe; /* for compatibility sake. no longer used */
201static int amountpipes; /* total number of pipes in system */
202static int amountpipekva; /* total memory used by pipes */
91447636 203
39236c6e 204int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */
91447636
A
205
206#if PIPE_SYSCTLS
207SYSCTL_DECL(_kern_ipc);
208
6d2010ae 209SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 210 &maxpipekva, 0, "Pipe KVA limit");
6d2010ae 211SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
91447636 212 &maxpipekvawired, 0, "Pipe KVA wired limit");
6d2010ae 213SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 214 &amountpipes, 0, "Current # of pipes");
6d2010ae 215SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 216 &nbigpipe, 0, "Current # of big pipes");
6d2010ae 217SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636 218 &amountpipekva, 0, "Pipe KVA usage");
6d2010ae 219SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
91447636
A
220 &amountpipekvawired, 0, "Pipe wired KVA usage");
221#endif
222
91447636
A
223static void pipeclose(struct pipe *cpipe);
224static void pipe_free_kmem(struct pipe *cpipe);
225static int pipe_create(struct pipe **cpipep);
316670eb
A
226static int pipespace(struct pipe *cpipe, int size);
227static int choose_pipespace(unsigned long current, unsigned long expected);
228static int expand_pipespace(struct pipe *p, int target_size);
91447636 229static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
316670eb
A
230static __inline int pipeio_lock(struct pipe *cpipe, int catch);
231static __inline void pipeio_unlock(struct pipe *cpipe);
91447636
A
232
233extern int postpipeevent(struct pipe *, int);
234extern void evpipefree(struct pipe *cpipe);
235
91447636
A
236static lck_grp_t *pipe_mtx_grp;
237static lck_attr_t *pipe_mtx_attr;
238static lck_grp_attr_t *pipe_mtx_grp_attr;
239
240static zone_t pipe_zone;
241
316670eb
A
242#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )
243
ebb1b9f4
A
244#define PIPE_GARBAGE_AGE_LIMIT 5000 /* In milliseconds */
245#define PIPE_GARBAGE_QUEUE_LIMIT 32000
246
247struct pipe_garbage {
248 struct pipe *pg_pipe;
249 struct pipe_garbage *pg_next;
250 uint64_t pg_timestamp;
251};
252
253static zone_t pipe_garbage_zone;
254static struct pipe_garbage *pipe_garbage_head = NULL;
255static struct pipe_garbage *pipe_garbage_tail = NULL;
256static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
257static int pipe_garbage_count = 0;
258static lck_mtx_t *pipe_garbage_lock;
316670eb 259static void pipe_garbage_collect(struct pipe *cpipe);
ebb1b9f4 260
91447636
A
261SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
262
316670eb 263/* initial setup done at time of sysinit */
91447636 264void
2d21ac55 265pipeinit(void)
91447636 266{
316670eb 267 nbigpipe=0;
ebb1b9f4 268 vm_size_t zone_size;
316670eb 269
ebb1b9f4
A
270 zone_size = 8192 * sizeof(struct pipe);
271 pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");
91447636 272
316670eb
A
273
274 /* allocate lock group attribute and group for pipe mutexes */
91447636 275 pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
91447636
A
276 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
277
316670eb 278 /* allocate the lock attribute for pipe mutexes */
91447636 279 pipe_mtx_attr = lck_attr_alloc_init();
ebb1b9f4
A
280
281 /*
282 * Set up garbage collection for dead pipes
283 */
284 zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
285 sizeof(struct pipe_garbage);
286 pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
287 zone_size, 4096, "pipe garbage zone");
288 pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
316670eb 289
91447636
A
290}
291
2d21ac55
A
292/* Bitmap for things to touch in pipe_touch() */
293#define PIPE_ATIME 0x00000001 /* time of last access */
294#define PIPE_MTIME 0x00000002 /* time of last modification */
295#define PIPE_CTIME 0x00000004 /* time of last status change */
296
297static void
298pipe_touch(struct pipe *tpipe, int touch)
299{
300 struct timeval now;
301
302 microtime(&now);
303
304 if (touch & PIPE_ATIME) {
305 tpipe->st_atimespec.tv_sec = now.tv_sec;
306 tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000;
307 }
308
309 if (touch & PIPE_MTIME) {
310 tpipe->st_mtimespec.tv_sec = now.tv_sec;
311 tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000;
312 }
313
314 if (touch & PIPE_CTIME) {
315 tpipe->st_ctimespec.tv_sec = now.tv_sec;
316 tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000;
317 }
318}
319
316670eb
A
320static const unsigned int pipesize_blocks[] = {128,256,1024,2048,PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE , PIPE_SIZE * 4 };
321
322/*
323 * finds the right size from possible sizes in pipesize_blocks
324 * returns the size which matches max(current,expected)
325 */
326static int
327choose_pipespace(unsigned long current, unsigned long expected)
328{
329 int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1;
330 unsigned long target;
331
332 if (expected > current)
333 target = expected;
334 else
335 target = current;
336
337 while ( i >0 && pipesize_blocks[i-1] > target) {
338 i=i-1;
339
340 }
341
342 return pipesize_blocks[i];
343}
91447636
A
344
345
316670eb
A
346/*
347 * expand the size of pipe while there is data to be read,
348 * and then free the old buffer once the current buffered
349 * data has been transferred to new storage.
350 * Required: PIPE_LOCK and io lock to be held by caller.
351 * returns 0 on success or no expansion possible
352 */
353static int
354expand_pipespace(struct pipe *p, int target_size)
355{
356 struct pipe tmp, oldpipe;
357 int error;
358 tmp.pipe_buffer.buffer = 0;
359
360 if (p->pipe_buffer.size >= (unsigned) target_size) {
361 return 0; /* the existing buffer is max size possible */
362 }
363
364 /* create enough space in the target */
365 error = pipespace(&tmp, target_size);
366 if (error != 0)
367 return (error);
368
369 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
370 oldpipe.pipe_buffer.size = p->pipe_buffer.size;
371
372 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
373 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out ){
374 /* we are in State 3 and need extra copying for read to be consistent */
375 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
376 p->pipe_buffer.in += p->pipe_buffer.size;
377 }
378
379 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
380 p->pipe_buffer.size = tmp.pipe_buffer.size;
381
382
383 pipe_free_kmem(&oldpipe);
384 return 0;
385}
386
91447636
A
387/*
388 * The pipe system call for the DTYPE_PIPE type of pipes
316670eb
A
389 *
390 * returns:
391 * FREAD | fd0 | -->[struct rpipe] --> |~~buffer~~| \
392 * (pipe_mutex)
393 * FWRITE | fd1 | -->[struct wpipe] --X /
91447636
A
394 */
395
396/* ARGSUSED */
397int
b0d623f7 398pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
91447636
A
399{
400 struct fileproc *rf, *wf;
401 struct pipe *rpipe, *wpipe;
402 lck_mtx_t *pmtx;
403 int fd, error;
404
405 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
406 return (ENOMEM);
407
408 rpipe = wpipe = NULL;
409 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
410 error = ENFILE;
411 goto freepipes;
412 }
413 /*
414 * allocate the space for the normal I/O direction up
415 * front... we'll delay the allocation for the other
316670eb 416 * direction until a write actually occurs (most likely it won't)...
91447636 417 */
316670eb 418 error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
91447636
A
419 if (error)
420 goto freepipes;
421
91447636
A
422 TAILQ_INIT(&rpipe->pipe_evlist);
423 TAILQ_INIT(&wpipe->pipe_evlist);
424
2d21ac55 425 error = falloc(p, &rf, &fd, vfs_context_current());
91447636
A
426 if (error) {
427 goto freepipes;
428 }
429 retval[0] = fd;
430
431 /*
316670eb
A
432 * for now we'll create half-duplex pipes(refer returns section above).
433 * this is what we've always supported..
91447636
A
434 */
435 rf->f_flag = FREAD;
91447636
A
436 rf->f_data = (caddr_t)rpipe;
437 rf->f_ops = &pipeops;
438
2d21ac55 439 error = falloc(p, &wf, &fd, vfs_context_current());
91447636
A
440 if (error) {
441 fp_free(p, retval[0], rf);
442 goto freepipes;
443 }
444 wf->f_flag = FWRITE;
91447636
A
445 wf->f_data = (caddr_t)wpipe;
446 wf->f_ops = &pipeops;
447
6601e61a
A
448 rpipe->pipe_peer = wpipe;
449 wpipe->pipe_peer = rpipe;
316670eb
A
450 /* both structures share the same mutex */
451 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
2d21ac55 452
91447636 453 retval[1] = fd;
2d21ac55 454#if CONFIG_MACF
91447636
A
455 /*
456 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
457 *
458 * struct pipe represents a pipe endpoint. The MAC label is shared
2d21ac55
A
459 * between the connected endpoints. As a result mac_pipe_label_init() and
460 * mac_pipe_label_associate() should only be called on one of the endpoints
91447636
A
461 * after they have been connected.
462 */
2d21ac55
A
463 mac_pipe_label_init(rpipe);
464 mac_pipe_label_associate(kauth_cred_get(), rpipe);
465 wpipe->pipe_label = rpipe->pipe_label;
91447636 466#endif
2d21ac55 467 proc_fdlock_spin(p);
6601e61a
A
468 procfdtbl_releasefd(p, retval[0], NULL);
469 procfdtbl_releasefd(p, retval[1], NULL);
91447636
A
470 fp_drop(p, retval[0], rf, 1);
471 fp_drop(p, retval[1], wf, 1);
472 proc_fdunlock(p);
473
91447636
A
474
475 return (0);
476
477freepipes:
478 pipeclose(rpipe);
479 pipeclose(wpipe);
480 lck_mtx_free(pmtx, pipe_mtx_grp);
481
482 return (error);
483}
484
91447636 485int
2d21ac55 486pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
91447636 487{
2d21ac55 488#if CONFIG_MACF
91447636
A
489 int error;
490#endif
2d21ac55
A
491 int pipe_size = 0;
492 int pipe_count;
493 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
494 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
91447636
A
495
496 if (cpipe == NULL)
497 return (EBADF);
91447636 498 PIPE_LOCK(cpipe);
2d21ac55
A
499
500#if CONFIG_MACF
501 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
502 if (error) {
503 PIPE_UNLOCK(cpipe);
91447636 504 return (error);
2d21ac55 505 }
91447636
A
506#endif
507 if (cpipe->pipe_buffer.buffer == 0) {
316670eb 508 /* must be stat'ing the write fd */
2d21ac55 509 if (cpipe->pipe_peer) {
316670eb
A
510 /* the peer still exists, use it's info */
511 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
2d21ac55
A
512 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
513 } else {
514 pipe_count = 0;
515 }
516 } else {
316670eb 517 pipe_size = MAX_PIPESIZE(cpipe);
2d21ac55 518 pipe_count = cpipe->pipe_buffer.cnt;
91447636 519 }
2d21ac55
A
520 /*
521 * since peer's buffer is setup ouside of lock
522 * we might catch it in transient state
523 */
524 if (pipe_size == 0)
316670eb 525 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);
91447636 526
2d21ac55
A
527 if (isstat64 != 0) {
528 sb64 = (struct stat64 *)ub;
91447636 529
2d21ac55
A
530 bzero(sb64, sizeof(*sb64));
531 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
532 sb64->st_blksize = pipe_size;
533 sb64->st_size = pipe_count;
534 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;
535
536 sb64->st_uid = kauth_getuid();
537 sb64->st_gid = kauth_getgid();
538
539 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
540 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
541
542 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
543 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
91447636 544
2d21ac55
A
545 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
546 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
91447636 547
2d21ac55
A
548 /*
549 * Return a relatively unique inode number based on the current
550 * address of this pipe's struct pipe. This number may be recycled
551 * relatively quickly.
552 */
316670eb 553 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
2d21ac55
A
554 } else {
555 sb = (struct stat *)ub;
556
557 bzero(sb, sizeof(*sb));
558 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
559 sb->st_blksize = pipe_size;
560 sb->st_size = pipe_count;
561 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
562
563 sb->st_uid = kauth_getuid();
564 sb->st_gid = kauth_getgid();
565
566 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
567 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
568
569 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
570 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
571
572 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
573 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
574
575 /*
576 * Return a relatively unique inode number based on the current
577 * address of this pipe's struct pipe. This number may be recycled
578 * relatively quickly.
579 */
316670eb 580 sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
2d21ac55
A
581 }
582 PIPE_UNLOCK(cpipe);
91447636
A
583
584 /*
2d21ac55
A
585 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
586 * st_uid, st_gid.
587 *
588 * XXX (st_dev) should be unique, but there is no device driver that
589 * XXX is associated with pipes, since they are implemented via a
590 * XXX struct fileops indirection rather than as FS objects.
91447636
A
591 */
592 return (0);
593}
594
595
596/*
597 * Allocate kva for pipe circular buffer, the space is pageable
598 * This routine will 'realloc' the size of a pipe safely, if it fails
599 * it will retain the old buffer.
600 * If it fails it will return ENOMEM.
601 */
602static int
603pipespace(struct pipe *cpipe, int size)
604{
605 vm_offset_t buffer;
606
316670eb
A
607 if (size <= 0)
608 return(EINVAL);
91447636 609
316670eb
A
610 if ((buffer = (vm_offset_t)kalloc(size)) == 0 )
611 return(ENOMEM);
91447636
A
612
613 /* free old resources if we're resizing */
614 pipe_free_kmem(cpipe);
615 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
616 cpipe->pipe_buffer.size = size;
617 cpipe->pipe_buffer.in = 0;
618 cpipe->pipe_buffer.out = 0;
619 cpipe->pipe_buffer.cnt = 0;
620
b0d623f7
A
621 OSAddAtomic(1, &amountpipes);
622 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
91447636
A
623
624 return (0);
625}
626
627/*
628 * initialize and allocate VM and memory for pipe
629 */
630static int
631pipe_create(struct pipe **cpipep)
632{
633 struct pipe *cpipe;
91447636
A
634 cpipe = (struct pipe *)zalloc(pipe_zone);
635
636 if ((*cpipep = cpipe) == NULL)
637 return (ENOMEM);
638
639 /*
640 * protect so pipespace or pipeclose don't follow a junk pointer
641 * if pipespace() fails.
642 */
643 bzero(cpipe, sizeof *cpipe);
644
2d21ac55
A
645 /* Initial times are all the time of creation of the pipe */
646 pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
91447636
A
647 return (0);
648}
649
650
651/*
652 * lock a pipe for I/O, blocking other access
653 */
2d21ac55 654static inline int
316670eb 655pipeio_lock(struct pipe *cpipe, int catch)
91447636
A
656{
657 int error;
91447636
A
658 while (cpipe->pipe_state & PIPE_LOCKFL) {
659 cpipe->pipe_state |= PIPE_LWANT;
91447636
A
660 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
661 "pipelk", 0);
662 if (error != 0)
663 return (error);
664 }
665 cpipe->pipe_state |= PIPE_LOCKFL;
91447636
A
666 return (0);
667}
668
669/*
670 * unlock a pipe I/O lock
671 */
2d21ac55 672static inline void
316670eb 673pipeio_unlock(struct pipe *cpipe)
91447636 674{
91447636 675 cpipe->pipe_state &= ~PIPE_LOCKFL;
91447636
A
676 if (cpipe->pipe_state & PIPE_LWANT) {
677 cpipe->pipe_state &= ~PIPE_LWANT;
678 wakeup(cpipe);
679 }
680}
681
316670eb
A
682/*
683 * wakeup anyone whos blocked in select
684 */
91447636 685static void
2d21ac55 686pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
91447636 687{
91447636
A
688 if (cpipe->pipe_state & PIPE_SEL) {
689 cpipe->pipe_state &= ~PIPE_SEL;
690 selwakeup(&cpipe->pipe_sel);
691 }
692 if (cpipe->pipe_state & PIPE_KNOTE)
693 KNOTE(&cpipe->pipe_sel.si_note, 1);
694
695 postpipeevent(cpipe, EV_RWBYTES);
696
697 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
91447636
A
698 if (spipe->pipe_pgid < 0)
699 gsignal(-spipe->pipe_pgid, SIGIO);
2d21ac55
A
700 else
701 proc_signal(spipe->pipe_pgid, SIGIO);
91447636
A
702 }
703}
704
316670eb
A
705/*
706 * Read n bytes from the buffer. Semantics are similar to file read.
707 * returns: number of bytes read from the buffer
708 */
91447636
A
709/* ARGSUSED */
710static int
2d21ac55
A
711pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
712 __unused vfs_context_t ctx)
91447636
A
713{
714 struct pipe *rpipe = (struct pipe *)fp->f_data;
715 int error;
716 int nread = 0;
717 u_int size;
718
719 PIPE_LOCK(rpipe);
720 ++rpipe->pipe_busy;
721
316670eb 722 error = pipeio_lock(rpipe, 1);
91447636
A
723 if (error)
724 goto unlocked_error;
725
2d21ac55
A
726#if CONFIG_MACF
727 error = mac_pipe_check_read(kauth_cred_get(), rpipe);
91447636
A
728 if (error)
729 goto locked_error;
730#endif
731
316670eb 732
91447636
A
733 while (uio_resid(uio)) {
734 /*
735 * normal pipe buffer receive
736 */
737 if (rpipe->pipe_buffer.cnt > 0) {
316670eb
A
738 /*
739 * # bytes to read is min( bytes from read pointer until end of buffer,
740 * total unread bytes,
741 * user requested byte count)
742 */
91447636
A
743 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
744 if (size > rpipe->pipe_buffer.cnt)
745 size = rpipe->pipe_buffer.cnt;
746 // LP64todo - fix this!
747 if (size > (u_int) uio_resid(uio))
748 size = (u_int) uio_resid(uio);
749
316670eb 750 PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
91447636
A
751 error = uiomove(
752 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
753 size, uio);
754 PIPE_LOCK(rpipe);
755 if (error)
756 break;
757
758 rpipe->pipe_buffer.out += size;
759 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
760 rpipe->pipe_buffer.out = 0;
761
762 rpipe->pipe_buffer.cnt -= size;
316670eb 763
91447636
A
764 /*
765 * If there is no more to read in the pipe, reset
766 * its pointers to the beginning. This improves
767 * cache hit stats.
768 */
769 if (rpipe->pipe_buffer.cnt == 0) {
770 rpipe->pipe_buffer.in = 0;
771 rpipe->pipe_buffer.out = 0;
772 }
773 nread += size;
91447636
A
774 } else {
775 /*
776 * detect EOF condition
777 * read returns 0 on EOF, no need to set error
778 */
b0d623f7 779 if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
91447636 780 break;
b0d623f7 781 }
91447636
A
782
783 /*
784 * If the "write-side" has been blocked, wake it up now.
785 */
786 if (rpipe->pipe_state & PIPE_WANTW) {
787 rpipe->pipe_state &= ~PIPE_WANTW;
788 wakeup(rpipe);
789 }
790
791 /*
316670eb 792 * Break if some data was read in previous iteration.
91447636
A
793 */
794 if (nread > 0)
795 break;
796
797 /*
798 * Unlock the pipe buffer for our remaining processing.
799 * We will either break out with an error or we will
800 * sleep and relock to loop.
801 */
316670eb 802 pipeio_unlock(rpipe);
91447636
A
803
804 /*
805 * Handle non-blocking mode operation or
806 * wait for more data.
807 */
808 if (fp->f_flag & FNONBLOCK) {
809 error = EAGAIN;
810 } else {
811 rpipe->pipe_state |= PIPE_WANTR;
91447636 812 error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
91447636 813 if (error == 0)
316670eb 814 error = pipeio_lock(rpipe, 1);
91447636
A
815 }
816 if (error)
817 goto unlocked_error;
818 }
819 }
2d21ac55 820#if CONFIG_MACF
91447636
A
821locked_error:
822#endif
316670eb 823 pipeio_unlock(rpipe);
91447636
A
824
825unlocked_error:
826 --rpipe->pipe_busy;
827
828 /*
829 * PIPE_WANT processing only makes sense if pipe_busy is 0.
830 */
831 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
832 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
833 wakeup(rpipe);
316670eb 834 } else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
91447636
A
835 /*
836 * Handle write blocking hysteresis.
837 */
838 if (rpipe->pipe_state & PIPE_WANTW) {
839 rpipe->pipe_state &= ~PIPE_WANTW;
840 wakeup(rpipe);
841 }
842 }
843
316670eb 844 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
91447636
A
845 pipeselwakeup(rpipe, rpipe->pipe_peer);
846
2d21ac55
A
847 /* update last read time */
848 pipe_touch(rpipe, PIPE_ATIME);
849
91447636
A
850 PIPE_UNLOCK(rpipe);
851
852 return (error);
853}
854
91447636 855/*
316670eb
A
856 * perform a write of n bytes into the read side of buffer. Since
857 * pipes are unidirectional a write is meant to be read by the otherside only.
91447636 858 */
91447636 859static int
2d21ac55
A
860pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
861 __unused vfs_context_t ctx)
91447636
A
862{
863 int error = 0;
864 int orig_resid;
865 int pipe_size;
866 struct pipe *wpipe, *rpipe;
316670eb
A
867 // LP64todo - fix this!
868 orig_resid = uio_resid(uio);
869 int space;
91447636
A
870
871 rpipe = (struct pipe *)fp->f_data;
872
873 PIPE_LOCK(rpipe);
874 wpipe = rpipe->pipe_peer;
875
876 /*
877 * detect loss of pipe read side, issue SIGPIPE if lost.
878 */
b0d623f7 879 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
880 PIPE_UNLOCK(rpipe);
881 return (EPIPE);
882 }
2d21ac55
A
883#if CONFIG_MACF
884 error = mac_pipe_check_write(kauth_cred_get(), wpipe);
91447636
A
885 if (error) {
886 PIPE_UNLOCK(rpipe);
887 return (error);
888 }
889#endif
890 ++wpipe->pipe_busy;
891
892 pipe_size = 0;
893
91447636 894 /*
316670eb
A
895 * need to allocate some storage... we delay the allocation
896 * until the first write on fd[0] to avoid allocating storage for both
897 * 'pipe ends'... most pipes are half-duplex with the writes targeting
898 * fd[1], so allocating space for both ends is a waste...
91447636 899 */
91447636 900
316670eb
A
901 if ( wpipe->pipe_buffer.buffer == 0 || (
902 (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
903 amountpipekva < maxpipekva ) ) {
91447636 904
316670eb 905 pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
91447636
A
906 }
907 if (pipe_size) {
908 /*
909 * need to do initial allocation or resizing of pipe
316670eb 910 * holding both structure and io locks.
91447636 911 */
316670eb
A
912 if ((error = pipeio_lock(wpipe, 1)) == 0) {
913 if (wpipe->pipe_buffer.cnt == 0)
914 error = pipespace(wpipe, pipe_size);
915 else
916 error = expand_pipespace(wpipe, pipe_size);
917
918 pipeio_unlock(wpipe);
919
920 /* allocation failed */
921 if (wpipe->pipe_buffer.buffer == 0)
91447636 922 error = ENOMEM;
91447636
A
923 }
924 if (error) {
925 /*
926 * If an error occurred unbusy and return, waking up any pending
927 * readers.
928 */
929 --wpipe->pipe_busy;
930 if ((wpipe->pipe_busy == 0) &&
931 (wpipe->pipe_state & PIPE_WANT)) {
932 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
933 wakeup(wpipe);
934 }
935 PIPE_UNLOCK(rpipe);
936 return(error);
937 }
938 }
91447636
A
939
940 while (uio_resid(uio)) {
91447636 941
91447636 942 retrywrite:
91447636
A
943 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
944
316670eb 945 /* Writes of size <= PIPE_BUF must be atomic. */
91447636
A
946 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
947 space = 0;
948
949 if (space > 0) {
950
316670eb 951 if ((error = pipeio_lock(wpipe,1)) == 0) {
91447636
A
952 int size; /* Transfer size */
953 int segsize; /* first segment to transfer */
954
b0d623f7 955 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
316670eb 956 pipeio_unlock(wpipe);
91447636
A
957 error = EPIPE;
958 break;
959 }
91447636 960 /*
316670eb 961 * If a process blocked in pipeio_lock, our
91447636
A
962 * value for space might be bad... the mutex
963 * is dropped while we're blocked
964 */
965 if (space > (int)(wpipe->pipe_buffer.size -
966 wpipe->pipe_buffer.cnt)) {
316670eb 967 pipeio_unlock(wpipe);
91447636
A
968 goto retrywrite;
969 }
970
971 /*
972 * Transfer size is minimum of uio transfer
973 * and free space in pipe buffer.
974 */
975 // LP64todo - fix this!
976 if (space > uio_resid(uio))
977 size = uio_resid(uio);
978 else
979 size = space;
980 /*
981 * First segment to transfer is minimum of
982 * transfer size and contiguous space in
983 * pipe buffer. If first segment to transfer
984 * is less than the transfer size, we've got
985 * a wraparound in the buffer.
986 */
987 segsize = wpipe->pipe_buffer.size -
988 wpipe->pipe_buffer.in;
989 if (segsize > size)
990 segsize = size;
991
992 /* Transfer first segment */
993
994 PIPE_UNLOCK(rpipe);
995 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
996 segsize, uio);
997 PIPE_LOCK(rpipe);
998
999 if (error == 0 && segsize < size) {
1000 /*
1001 * Transfer remaining part now, to
1002 * support atomic writes. Wraparound
316670eb 1003 * happened. (State 3)
91447636
A
1004 */
1005 if (wpipe->pipe_buffer.in + segsize !=
1006 wpipe->pipe_buffer.size)
1007 panic("Expected pipe buffer "
1008 "wraparound disappeared");
1009
1010 PIPE_UNLOCK(rpipe);
1011 error = uiomove(
1012 &wpipe->pipe_buffer.buffer[0],
1013 size - segsize, uio);
1014 PIPE_LOCK(rpipe);
1015 }
316670eb
A
1016 /*
1017 * readers never know to read until count is updated.
1018 */
91447636
A
1019 if (error == 0) {
1020 wpipe->pipe_buffer.in += size;
316670eb 1021 if (wpipe->pipe_buffer.in >
91447636
A
1022 wpipe->pipe_buffer.size) {
1023 if (wpipe->pipe_buffer.in !=
1024 size - segsize +
1025 wpipe->pipe_buffer.size)
1026 panic("Expected "
1027 "wraparound bad");
1028 wpipe->pipe_buffer.in = size -
1029 segsize;
1030 }
1031
1032 wpipe->pipe_buffer.cnt += size;
1033 if (wpipe->pipe_buffer.cnt >
1034 wpipe->pipe_buffer.size)
1035 panic("Pipe buffer overflow");
1036
1037 }
316670eb 1038 pipeio_unlock(wpipe);
91447636
A
1039 }
1040 if (error)
1041 break;
1042
1043 } else {
1044 /*
1045 * If the "read-side" has been blocked, wake it up now.
1046 */
1047 if (wpipe->pipe_state & PIPE_WANTR) {
1048 wpipe->pipe_state &= ~PIPE_WANTR;
1049 wakeup(wpipe);
1050 }
1051 /*
1052 * don't block on non-blocking I/O
1053 * we'll do the pipeselwakeup on the way out
1054 */
1055 if (fp->f_flag & FNONBLOCK) {
1056 error = EAGAIN;
1057 break;
1058 }
6d2010ae
A
1059
1060 /*
1061 * If read side wants to go away, we just issue a signal
1062 * to ourselves.
1063 */
1064 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
1065 error = EPIPE;
1066 break;
1067 }
1068
91447636
A
1069 /*
1070 * We have no more space and have something to offer,
1071 * wake up select/poll.
1072 */
1073 pipeselwakeup(wpipe, wpipe);
1074
1075 wpipe->pipe_state |= PIPE_WANTW;
1076
1077 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);
1078
1079 if (error != 0)
1080 break;
91447636
A
1081 }
1082 }
1083 --wpipe->pipe_busy;
1084
1085 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1086 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1087 wakeup(wpipe);
1088 }
1089 if (wpipe->pipe_buffer.cnt > 0) {
1090 /*
1091 * If there are any characters in the buffer, we wake up
1092 * the reader if it was blocked waiting for data.
1093 */
1094 if (wpipe->pipe_state & PIPE_WANTR) {
1095 wpipe->pipe_state &= ~PIPE_WANTR;
1096 wakeup(wpipe);
1097 }
1098 /*
1099 * wake up thread blocked in select/poll or post the notification
1100 */
1101 pipeselwakeup(wpipe, wpipe);
1102 }
2d21ac55
A
1103
1104 /* Update modification, status change (# of bytes in pipe) times */
1105 pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
1106 pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
91447636
A
1107 PIPE_UNLOCK(rpipe);
1108
1109 return (error);
1110}
1111
1112/*
1113 * we implement a very minimal set of ioctls for compatibility with sockets.
1114 */
1115/* ARGSUSED 3 */
1116static int
2d21ac55
A
1117pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
1118 __unused vfs_context_t ctx)
91447636
A
1119{
1120 struct pipe *mpipe = (struct pipe *)fp->f_data;
2d21ac55 1121#if CONFIG_MACF
91447636
A
1122 int error;
1123#endif
1124
1125 PIPE_LOCK(mpipe);
1126
2d21ac55
A
1127#if CONFIG_MACF
1128 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
91447636
A
1129 if (error) {
1130 PIPE_UNLOCK(mpipe);
1131
1132 return (error);
1133 }
1134#endif
1135
1136 switch (cmd) {
1137
1138 case FIONBIO:
1139 PIPE_UNLOCK(mpipe);
1140 return (0);
1141
1142 case FIOASYNC:
1143 if (*(int *)data) {
1144 mpipe->pipe_state |= PIPE_ASYNC;
1145 } else {
1146 mpipe->pipe_state &= ~PIPE_ASYNC;
1147 }
1148 PIPE_UNLOCK(mpipe);
1149 return (0);
1150
1151 case FIONREAD:
316670eb 1152 *(int *)data = mpipe->pipe_buffer.cnt;
91447636
A
1153 PIPE_UNLOCK(mpipe);
1154 return (0);
1155
1156 case TIOCSPGRP:
1157 mpipe->pipe_pgid = *(int *)data;
1158
1159 PIPE_UNLOCK(mpipe);
1160 return (0);
1161
1162 case TIOCGPGRP:
1163 *(int *)data = mpipe->pipe_pgid;
1164
1165 PIPE_UNLOCK(mpipe);
1166 return (0);
1167
1168 }
1169 PIPE_UNLOCK(mpipe);
1170 return (ENOTTY);
1171}
1172
1173
1174static int
2d21ac55 1175pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
91447636
A
1176{
1177 struct pipe *rpipe = (struct pipe *)fp->f_data;
1178 struct pipe *wpipe;
1179 int retnum = 0;
1180
1181 if (rpipe == NULL || rpipe == (struct pipe *)-1)
1182 return (retnum);
1183
1184 PIPE_LOCK(rpipe);
1185
1186 wpipe = rpipe->pipe_peer;
316670eb 1187
91447636 1188
2d21ac55
A
1189#if CONFIG_MACF
1190 /*
1191 * XXX We should use a per thread credential here; minimally, the
1192 * XXX process credential should have a persistent reference on it
1193 * XXX before being passed in here.
1194 */
1195 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
1196 PIPE_UNLOCK(rpipe);
1197 return (0);
1198 }
1199#endif
91447636
A
1200 switch (which) {
1201
1202 case FREAD:
1203 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1204 (rpipe->pipe_buffer.cnt > 0) ||
b0d623f7 1205 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
1206
1207 retnum = 1;
1208 } else {
1209 rpipe->pipe_state |= PIPE_SEL;
2d21ac55 1210 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
91447636
A
1211 }
1212 break;
1213
1214 case FWRITE:
ebb1b9f4
A
1215 if (wpipe)
1216 wpipe->pipe_state |= PIPE_WSELECT;
b0d623f7 1217 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
91447636 1218 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
316670eb 1219 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) > 0)) {
91447636
A
1220
1221 retnum = 1;
1222 } else {
1223 wpipe->pipe_state |= PIPE_SEL;
2d21ac55 1224 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
91447636
A
1225 }
1226 break;
1227 case 0:
1228 rpipe->pipe_state |= PIPE_SEL;
2d21ac55 1229 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
91447636
A
1230 break;
1231 }
1232 PIPE_UNLOCK(rpipe);
1233
1234 return (retnum);
1235}
1236
1237
1238/* ARGSUSED 1 */
1239static int
2d21ac55 1240pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
91447636
A
1241{
1242 struct pipe *cpipe;
1243
2d21ac55 1244 proc_fdlock_spin(vfs_context_proc(ctx));
91447636
A
1245 cpipe = (struct pipe *)fg->fg_data;
1246 fg->fg_data = NULL;
2d21ac55 1247 proc_fdunlock(vfs_context_proc(ctx));
91447636
A
1248 if (cpipe)
1249 pipeclose(cpipe);
1250
1251 return (0);
1252}
1253
1254static void
1255pipe_free_kmem(struct pipe *cpipe)
1256{
91447636 1257 if (cpipe->pipe_buffer.buffer != NULL) {
b0d623f7
A
1258 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
1259 OSAddAtomic(-1, &amountpipes);
316670eb 1260 kfree((void *)cpipe->pipe_buffer.buffer,
91447636
A
1261 cpipe->pipe_buffer.size);
1262 cpipe->pipe_buffer.buffer = NULL;
316670eb 1263 cpipe->pipe_buffer.size = 0;
91447636 1264 }
ebb1b9f4
A
1265}
1266
91447636
A
1267/*
1268 * shutdown the pipe
1269 */
1270static void
1271pipeclose(struct pipe *cpipe)
1272{
1273 struct pipe *ppipe;
1274
1275 if (cpipe == NULL)
1276 return;
91447636
A
1277 /* partially created pipes won't have a valid mutex. */
1278 if (PIPE_MTX(cpipe) != NULL)
1279 PIPE_LOCK(cpipe);
1280
91447636
A
1281
1282 /*
1283 * If the other side is blocked, wake it up saying that
1284 * we want to close it down.
1285 */
b0d623f7 1286 cpipe->pipe_state &= ~PIPE_DRAIN;
2d21ac55
A
1287 cpipe->pipe_state |= PIPE_EOF;
1288 pipeselwakeup(cpipe, cpipe);
1289
91447636 1290 while (cpipe->pipe_busy) {
2d21ac55 1291 cpipe->pipe_state |= PIPE_WANT;
91447636
A
1292
1293 wakeup(cpipe);
91447636
A
1294 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1295 }
1296
2d21ac55
A
1297#if CONFIG_MACF
1298 /*
1299 * Free the shared pipe label only after the two ends are disconnected.
1300 */
91447636 1301 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
2d21ac55 1302 mac_pipe_label_destroy(cpipe);
91447636
A
1303#endif
1304
1305 /*
1306 * Disconnect from peer
1307 */
1308 if ((ppipe = cpipe->pipe_peer) != NULL) {
1309
b0d623f7 1310 ppipe->pipe_state &= ~(PIPE_DRAIN);
91447636
A
1311 ppipe->pipe_state |= PIPE_EOF;
1312
1313 pipeselwakeup(ppipe, ppipe);
1314 wakeup(ppipe);
1315
1316 if (cpipe->pipe_state & PIPE_KNOTE)
1317 KNOTE(&ppipe->pipe_sel.si_note, 1);
1318
1319 postpipeevent(ppipe, EV_RCLOSED);
1320
1321 ppipe->pipe_peer = NULL;
1322 }
1323 evpipefree(cpipe);
1324
1325 /*
1326 * free resources
1327 */
1328 if (PIPE_MTX(cpipe) != NULL) {
39236c6e
A
1329 if (ppipe != NULL) {
1330 /*
91447636
A
1331 * since the mutex is shared and the peer is still
1332 * alive, we need to release the mutex, not free it
1333 */
39236c6e 1334 PIPE_UNLOCK(cpipe);
91447636 1335 } else {
39236c6e 1336 /*
91447636 1337 * peer is gone, so we're the sole party left with
39236c6e 1338 * interest in this mutex... unlock and free it
91447636 1339 */
39236c6e 1340 PIPE_UNLOCK(cpipe);
91447636
A
1341 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
1342 }
1343 }
1344 pipe_free_kmem(cpipe);
ebb1b9f4
A
1345 if (cpipe->pipe_state & PIPE_WSELECT) {
1346 pipe_garbage_collect(cpipe);
1347 } else {
1348 zfree(pipe_zone, cpipe);
1349 pipe_garbage_collect(NULL);
1350 }
316670eb 1351
91447636
A
1352}
1353
91447636
A
1354/*ARGSUSED*/
1355static int
2d21ac55 1356pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
91447636
A
1357{
1358 struct pipe *cpipe;
1359
1360 cpipe = (struct pipe *)kn->kn_fp->f_data;
1361
1362 PIPE_LOCK(cpipe);
2d21ac55
A
1363#if CONFIG_MACF
1364 /*
1365 * XXX We should use a per thread credential here; minimally, the
1366 * XXX process credential should have a persistent reference on it
1367 * XXX before being passed in here.
1368 */
1369 if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
1370 PIPE_UNLOCK(cpipe);
1371 return (1);
1372 }
1373#endif
91447636
A
1374
1375 switch (kn->kn_filter) {
1376 case EVFILT_READ:
1377 kn->kn_fop = &pipe_rfiltops;
2d21ac55 1378
91447636
A
1379 break;
1380 case EVFILT_WRITE:
1381 kn->kn_fop = &pipe_wfiltops;
1382
1383 if (cpipe->pipe_peer == NULL) {
1384 /*
1385 * other end of pipe has been closed
1386 */
1387 PIPE_UNLOCK(cpipe);
1388 return (EPIPE);
1389 }
2d21ac55 1390 if (cpipe->pipe_peer)
91447636
A
1391 cpipe = cpipe->pipe_peer;
1392 break;
1393 default:
1394 PIPE_UNLOCK(cpipe);
1395 return (1);
1396 }
1397
1398 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
1399 cpipe->pipe_state |= PIPE_KNOTE;
1400
1401 PIPE_UNLOCK(cpipe);
1402 return (0);
1403}
1404
1405static void
1406filt_pipedetach(struct knote *kn)
1407{
1408 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1409
1410 PIPE_LOCK(cpipe);
1411
1412 if (kn->kn_filter == EVFILT_WRITE) {
1413 if (cpipe->pipe_peer == NULL) {
1414 PIPE_UNLOCK(cpipe);
1415 return;
1416 }
1417 cpipe = cpipe->pipe_peer;
1418 }
1419 if (cpipe->pipe_state & PIPE_KNOTE) {
1420 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
1421 cpipe->pipe_state &= ~PIPE_KNOTE;
1422 }
1423 PIPE_UNLOCK(cpipe);
1424}
1425
1426/*ARGSUSED*/
1427static int
1428filt_piperead(struct knote *kn, long hint)
1429{
1430 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1431 struct pipe *wpipe;
1432 int retval;
1433
1434 /*
1435 * if hint == 0, then we've been called from the kevent
1436 * world directly and do not currently hold the pipe mutex...
1437 * if hint == 1, we're being called back via the KNOTE post
1438 * we made in pipeselwakeup, and we already hold the mutex...
1439 */
1440 if (hint == 0)
1441 PIPE_LOCK(rpipe);
1442
1443 wpipe = rpipe->pipe_peer;
1444 kn->kn_data = rpipe->pipe_buffer.cnt;
b0d623f7
A
1445 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1446 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
1447 kn->kn_flags |= EV_EOF;
1448 retval = 1;
2d21ac55 1449 } else {
6d2010ae
A
1450 int64_t lowwat = 1;
1451 if (kn->kn_sfflags & NOTE_LOWAT) {
316670eb
A
1452 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
1453 lowwat = MAX_PIPESIZE(rpipe);
6d2010ae
A
1454 else if (kn->kn_sdata > lowwat)
1455 lowwat = kn->kn_sdata;
1456 }
1457 retval = kn->kn_data >= lowwat;
2d21ac55 1458 }
91447636
A
1459
1460 if (hint == 0)
1461 PIPE_UNLOCK(rpipe);
1462
1463 return (retval);
1464}
1465
1466/*ARGSUSED*/
1467static int
1468filt_pipewrite(struct knote *kn, long hint)
1469{
1470 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1471 struct pipe *wpipe;
1472
1473 /*
1474 * if hint == 0, then we've been called from the kevent
1475 * world directly and do not currently hold the pipe mutex...
1476 * if hint == 1, we're being called back via the KNOTE post
1477 * we made in pipeselwakeup, and we already hold the mutex...
1478 */
1479 if (hint == 0)
1480 PIPE_LOCK(rpipe);
1481
1482 wpipe = rpipe->pipe_peer;
1483
b0d623f7 1484 if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
91447636
A
1485 kn->kn_data = 0;
1486 kn->kn_flags |= EV_EOF;
1487
1488 if (hint == 0)
1489 PIPE_UNLOCK(rpipe);
1490 return (1);
1491 }
316670eb 1492 kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;
91447636 1493
6d2010ae
A
1494 int64_t lowwat = PIPE_BUF;
1495 if (kn->kn_sfflags & NOTE_LOWAT) {
316670eb
A
1496 if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
1497 lowwat = MAX_PIPESIZE(wpipe);
6d2010ae
A
1498 else if (kn->kn_sdata > lowwat)
1499 lowwat = kn->kn_sdata;
1500 }
1501
91447636
A
1502 if (hint == 0)
1503 PIPE_UNLOCK(rpipe);
1504
6d2010ae 1505 return (kn->kn_data >= lowwat);
91447636 1506}
0c530ab8
A
1507
1508int
1509fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
1510{
2d21ac55 1511#if CONFIG_MACF
0c530ab8
A
1512 int error;
1513#endif
1514 struct timeval now;
2d21ac55
A
1515 struct vinfo_stat * ub;
1516 int pipe_size = 0;
1517 int pipe_count;
0c530ab8
A
1518
1519 if (cpipe == NULL)
1520 return (EBADF);
0c530ab8 1521 PIPE_LOCK(cpipe);
2d21ac55
A
1522
1523#if CONFIG_MACF
1524 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
1525 if (error) {
1526 PIPE_UNLOCK(cpipe);
0c530ab8 1527 return (error);
2d21ac55 1528 }
0c530ab8
A
1529#endif
1530 if (cpipe->pipe_buffer.buffer == 0) {
1531 /*
1532 * must be stat'ing the write fd
1533 */
2d21ac55
A
1534 if (cpipe->pipe_peer) {
1535 /*
1536 * the peer still exists, use it's info
1537 */
316670eb 1538 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
2d21ac55
A
1539 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
1540 } else {
1541 pipe_count = 0;
1542 }
1543 } else {
316670eb 1544 pipe_size = MAX_PIPESIZE(cpipe);
2d21ac55 1545 pipe_count = cpipe->pipe_buffer.cnt;
0c530ab8 1546 }
2d21ac55
A
1547 /*
1548 * since peer's buffer is setup ouside of lock
1549 * we might catch it in transient state
1550 */
1551 if (pipe_size == 0)
1552 pipe_size = PIPE_SIZE;
0c530ab8
A
1553
1554 ub = &pinfo->pipe_stat;
1555
1556 bzero(ub, sizeof(*ub));
2d21ac55
A
1557 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
1558 ub->vst_blksize = pipe_size;
1559 ub->vst_size = pipe_count;
1560 if (ub->vst_blksize != 0)
1561 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
1562 ub->vst_nlink = 1;
0c530ab8 1563
2d21ac55
A
1564 ub->vst_uid = kauth_getuid();
1565 ub->vst_gid = kauth_getgid();
0c530ab8
A
1566
1567 microtime(&now);
2d21ac55
A
1568 ub->vst_atime = now.tv_sec;
1569 ub->vst_atimensec = now.tv_usec * 1000;
0c530ab8 1570
2d21ac55
A
1571 ub->vst_mtime = now.tv_sec;
1572 ub->vst_mtimensec = now.tv_usec * 1000;
0c530ab8 1573
2d21ac55
A
1574 ub->vst_ctime = now.tv_sec;
1575 ub->vst_ctimensec = now.tv_usec * 1000;
0c530ab8
A
1576
1577 /*
1578 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
1579 * XXX (st_dev, st_ino) should be unique.
1580 */
1581
39236c6e
A
1582 pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
1583 pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)(cpipe->pipe_peer));
0c530ab8 1584 pinfo->pipe_status = cpipe->pipe_state;
2d21ac55
A
1585
1586 PIPE_UNLOCK(cpipe);
1587
0c530ab8
A
1588 return (0);
1589}
b0d623f7
A
1590
1591
1592static int
1593pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
1594{
1595
1596 /* Note: fdlock already held */
1597 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);
1598
1599 if (cpipe) {
1600 PIPE_LOCK(cpipe);
1601 cpipe->pipe_state |= PIPE_DRAIN;
1602 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1603 wakeup(cpipe);
1604
1605 /* Must wake up peer: a writer sleeps on the read side */
1606 if ((ppipe = cpipe->pipe_peer)) {
1607 ppipe->pipe_state |= PIPE_DRAIN;
1608 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1609 wakeup(ppipe);
1610 }
1611
1612 PIPE_UNLOCK(cpipe);
1613 return 0;
1614 }
1615
1616 return 1;
1617}
1618
1619
316670eb
A
1620 /*
1621 * When a thread sets a write-select on a pipe, it creates an implicit,
1622 * untracked dependency between that thread and the peer of the pipe
1623 * on which the select is set. If the peer pipe is closed and freed
1624 * before the select()ing thread wakes up, the system will panic as
1625 * it attempts to unwind the dangling select(). To avoid that panic,
1626 * we notice whenever a dangerous select() is set on a pipe, and
1627 * defer the final deletion of the pipe until that select()s are all
1628 * resolved. Since we can't currently detect exactly when that
1629 * resolution happens, we use a simple garbage collection queue to
1630 * reap the at-risk pipes 'later'.
1631 */
1632static void
1633pipe_garbage_collect(struct pipe *cpipe)
1634{
1635 uint64_t old, now;
1636 struct pipe_garbage *pgp;
1637
1638 /* Convert msecs to nsecs and then to abstime */
1639 old = pipe_garbage_age_limit * 1000000;
1640 nanoseconds_to_absolutetime(old, &old);
1641
1642 lck_mtx_lock(pipe_garbage_lock);
1643
1644 /* Free anything that's been on the queue for <mumble> seconds */
1645 now = mach_absolute_time();
1646 old = now - old;
1647 while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
1648 pipe_garbage_head = pgp->pg_next;
1649 if (pipe_garbage_head == NULL)
1650 pipe_garbage_tail = NULL;
1651 pipe_garbage_count--;
1652 zfree(pipe_zone, pgp->pg_pipe);
1653 zfree(pipe_garbage_zone, pgp);
1654 }
1655
1656 /* Add the new pipe (if any) to the tail of the garbage queue */
1657 if (cpipe) {
1658 cpipe->pipe_state = PIPE_DEAD;
1659 pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
1660 if (pgp == NULL) {
1661 /*
1662 * We're too low on memory to garbage collect the
1663 * pipe. Freeing it runs the risk of panicing the
1664 * system. All we can do is leak it and leave
1665 * a breadcrumb behind. The good news, such as it
1666 * is, is that this will probably never happen.
1667 * We will probably hit the panic below first.
1668 */
1669 printf("Leaking pipe %p - no room left in the queue",
1670 cpipe);
1671 lck_mtx_unlock(pipe_garbage_lock);
1672 return;
1673 }
1674
1675 pgp->pg_pipe = cpipe;
1676 pgp->pg_timestamp = now;
1677 pgp->pg_next = NULL;
b0d623f7 1678
316670eb
A
1679 if (pipe_garbage_tail)
1680 pipe_garbage_tail->pg_next = pgp;
1681 pipe_garbage_tail = pgp;
1682 if (pipe_garbage_head == NULL)
1683 pipe_garbage_head = pipe_garbage_tail;
b0d623f7 1684
316670eb
A
1685 if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
1686 panic("Length of pipe garbage queue exceeded %d",
1687 PIPE_GARBAGE_QUEUE_LIMIT);
1688 }
1689 lck_mtx_unlock(pipe_garbage_lock);
1690}
b0d623f7 1691