/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */
/*
 * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers. The following are the valid
 * states in pipe operations:
 *
 *	 _________________________________
 * 1.	|_________________________________| r=w, c=0
 *
 *	 _________________________________
 * 2.	|__r:::::wc_______________________| r <= w, c > 0
 *
 *	 _________________________________
 * 3.	|::::wc_____r:::::::::::::::::::::| r > w, c > 0
 *
 *	 _________________________________
 * 4.	|:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
 *
 *
 * Nomenclature:
 *	a-z define the steps in a program flow
 *	1-4 are the states as defined above
 *	Action: is what file operation is done on the pipe
 *
 * Current: None	Action: initialize with size M=200
 * a. State 1 (r=0, w=0, c=0)
 *
 * Current: a		Action: write(100) (w < M)
 * b. State 2 (r=0, w=100, c=100)
 *
 * Current: b		Action: write(100) (w = M-w)
 * c. State 4 (r=0, w=0, c=200)
 *
 * Current: b		Action: read(70) (r < c)
 * d. State 2 (r=70, w=100, c=30)
 *
 * Current: d		Action: write(75) (w < (m-w))
 * e. State 2 (r=70, w=175, c=105)
 *
 * Current: d		Action: write(110) (w > (m-w))
 * f. State 3 (r=70, w=10, c=140)
 *
 * Current: d		Action: read(30) (r >= c)
 * g. State 1 (r=100, w=100, c=0)
 *
 */
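
/*
 * Illustrative sketch (not part of the original sources): the states above
 * follow from plain circular-buffer index arithmetic. In user-space C, with
 * 'in' the write index, 'out' the read index and 'cnt' the byte count:
 *
 *	// writer: append n bytes (n <= size - cnt)
 *	in += n;
 *	if (in >= size)
 *		in -= size;	// write index wrapped past the end: State 3
 *	cnt += n;
 *
 *	// reader: consume n bytes (n <= cnt); reads are capped at the
 *	// contiguous segment, so 'out' lands exactly on 'size' when it wraps
 *	out += n;
 *	if (out >= size)
 *		out = 0;
 *	cnt -= n;
 */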

/*
 * This code creates half-duplex pipe buffers for facilitating file-like
 * operations on pipes. The initial buffer is very small, but it can grow
 * dynamically to larger sizes based on usage. The buffer size is never
 * reduced. The total amount of kernel memory used is governed by maxpipekva.
 * If the dynamic-expansion limit is reached, the output thread is blocked
 * until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva.
 *
 */
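
/*
 * Illustrative sketch (not part of the original sources): when the kernel is
 * built with PIPE_SYSCTLS (see below), the monitoring sysctls can be read
 * from user space with sysctlbyname(3), e.g.:
 *
 *	int pipes;
 *	size_t len = sizeof(pipes);
 *
 *	if (sysctlbyname("kern.ipc.pipes", &pipes, &len, NULL, 0) == 0)
 *		printf("%d pipes currently allocated\n", pipes);
 */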

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/aio_kern.h>
#include <sys/signalvar.h>
#include <sys/pipe.h>
#include <sys/sysproto.h>
#include <sys/proc_info.h>

#include <security/audit/audit.h>

#include <sys/kdebug.h>

#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <vm/vm_kern.h>
#include <libkern/OSAtomic.h>

#define f_flag f_fglob->fg_flag
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * interfaces to the outside world exported through file operations
 */
static int pipe_read(struct fileproc *fp, struct uio *uio,
		int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
		int flags, vfs_context_t ctx);
static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
static int pipe_select(struct fileproc *fp, int which, void * wql,
		vfs_context_t ctx);
static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
		vfs_context_t ctx);
static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
		vfs_context_t ctx);
static int pipe_drain(struct fileproc *fp, vfs_context_t ctx);

static const struct fileops pipeops = {
	DTYPE_PIPE,
	pipe_read,
	pipe_write,
	pipe_ioctl,
	pipe_select,
	pipe_close,
	pipe_kqfilter,
	pipe_drain
};

static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead,
};

static struct filterops pipe_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite,
};

static int nbigpipe;      /* for compatibility's sake; no longer used */
static int amountpipes;   /* total number of pipes in system */
static int amountpipekva; /* total memory used by pipes */

int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */

#if PIPE_SYSCTLS
SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
	&maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
	&maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
	&amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
	&nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
	&amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
	&amountpipekvawired, 0, "Pipe wired KVA usage");
#endif

static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static int pipespace(struct pipe *cpipe, int size);
static int choose_pipespace(unsigned long current, unsigned long expected);
static int expand_pipespace(struct pipe *p, int target_size);
static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
static __inline int pipeio_lock(struct pipe *cpipe, int catch);
static __inline void pipeio_unlock(struct pipe *cpipe);

extern int postpipeevent(struct pipe *, int);
extern void evpipefree(struct pipe *cpipe);

static lck_grp_t	*pipe_mtx_grp;
static lck_attr_t	*pipe_mtx_attr;
static lck_grp_attr_t	*pipe_mtx_grp_attr;

static zone_t pipe_zone;

#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )

#define PIPE_GARBAGE_AGE_LIMIT		5000	/* in milliseconds */
#define PIPE_GARBAGE_QUEUE_LIMIT	32000

struct pipe_garbage {
	struct pipe		*pg_pipe;
	struct pipe_garbage	*pg_next;
	uint64_t		pg_timestamp;
};

static zone_t pipe_garbage_zone;
static struct pipe_garbage *pipe_garbage_head = NULL;
static struct pipe_garbage *pipe_garbage_tail = NULL;
static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
static int pipe_garbage_count = 0;
static lck_mtx_t *pipe_garbage_lock;
static void pipe_garbage_collect(struct pipe *cpipe);

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/* initial setup done at time of sysinit */
void
pipeinit(void)
{
	vm_size_t zone_size;

	nbigpipe = 0;

	zone_size = 8192 * sizeof(struct pipe);
	pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");

	/* allocate lock group attribute and group for pipe mutexes */
	pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
	pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);

	/* allocate the lock attribute for pipe mutexes */
	pipe_mtx_attr = lck_attr_alloc_init();

	/*
	 * Set up garbage collection for dead pipes
	 */
	zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
	    sizeof(struct pipe_garbage);
	pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
	    zone_size, 4096, "pipe garbage zone");
	pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
}

/* Bitmap for things to touch in pipe_touch() */
#define	PIPE_ATIME	0x00000001	/* time of last access */
#define	PIPE_MTIME	0x00000002	/* time of last modification */
#define	PIPE_CTIME	0x00000004	/* time of last status change */

static void
pipe_touch(struct pipe *tpipe, int touch)
{
	struct timeval now;

	microtime(&now);

	if (touch & PIPE_ATIME) {
		tpipe->st_atimespec.tv_sec  = now.tv_sec;
		tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000;
	}

	if (touch & PIPE_MTIME) {
		tpipe->st_mtimespec.tv_sec  = now.tv_sec;
		tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000;
	}

	if (touch & PIPE_CTIME) {
		tpipe->st_ctimespec.tv_sec  = now.tv_sec;
		tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000;
	}
}

static const unsigned int pipesize_blocks[] = {512, 1024, 2048, 4096, 4096 * 2, PIPE_SIZE, PIPE_SIZE * 4};

/*
 * finds the right size from possible sizes in pipesize_blocks
 * returns the size which matches max(current,expected)
 */
static int
choose_pipespace(unsigned long current, unsigned long expected)
{
	int i = sizeof(pipesize_blocks)/sizeof(unsigned int) - 1;
	unsigned long target;

	/*
	 * assert that we always get an atomic transaction sized pipe buffer,
	 * even if the system pipe buffer high-water mark has been crossed.
	 */
	assert(PIPE_BUF == pipesize_blocks[0]);

	if (expected > current)
		target = expected;
	else
		target = current;

	while (i > 0 && pipesize_blocks[i-1] > target) {
		i = i - 1;
	}

	return pipesize_blocks[i];
}
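
/*
 * Illustrative sketch (not part of the original sources): with the block
 * table above (PIPE_BUF == pipesize_blocks[0] == 512), choose_pipespace()
 * returns the smallest block strictly larger than max(current, expected),
 * bottoming out at pipesize_blocks[0]:
 *
 *	choose_pipespace(0, 100);	// -> 512  (fits the smallest block)
 *	choose_pipespace(0, 512);	// -> 1024 (512 is not > 512)
 *	choose_pipespace(1024, 0);	// -> 2048 (current dominates expected)
 */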


/*
 * expand the size of pipe while there is data to be read,
 * and then free the old buffer once the current buffered
 * data has been transferred to new storage.
 * Required: PIPE_LOCK and io lock to be held by caller.
 * returns 0 on success (or when no expansion is possible)
 */
static int
expand_pipespace(struct pipe *p, int target_size)
{
	struct pipe tmp, oldpipe;
	int error;
	tmp.pipe_buffer.buffer = 0;

	if (p->pipe_buffer.size >= (unsigned) target_size) {
		return 0; /* the existing buffer is max size possible */
	}

	/* create enough space in the target */
	error = pipespace(&tmp, target_size);
	if (error != 0)
		return (error);

	oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
	oldpipe.pipe_buffer.size = p->pipe_buffer.size;

	memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
	if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out) {
		/* we are in State 3 and need extra copying for read to be consistent */
		memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
		p->pipe_buffer.in += p->pipe_buffer.size;
	}

	p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
	p->pipe_buffer.size = tmp.pipe_buffer.size;

	pipe_free_kmem(&oldpipe);
	return 0;
}
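
/*
 * Illustrative sketch (not part of the original sources): the State 3 path
 * above copies the old buffer twice, back to back, so the wrapped segment
 * remains readable in order. E.g. with an old 8-byte buffer, out=4, in=2,
 * cnt=6 (data occupies slots 4..7 then 0..1):
 *
 *	new buffer: [old slots 0..7][old slots 0..7 again], in = 2 + 8 = 10
 *
 * The reader keeps consuming from out=4 straight through slot 9 (== in),
 * crossing from the first copy into the second without any reshuffling.
 */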

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * returns:
 *  FREAD  | fd0 | -->[struct rpipe] --> |~~buffer~~| \
 *                                                     (pipe_mutex)
 *  FWRITE | fd1 | -->[struct wpipe] --X               /
 */

/* ARGSUSED */
int
pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
	struct fileproc *rf, *wf;
	struct pipe *rpipe, *wpipe;
	lck_mtx_t *pmtx;
	int fd, error;

	if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
		return (ENOMEM);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		error = ENFILE;
		goto freepipes;
	}
	/*
	 * allocate the space for the normal I/O direction up
	 * front... we'll delay the allocation for the other
	 * direction until a write actually occurs (most likely it won't)...
	 */
	error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
	if (error)
		goto freepipes;

	TAILQ_INIT(&rpipe->pipe_evlist);
	TAILQ_INIT(&wpipe->pipe_evlist);

	error = falloc(p, &rf, &fd, vfs_context_current());
	if (error) {
		goto freepipes;
	}
	retval[0] = fd;

	/*
	 * for now we'll create half-duplex pipes (see the returns section above).
	 * this is what we've always supported...
	 */
	rf->f_flag = FREAD;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd, vfs_context_current());
	if (error) {
		fp_free(p, retval[0], rf);
		goto freepipes;
	}
	wf->f_flag = FWRITE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	/* both structures share the same mutex */
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;

	retval[1] = fd;
#if CONFIG_MACF
	/*
	 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
	 *
	 * struct pipe represents a pipe endpoint. The MAC label is shared
	 * between the connected endpoints. As a result mac_pipe_label_init() and
	 * mac_pipe_label_associate() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_pipe_label_init(rpipe);
	mac_pipe_label_associate(kauth_cred_get(), rpipe);
	wpipe->pipe_label = rpipe->pipe_label;
#endif
	proc_fdlock_spin(p);
	procfdtbl_releasefd(p, retval[0], NULL);
	procfdtbl_releasefd(p, retval[1], NULL);
	fp_drop(p, retval[0], rf, 1);
	fp_drop(p, retval[1], wf, 1);
	proc_fdunlock(p);

	return (0);

freepipes:
	pipeclose(rpipe);
	pipeclose(wpipe);
	lck_mtx_free(pmtx, pipe_mtx_grp);

	return (error);
}
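
/*
 * Illustrative sketch (not part of the original sources): the user-space
 * view of the two descriptors set up above; fd[0] is the FREAD end and
 * fd[1] the FWRITE end:
 *
 *	int fd[2];
 *	char buf[6];
 *
 *	if (pipe(fd) == -1)
 *		err(1, "pipe");
 *	write(fd[1], "hello", 6);	// fills the write-side peer's buffer
 *	read(fd[0], buf, 6);		// drains it through the read side
 */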

int
pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
{
#if CONFIG_MACF
	int error;
#endif
	int pipe_size = 0;
	int pipe_count;
	struct stat *sb = (struct stat *)0;		/* warning avoidance ; protected by isstat64 */
	struct stat64 *sb64 = (struct stat64 *)0;	/* warning avoidance ; protected by isstat64 */

	if (cpipe == NULL)
		return (EBADF);
	PIPE_LOCK(cpipe);

#if CONFIG_MACF
	error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
	if (error) {
		PIPE_UNLOCK(cpipe);
		return (error);
	}
#endif
	if (cpipe->pipe_buffer.buffer == 0) {
		/* must be stat'ing the write fd */
		if (cpipe->pipe_peer) {
			/* the peer still exists, use its info */
			pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
			pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
		} else {
			pipe_count = 0;
		}
	} else {
		pipe_size = MAX_PIPESIZE(cpipe);
		pipe_count = cpipe->pipe_buffer.cnt;
	}
	/*
	 * since the peer's buffer is set up outside of the lock
	 * we might catch it in a transient state
	 */
	if (pipe_size == 0)
		pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);

	if (isstat64 != 0) {
		sb64 = (struct stat64 *)ub;

		bzero(sb64, sizeof(*sb64));
		sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
		sb64->st_blksize = pipe_size;
		sb64->st_size = pipe_count;
		sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;

		sb64->st_uid = kauth_getuid();
		sb64->st_gid = kauth_getgid();

		sb64->st_atimespec.tv_sec  = cpipe->st_atimespec.tv_sec;
		sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;

		sb64->st_mtimespec.tv_sec  = cpipe->st_mtimespec.tv_sec;
		sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;

		sb64->st_ctimespec.tv_sec  = cpipe->st_ctimespec.tv_sec;
		sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;

		/*
		 * Return a relatively unique inode number based on the current
		 * address of this pipe's struct pipe. This number may be recycled
		 * relatively quickly.
		 */
		sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
	} else {
		sb = (struct stat *)ub;

		bzero(sb, sizeof(*sb));
		sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
		sb->st_blksize = pipe_size;
		sb->st_size = pipe_count;
		sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;

		sb->st_uid = kauth_getuid();
		sb->st_gid = kauth_getgid();

		sb->st_atimespec.tv_sec  = cpipe->st_atimespec.tv_sec;
		sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;

		sb->st_mtimespec.tv_sec  = cpipe->st_mtimespec.tv_sec;
		sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;

		sb->st_ctimespec.tv_sec  = cpipe->st_ctimespec.tv_sec;
		sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;

		/*
		 * Return a relatively unique inode number based on the current
		 * address of this pipe's struct pipe. This number may be recycled
		 * relatively quickly.
		 */
		sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
	}
	PIPE_UNLOCK(cpipe);

	/*
	 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
	 * st_uid, st_gid.
	 *
	 * XXX (st_dev) should be unique, but there is no device driver that
	 * XXX is associated with pipes, since they are implemented via a
	 * XXX struct fileops indirection rather than as FS objects.
	 */
	return (0);
}
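
/*
 * Illustrative sketch (not part of the original sources): from user space
 * the synthesized attributes above surface through fstat(2), e.g.:
 *
 *	int fd[2];
 *	struct stat st;
 *
 *	pipe(fd);
 *	write(fd[1], "abc", 3);
 *	fstat(fd[0], &st);
 *	// S_ISFIFO(st.st_mode) is true, st.st_size == 3 (bytes buffered),
 *	// st.st_blksize reports the current pipe buffer size
 */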


/*
 * Allocate kva for pipe circular buffer, the space is pageable
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	vm_offset_t buffer;

	if (size <= 0)
		return(EINVAL);

	if ((buffer = (vm_offset_t)kalloc(size)) == 0)
		return(ENOMEM);

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = (caddr_t)buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;

	OSAddAtomic(1, &amountpipes);
	OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(struct pipe **cpipep)
{
	struct pipe *cpipe;
	cpipe = (struct pipe *)zalloc(pipe_zone);

	if ((*cpipep = cpipe) == NULL)
		return (ENOMEM);

	/*
	 * protect so pipespace or pipeclose don't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(cpipe, sizeof *cpipe);

	/* Initial times are all the time of creation of the pipe */
	pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
static inline int
pipeio_lock(struct pipe *cpipe, int catch)
{
	int error;
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static inline void
pipeio_unlock(struct pipe *cpipe)
{
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * wake up anyone who's blocked in select
 */
static void
pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if (cpipe->pipe_state & PIPE_KNOTE)
		KNOTE(&cpipe->pipe_sel.si_note, 1);

	postpipeevent(cpipe, EV_RWBYTES);

	if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
		if (spipe->pipe_pgid < 0)
			gsignal(-spipe->pipe_pgid, SIGIO);
		else
			proc_signal(spipe->pipe_pgid, SIGIO);
	}
}

/*
 * Read n bytes from the buffer. Semantics are similar to file read.
 * returns: number of bytes read from the buffer
 */
/* ARGSUSED */
static int
pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
	__unused vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;

	error = pipeio_lock(rpipe, 1);
	if (error)
		goto unlocked_error;

#if CONFIG_MACF
	error = mac_pipe_check_read(kauth_cred_get(), rpipe);
	if (error)
		goto locked_error;
#endif

	while (uio_resid(uio)) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * # bytes to read is min( bytes from read pointer until end of buffer,
			 *                         total unread bytes,
			 *                         user requested byte count)
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			// LP64todo - fix this!
			if (size > (u_int) uio_resid(uio))
				size = (u_int) uio_resid(uio);

			PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning. This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read in a previous iteration.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeio_unlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
				if (error == 0)
					error = pipeio_lock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#if CONFIG_MACF
locked_error:
#endif
	pipeio_unlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
		pipeselwakeup(rpipe, rpipe->pipe_peer);

	/* update last read time */
	pipe_touch(rpipe, PIPE_ATIME);

	PIPE_UNLOCK(rpipe);

	return (error);
}
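
/*
 * Illustrative sketch (not part of the original sources): the FNONBLOCK
 * path above is what a user-space non-blocking read hits when the buffer
 * is empty:
 *
 *	int fd[2];
 *	char c;
 *
 *	pipe(fd);
 *	fcntl(fd[0], F_SETFL, O_NONBLOCK);
 *	if (read(fd[0], &c, 1) == -1 && errno == EAGAIN)
 *		;	// empty pipe, writer still open: EAGAIN, not EOF
 */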

/*
 * perform a write of n bytes into the read side of buffer. Since
 * pipes are unidirectional a write is meant to be read by the other side only.
 */
static int
pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
	__unused vfs_context_t ctx)
{
	int error = 0;
	int orig_resid;
	int pipe_size;
	struct pipe *wpipe, *rpipe;
	// LP64todo - fix this!
	orig_resid = uio_resid(uio);
	int space;

	rpipe = (struct pipe *)fp->f_data;

	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#if CONFIG_MACF
	error = mac_pipe_check_write(kauth_cred_get(), wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	pipe_size = 0;

	/*
	 * need to allocate some storage... we delay the allocation
	 * until the first write on fd[0] to avoid allocating storage for both
	 * 'pipe ends'... most pipes are half-duplex with the writes targeting
	 * fd[1], so allocating space for both ends is a waste...
	 */

	if (wpipe->pipe_buffer.buffer == 0 || (
	    (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
	    amountpipekva < maxpipekva)) {

		pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
	}
	if (pipe_size) {
		/*
		 * need to do initial allocation or resizing of pipe
		 * holding both structure and io locks.
		 */
		if ((error = pipeio_lock(wpipe, 1)) == 0) {
			if (wpipe->pipe_buffer.cnt == 0)
				error = pipespace(wpipe, pipe_size);
			else
				error = expand_pipespace(wpipe, pipe_size);

			pipeio_unlock(wpipe);

			/* allocation failed */
			if (wpipe->pipe_buffer.buffer == 0)
				error = ENOMEM;
		}
		if (error) {
			/*
			 * If an error occurred unbusy and return, waking up any pending
			 * readers.
			 */
			--wpipe->pipe_busy;
			if ((wpipe->pipe_busy == 0) &&
			    (wpipe->pipe_state & PIPE_WANT)) {
				wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
				wakeup(wpipe);
			}
			PIPE_UNLOCK(rpipe);
			return(error);
		}
	}

	while (uio_resid(uio)) {

retrywrite:
		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {

			if ((error = pipeio_lock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
					pipeio_unlock(wpipe);
					error = EPIPE;
					break;
				}
				/*
				 * If a process blocked in pipeio_lock, our
				 * value for space might be bad... the mutex
				 * is dropped while we're blocked
				 */
				if (space > (int)(wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt)) {
					pipeio_unlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				// LP64todo - fix this!
				if (space > uio_resid(uio))
					size = uio_resid(uio);
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer. If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes. Wraparound
					 * happened. (State 3)
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
					    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				/*
				 * readers never know to read until count is updated.
				 */
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeio_unlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			/*
			 * don't block on non-blocking I/O
			 * we'll do the pipeselwakeup on the way out
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				error = EPIPE;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe, wpipe);

			wpipe->pipe_state |= PIPE_WANTW;

			error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);

			if (error != 0)
				break;
		}
	}
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	}
	if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If there are any characters in the buffer, we wake up
		 * the reader if it was blocked waiting for data.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		/*
		 * wake up thread blocked in select/poll or post the notification
		 */
		pipeselwakeup(wpipe, wpipe);
	}

	/* Update modification, status change (# of bytes in pipe) times */
	pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
	pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
	PIPE_UNLOCK(rpipe);

	return (error);
}
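
/*
 * Illustrative sketch (not part of the original sources): the EPIPE/SIGPIPE
 * path above, seen from user space once the read side is gone:
 *
 *	int fd[2];
 *
 *	pipe(fd);
 *	close(fd[0]);			// drop the read side
 *	signal(SIGPIPE, SIG_IGN);	// turn the signal into an errno
 *	if (write(fd[1], "x", 1) == -1 && errno == EPIPE)
 *		;			// matches the wpipe EOF check above
 */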

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
/* ARGSUSED 3 */
static int
pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
	__unused vfs_context_t ctx)
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;
#if CONFIG_MACF
	int error;
#endif

	PIPE_LOCK(mpipe);

#if CONFIG_MACF
	error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
	if (error) {
		PIPE_UNLOCK(mpipe);

		return (error);
	}
#endif

	switch (cmd) {

	case FIONBIO:
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIONREAD:
		*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return (0);

	case TIOCSPGRP:
		mpipe->pipe_pgid = *(int *)data;

		PIPE_UNLOCK(mpipe);
		return (0);

	case TIOCGPGRP:
		*(int *)data = mpipe->pipe_pgid;

		PIPE_UNLOCK(mpipe);
		return (0);

	}
	PIPE_UNLOCK(mpipe);
	return (ENOTTY);
}
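
/*
 * Illustrative sketch (not part of the original sources): FIONREAD as
 * handled above gives user space the buffered byte count without reading:
 *
 *	int fd[2], nbytes;
 *
 *	pipe(fd);
 *	write(fd[1], "hello", 5);
 *	ioctl(fd[0], FIONREAD, &nbytes);	// nbytes == 5
 */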


static int
pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int retnum = 0;

	if (rpipe == NULL || rpipe == (struct pipe *)-1)
		return (retnum);

	PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;

#if CONFIG_MACF
	/*
	 * XXX We should use a per thread credential here; minimally, the
	 * XXX process credential should have a persistent reference on it
	 * XXX before being passed in here.
	 */
	if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
		PIPE_UNLOCK(rpipe);
		return (0);
	}
#endif
	switch (which) {

	case FREAD:
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {

			retnum = 1;
		} else {
			rpipe->pipe_state |= PIPE_SEL;
			selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
		}
		break;

	case FWRITE:
		if (wpipe)
			wpipe->pipe_state |= PIPE_WSELECT;
		if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {

			retnum = 1;
		} else {
			wpipe->pipe_state |= PIPE_SEL;
			selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
		}
		break;
	case 0:
		rpipe->pipe_state |= PIPE_SEL;
		selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
		break;
	}
	PIPE_UNLOCK(rpipe);

	return (retnum);
}
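
/*
 * Illustrative sketch (not part of the original sources): the FREAD/FWRITE
 * cases above back both select(2) and poll(2); e.g. waiting for data:
 *
 *	struct pollfd pfd = { .fd = fd[0], .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN))
 *		read(fd[0], buf, sizeof(buf));	// won't block now
 */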


/* ARGSUSED 1 */
static int
pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
	struct pipe *cpipe;

	proc_fdlock_spin(vfs_context_proc(ctx));
	cpipe = (struct pipe *)fg->fg_data;
	fg->fg_data = NULL;
	proc_fdunlock(vfs_context_proc(ctx));
	if (cpipe)
		pipeclose(cpipe);

	return (0);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
		OSAddAtomic(-1, &amountpipes);
		kfree((void *)cpipe->pipe_buffer.buffer,
		    cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.size = 0;
	}
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;
	/* partially created pipes won't have a valid mutex. */
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_LOCK(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state &= ~PIPE_DRAIN;
	cpipe->pipe_state |= PIPE_EOF;
	pipeselwakeup(cpipe, cpipe);

	while (cpipe->pipe_busy) {
		cpipe->pipe_state |= PIPE_WANT;

		wakeup(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
	}

#if CONFIG_MACF
	/*
	 * Free the shared pipe label only after the two ends are disconnected.
	 */
	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
		mac_pipe_label_destroy(cpipe);
#endif

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {

		ppipe->pipe_state &= ~(PIPE_DRAIN);
		ppipe->pipe_state |= PIPE_EOF;

		pipeselwakeup(ppipe, ppipe);
		wakeup(ppipe);

		if (cpipe->pipe_state & PIPE_KNOTE)
			KNOTE(&ppipe->pipe_sel.si_note, 1);

		postpipeevent(ppipe, EV_RCLOSED);

		ppipe->pipe_peer = NULL;
	}
	evpipefree(cpipe);

	/*
	 * free resources
	 */
	if (PIPE_MTX(cpipe) != NULL) {
		if (ppipe != NULL) {
			/*
			 * since the mutex is shared and the peer is still
			 * alive, we need to release the mutex, not free it
			 */
			PIPE_UNLOCK(cpipe);
		} else {
			/*
			 * peer is gone, so we're the sole party left with
			 * interest in this mutex... unlock and free it
			 */
			PIPE_UNLOCK(cpipe);
			lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
		}
	}
	pipe_free_kmem(cpipe);
	if (cpipe->pipe_state & PIPE_WSELECT) {
		pipe_garbage_collect(cpipe);
	} else {
		zfree(pipe_zone, cpipe);
		pipe_garbage_collect(NULL);
	}
}

/*ARGSUSED*/
static int
pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
{
	struct pipe *cpipe;

	cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);
#if CONFIG_MACF
	/*
	 * XXX We should use a per thread credential here; minimally, the
	 * XXX process credential should have a persistent reference on it
	 * XXX before being passed in here.
	 */
	if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
		PIPE_UNLOCK(cpipe);
		return (1);
	}
#endif

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;

		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;

		if (cpipe->pipe_peer == NULL) {
			/*
			 * other end of pipe has been closed
			 */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		if (cpipe->pipe_peer)
			cpipe = cpipe->pipe_peer;
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (1);
	}

	if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
		cpipe->pipe_state |= PIPE_KNOTE;

	PIPE_UNLOCK(cpipe);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);

	if (kn->kn_filter == EVFILT_WRITE) {
		if (cpipe->pipe_peer == NULL) {
			PIPE_UNLOCK(cpipe);
			return;
		}
		cpipe = cpipe->pipe_peer;
	}
	if (cpipe->pipe_state & PIPE_KNOTE) {
		if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
			cpipe->pipe_state &= ~PIPE_KNOTE;
	}
	PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe;
	int retval;

	/*
	 * if hint == 0, then we've been called from the kevent
	 * world directly and do not currently hold the pipe mutex...
	 * if hint == 1, we're being called back via the KNOTE post
	 * we made in pipeselwakeup, and we already hold the mutex...
	 */
	if (hint == 0)
		PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
	    (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		kn->kn_flags |= EV_EOF;
		retval = 1;
	} else {
		int64_t lowwat = 1;
		if (kn->kn_sfflags & NOTE_LOWAT) {
			if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
				lowwat = MAX_PIPESIZE(rpipe);
			else if (kn->kn_sdata > lowwat)
				lowwat = kn->kn_sdata;
		}
		retval = kn->kn_data >= lowwat;
	}

	if (hint == 0)
		PIPE_UNLOCK(rpipe);

	return (retval);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe;

	/*
	 * if hint == 0, then we've been called from the kevent
	 * world directly and do not currently hold the pipe mutex...
	 * if hint == 1, we're being called back via the KNOTE post
	 * we made in pipeselwakeup, and we already hold the mutex...
	 */
	if (hint == 0)
		PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;

		if (hint == 0)
			PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;

	int64_t lowwat = PIPE_BUF;
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
			lowwat = MAX_PIPESIZE(wpipe);
		else if (kn->kn_sdata > lowwat)
			lowwat = kn->kn_sdata;
	}

	if (hint == 0)
		PIPE_UNLOCK(rpipe);

	return (kn->kn_data >= lowwat);
}
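
/*
 * Illustrative sketch (not part of the original sources): registering the
 * write filter above with a NOTE_LOWAT threshold via kqueue(2):
 *
 *	struct kevent kev, out;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd[1], EVFILT_WRITE, EV_ADD, NOTE_LOWAT, 1024, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	// fires once at least 1024 bytes of buffer space are free
 *	kevent(kq, NULL, 0, &out, 1, NULL);
 */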

int
fill_pipeinfo(struct pipe *cpipe, struct pipe_info *pinfo)
{
#if CONFIG_MACF
	int error;
#endif
	struct timeval now;
	struct vinfo_stat *ub;
	int pipe_size = 0;
	int pipe_count;

	if (cpipe == NULL)
		return (EBADF);
	PIPE_LOCK(cpipe);

#if CONFIG_MACF
	error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
	if (error) {
		PIPE_UNLOCK(cpipe);
		return (error);
	}
#endif
	if (cpipe->pipe_buffer.buffer == 0) {
		/*
		 * must be stat'ing the write fd
		 */
		if (cpipe->pipe_peer) {
			/*
			 * the peer still exists, use its info
			 */
			pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
			pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
		} else {
			pipe_count = 0;
		}
	} else {
		pipe_size = MAX_PIPESIZE(cpipe);
		pipe_count = cpipe->pipe_buffer.cnt;
	}
	/*
	 * since the peer's buffer is set up outside of the lock
	 * we might catch it in a transient state
	 */
	if (pipe_size == 0)
		pipe_size = PIPE_SIZE;

	ub = &pinfo->pipe_stat;

	bzero(ub, sizeof(*ub));
	ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
	ub->vst_blksize = pipe_size;
	ub->vst_size = pipe_count;
	if (ub->vst_blksize != 0)
		ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
	ub->vst_nlink = 1;

	ub->vst_uid = kauth_getuid();
	ub->vst_gid = kauth_getgid();

	microtime(&now);
	ub->vst_atime = now.tv_sec;
	ub->vst_atimensec = now.tv_usec * 1000;

	ub->vst_mtime = now.tv_sec;
	ub->vst_mtimensec = now.tv_usec * 1000;

	ub->vst_ctime = now.tv_sec;
	ub->vst_ctimensec = now.tv_usec * 1000;

	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
	 * XXX (st_dev, st_ino) should be unique.
	 */

	pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
	pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)(cpipe->pipe_peer));
	pinfo->pipe_status = cpipe->pipe_state;

	PIPE_UNLOCK(cpipe);

	return (0);
}


static int
pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
	/* Note: fdlock already held */
	struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);

	if (cpipe) {
		PIPE_LOCK(cpipe);
		cpipe->pipe_state |= PIPE_DRAIN;
		cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
		wakeup(cpipe);

		/* Must wake up peer: a writer sleeps on the read side */
		if ((ppipe = cpipe->pipe_peer)) {
			ppipe->pipe_state |= PIPE_DRAIN;
			ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
			wakeup(ppipe);
		}

		PIPE_UNLOCK(cpipe);
		return 0;
	}

	return 1;
}


/*
 * When a thread sets a write-select on a pipe, it creates an implicit,
 * untracked dependency between that thread and the peer of the pipe
 * on which the select is set. If the peer pipe is closed and freed
 * before the select()ing thread wakes up, the system will panic as
 * it attempts to unwind the dangling select(). To avoid that panic,
 * we notice whenever a dangerous select() is set on a pipe, and
 * defer the final deletion of the pipe until those select()s are all
 * resolved. Since we can't currently detect exactly when that
 * resolution happens, we use a simple garbage collection queue to
 * reap the at-risk pipes 'later'.
 */
static void
pipe_garbage_collect(struct pipe *cpipe)
{
	uint64_t old, now;
	struct pipe_garbage *pgp;

	/* Convert msecs to nsecs and then to abstime */
	old = pipe_garbage_age_limit * 1000000;
	nanoseconds_to_absolutetime(old, &old);

	lck_mtx_lock(pipe_garbage_lock);

	/* Free anything that's been on the queue for <mumble> seconds */
	now = mach_absolute_time();
	old = now - old;
	while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
		pipe_garbage_head = pgp->pg_next;
		if (pipe_garbage_head == NULL)
			pipe_garbage_tail = NULL;
		pipe_garbage_count--;
		zfree(pipe_zone, pgp->pg_pipe);
		zfree(pipe_garbage_zone, pgp);
	}

	/* Add the new pipe (if any) to the tail of the garbage queue */
	if (cpipe) {
		cpipe->pipe_state = PIPE_DEAD;
		pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
		if (pgp == NULL) {
			/*
			 * We're too low on memory to garbage collect the
			 * pipe. Freeing it runs the risk of panicking the
			 * system. All we can do is leak it and leave
			 * a breadcrumb behind. The good news, such as it
			 * is, is that this will probably never happen.
			 * We will probably hit the panic below first.
			 */
			printf("Leaking pipe %p - no room left in the queue",
			    cpipe);
			lck_mtx_unlock(pipe_garbage_lock);
			return;
		}

		pgp->pg_pipe = cpipe;
		pgp->pg_timestamp = now;
		pgp->pg_next = NULL;

		if (pipe_garbage_tail)
			pipe_garbage_tail->pg_next = pgp;
		pipe_garbage_tail = pgp;
		if (pipe_garbage_head == NULL)
			pipe_garbage_head = pipe_garbage_tail;

		if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
			panic("Length of pipe garbage queue exceeded %d",
			    PIPE_GARBAGE_QUEUE_LIMIT);
	}
	lck_mtx_unlock(pipe_garbage_lock);
}