/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */
/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers.  The following are the valid
 * states in pipe operations:
 *
 *      _________________________________
 * 1.  |_________________________________| r=w, c=0
 *
 *      _________________________________
 * 2.  |__r:::::wc_______________________| r <= w, c > 0
 *
 *      _________________________________
 * 3.  |::::wc_____r:::::::::::::::::::::| r > w, c > 0
 *
 *      _________________________________
 * 4.  |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
 *
 *
 * Nomenclature:
 *	a-z define the steps in a program flow
 *	1-4 are the states as defined above
 *	Action: is what file operation is done on the pipe
 *
 *	Current: None	Action: initialize with size M=200
 *	a. State 1 (r=0, w=0, c=0)
 *
 *	Current: a	Action: write(100)	(w < M)
 *	b. State 2 (r=0, w=100, c=100)
 *
 *	Current: b	Action: write(100)	(w = M-w)
 *	c. State 4 (r=0, w=0, c=200)
 *
 *	Current: b	Action: read(70)	(r < c)
 *	d. State 2 (r=70, w=100, c=30)
 *
 *	Current: d	Action: write(75)	(w < (M-w))
 *	e. State 2 (r=70, w=175, c=105)
 *
 *	Current: d	Action: write(110)	(w > (M-w))
 *	f. State 3 (r=70, w=10, c=140)
 *
 *	Current: d	Action: read(30)	(r >= c)
 *	g. State 1 (r=100, w=100, c=0)
 *
 */

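/*
 * A minimal user-space sketch of the circular-buffer arithmetic described
 * above (illustrative only; not part of the kernel build).  The names CAP,
 * rd, wr and cnt are hypothetical stand-ins for pipe_buffer.size, .out,
 * .in and .cnt.
 */
#if 0
#include <assert.h>
#include <string.h>

#define CAP 200				/* M in the walkthrough above */

static char buf[CAP];
static unsigned rd, wr, cnt;		/* read index, write index, byte count */

/* Write n bytes, wrapping at the end of the buffer (states 2 -> 3/4). */
static void
ring_write(const char *src, unsigned n)
{
	unsigned first = CAP - wr;	/* contiguous space before the wrap */

	assert(n <= CAP - cnt);		/* caller has checked free space */
	if (first > n)
		first = n;
	memcpy(&buf[wr], src, first);
	memcpy(&buf[0], src + first, n - first);
	wr = (wr + n) % CAP;
	cnt += n;
}

/* Read n bytes; when the buffer drains, rd == wr and cnt == 0 (state 1). */
static void
ring_read(char *dst, unsigned n)
{
	unsigned first = CAP - rd;

	assert(n <= cnt);
	if (first > n)
		first = n;
	memcpy(dst, &buf[rd], first);
	memcpy(dst + first, &buf[0], n - first);
	rd = (rd + n) % CAP;
	cnt -= n;
}
#endif
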
/*
 * This code creates half-duplex pipe buffers for facilitating file-like
 * operations on pipes.  The initial buffer is very small, but it can
 * dynamically grow to larger sizes based on usage.  The buffer size is
 * never reduced.  The total amount of kernel memory used is governed by
 * maxpipekva.  If the dynamic-expansion limit is reached, the writing
 * thread is blocked until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, the following sysctl exists:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva.
 *
 */

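/*
 * The counters above can be read from user space with sysctlbyname(3),
 * assuming the PIPE_SYSCTLS block below is compiled in.  A minimal
 * monitoring sketch (illustrative only; not part of the kernel build):
 */
#if 0
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int pipes = 0, kva = 0;
	size_t len = sizeof(int);

	if (sysctlbyname("kern.ipc.pipes", &pipes, &len, NULL, 0) == 0)
		printf("pipes in system: %d\n", pipes);
	len = sizeof(int);
	if (sysctlbyname("kern.ipc.pipekva", &kva, &len, NULL, 0) == 0)
		printf("pipe KVA usage: %d bytes\n", kva);
	return 0;
}
#endif
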
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/aio_kern.h>
#include <sys/signalvar.h>
#include <sys/pipe.h>
#include <sys/sysproto.h>
#include <sys/proc_info.h>

#include <security/audit/audit.h>

#include <sys/kdebug.h>

#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <vm/vm_kern.h>
#include <libkern/OSAtomic.h>

#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * interfaces to the outside world exported through file operations
 */
static int pipe_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
static int pipe_select(struct fileproc *fp, int which, void *wql,
    vfs_context_t ctx);
static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
    vfs_context_t ctx);
static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
    vfs_context_t ctx);
static int pipe_drain(struct fileproc *fp, vfs_context_t ctx);

struct fileops pipeops =
	{ pipe_read,
	  pipe_write,
	  pipe_ioctl,
	  pipe_select,
	  pipe_close,
	  pipe_kqfilter,
	  pipe_drain };

static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead,
};

static struct filterops pipe_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite,
};

static int nbigpipe;      /* for compatibility's sake; no longer used */
static int amountpipes;   /* total number of pipes in system */
static int amountpipekva; /* total memory used by pipes */

int maxpipekva = PIPE_KVAMAX; /* allowing 16MB max. */

#if PIPE_SYSCTLS
SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
    &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
    &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
    &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
    &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
    &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
    &amountpipekvawired, 0, "Pipe wired KVA usage");
#endif

static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static int pipespace(struct pipe *cpipe, int size);
static int choose_pipespace(unsigned long current, unsigned long expected);
static int expand_pipespace(struct pipe *p, int target_size);
static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
static __inline int pipeio_lock(struct pipe *cpipe, int catch);
static __inline void pipeio_unlock(struct pipe *cpipe);

extern int postpipeevent(struct pipe *, int);
extern void evpipefree(struct pipe *cpipe);

static lck_grp_t *pipe_mtx_grp;
static lck_attr_t *pipe_mtx_attr;
static lck_grp_attr_t *pipe_mtx_grp_attr;

static zone_t pipe_zone;

#define MAX_PIPESIZE(pipe) (MAX(PIPE_SIZE, (pipe)->pipe_buffer.size))

#define PIPE_GARBAGE_AGE_LIMIT		5000	/* in milliseconds */
#define PIPE_GARBAGE_QUEUE_LIMIT	32000

struct pipe_garbage {
	struct pipe		*pg_pipe;
	struct pipe_garbage	*pg_next;
	uint64_t		pg_timestamp;
};

static zone_t pipe_garbage_zone;
static struct pipe_garbage *pipe_garbage_head = NULL;
static struct pipe_garbage *pipe_garbage_tail = NULL;
static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
static int pipe_garbage_count = 0;
static lck_mtx_t *pipe_garbage_lock;
static void pipe_garbage_collect(struct pipe *cpipe);

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/* initial setup done at time of sysinit */
void
pipeinit(void)
{
	vm_size_t zone_size;

	nbigpipe = 0;

	zone_size = 8192 * sizeof(struct pipe);
	pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");

	/* allocate lock group attribute and group for pipe mutexes */
	pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
	pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);

	/* allocate the lock attribute for pipe mutexes */
	pipe_mtx_attr = lck_attr_alloc_init();

	/*
	 * Set up garbage collection for dead pipes
	 */
	zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
	    sizeof(struct pipe_garbage);
	pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
	    zone_size, 4096, "pipe garbage zone");
	pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
}

/* Bitmap for things to touch in pipe_touch() */
#define	PIPE_ATIME	0x00000001	/* time of last access */
#define	PIPE_MTIME	0x00000002	/* time of last modification */
#define	PIPE_CTIME	0x00000004	/* time of last status change */

static void
pipe_touch(struct pipe *tpipe, int touch)
{
	struct timeval now;

	microtime(&now);

	if (touch & PIPE_ATIME) {
		tpipe->st_atimespec.tv_sec  = now.tv_sec;
		tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000;
	}

	if (touch & PIPE_MTIME) {
		tpipe->st_mtimespec.tv_sec  = now.tv_sec;
		tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000;
	}

	if (touch & PIPE_CTIME) {
		tpipe->st_ctimespec.tv_sec  = now.tv_sec;
		tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000;
	}
}

static const unsigned int pipesize_blocks[] = { 128, 256, 1024, 2048,
    PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE, PIPE_SIZE * 4 };

/*
 * Finds the right size from the possible sizes in pipesize_blocks:
 * returns the smallest block size larger than max(current, expected),
 * clamped to the largest entry in the table.
 */
static int
choose_pipespace(unsigned long current, unsigned long expected)
{
	int i = sizeof(pipesize_blocks) / sizeof(unsigned int) - 1;
	unsigned long target;

	if (expected > current)
		target = expected;
	else
		target = current;

	while (i > 0 && pipesize_blocks[i - 1] > target) {
		i = i - 1;
	}

	return pipesize_blocks[i];
}

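/*
 * Some expected picks from the table above (illustrative only; the
 * concrete values assume PAGE_SIZE == 4096 and PIPE_SIZE == 16384):
 */
#if 0
	assert(choose_pipespace(0, 100) == 128);	/* below the smallest block */
	assert(choose_pipespace(0, 300) == 1024);	/* 256 <= 300, so the next block up */
	assert(choose_pipespace(4096, 0) == 8192);	/* an exact match also rounds up */
	assert(choose_pipespace(0, 1 << 20) == PIPE_SIZE * 4);	/* clamped to the largest */
#endif
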
/*
 * Expand the size of the pipe while there is data to be read,
 * and then free the old buffer once the current buffered
 * data has been transferred to new storage.
 * Required: PIPE_LOCK and io lock to be held by caller.
 * returns 0 on success, or when no expansion is needed
 */
static int
expand_pipespace(struct pipe *p, int target_size)
{
	struct pipe tmp, oldpipe;
	int error;
	tmp.pipe_buffer.buffer = 0;

	if (p->pipe_buffer.size >= (unsigned) target_size) {
		return 0; /* the existing buffer is max size possible */
	}

	/* create enough space in the target */
	error = pipespace(&tmp, target_size);
	if (error != 0)
		return (error);

	oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
	oldpipe.pipe_buffer.size = p->pipe_buffer.size;

	memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
	if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out) {
		/* we are in State 3 and need extra copying for read to be consistent */
		memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
		p->pipe_buffer.in += p->pipe_buffer.size;
	}

	p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
	p->pipe_buffer.size = tmp.pipe_buffer.size;

	pipe_free_kmem(&oldpipe);
	return 0;
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * returns:
 *	FREAD  | fd0 | -->[struct rpipe] --> |~~buffer~~| \
 *	                                                   (pipe_mutex)
 *	FWRITE | fd1 | -->[struct wpipe] --X               /
 */

/* ARGSUSED */
int
pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
	struct fileproc *rf, *wf;
	struct pipe *rpipe, *wpipe;
	lck_mtx_t *pmtx;
	int fd, error;

	if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
		return (ENOMEM);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		error = ENFILE;
		goto freepipes;
	}
	/*
	 * allocate the space for the normal I/O direction up
	 * front... we'll delay the allocation for the other
	 * direction until a write actually occurs (most likely it won't)...
	 */
	error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
	if (error)
		goto freepipes;

	TAILQ_INIT(&rpipe->pipe_evlist);
	TAILQ_INIT(&wpipe->pipe_evlist);

	error = falloc(p, &rf, &fd, vfs_context_current());
	if (error) {
		goto freepipes;
	}
	retval[0] = fd;

	/*
	 * for now we'll create half-duplex pipes (refer to the returns
	 * section above).  this is what we've always supported...
	 */
	rf->f_flag = FREAD;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd, vfs_context_current());
	if (error) {
		fp_free(p, retval[0], rf);
		goto freepipes;
	}
	wf->f_flag = FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	/* both structures share the same mutex */
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;

	retval[1] = fd;
#if CONFIG_MACF
	/*
	 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
	 *
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_pipe_label_init() and
	 * mac_pipe_label_associate() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_pipe_label_init(rpipe);
	mac_pipe_label_associate(kauth_cred_get(), rpipe);
	wpipe->pipe_label = rpipe->pipe_label;
#endif
	proc_fdlock_spin(p);
	procfdtbl_releasefd(p, retval[0], NULL);
	procfdtbl_releasefd(p, retval[1], NULL);
	fp_drop(p, retval[0], rf, 1);
	fp_drop(p, retval[1], wf, 1);
	proc_fdunlock(p);

	return (0);

freepipes:
	pipeclose(rpipe);
	pipeclose(wpipe);
	lck_mtx_free(pmtx, pipe_mtx_grp);

	return (error);
}

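/*
 * Classic user-space use of the syscall above (illustrative only):
 * fd[0] is opened FREAD, fd[1] FWRITE, and data written on fd[1] is
 * buffered in the read end's circular buffer until fd[0] drains it.
 */
#if 0
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd[2];
	char msg[] = "hello";
	char in[sizeof(msg)];

	if (pipe(fd) == -1)		/* fd[0] = read end, fd[1] = write end */
		return 1;
	write(fd[1], msg, sizeof(msg));	/* lands in the pipe buffer */
	read(fd[0], in, sizeof(in));	/* drains the same buffer */
	printf("%s\n", in);
	close(fd[0]);
	close(fd[1]);
	return 0;
}
#endif
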
int
pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
{
#if CONFIG_MACF
	int error;
#endif
	int pipe_size = 0;
	int pipe_count;
	struct stat *sb = (struct stat *)0;	/* warning avoidance ; protected by isstat64 */
	struct stat64 *sb64 = (struct stat64 *)0;	/* warning avoidance ; protected by isstat64 */

	if (cpipe == NULL)
		return (EBADF);
	PIPE_LOCK(cpipe);

#if CONFIG_MACF
	error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
	if (error) {
		PIPE_UNLOCK(cpipe);
		return (error);
	}
#endif
	if (cpipe->pipe_buffer.buffer == 0) {
		/* must be stat'ing the write fd */
		if (cpipe->pipe_peer) {
			/* the peer still exists, use its info */
			pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
			pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
		} else {
			pipe_count = 0;
		}
	} else {
		pipe_size = MAX_PIPESIZE(cpipe);
		pipe_count = cpipe->pipe_buffer.cnt;
	}
	/*
	 * since the peer's buffer is set up outside of the lock
	 * we might catch it in a transient state
	 */
	if (pipe_size == 0)
		pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);

	if (isstat64 != 0) {
		sb64 = (struct stat64 *)ub;

		bzero(sb64, sizeof(*sb64));
		sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
		sb64->st_blksize = pipe_size;
		sb64->st_size = pipe_count;
		sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;

		sb64->st_uid = kauth_getuid();
		sb64->st_gid = kauth_getgid();

		sb64->st_atimespec.tv_sec  = cpipe->st_atimespec.tv_sec;
		sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;

		sb64->st_mtimespec.tv_sec  = cpipe->st_mtimespec.tv_sec;
		sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;

		sb64->st_ctimespec.tv_sec  = cpipe->st_ctimespec.tv_sec;
		sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;

		/*
		 * Return a relatively unique inode number based on the current
		 * address of this pipe's struct pipe.  This number may be recycled
		 * relatively quickly.
		 */
		sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
	} else {
		sb = (struct stat *)ub;

		bzero(sb, sizeof(*sb));
		sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
		sb->st_blksize = pipe_size;
		sb->st_size = pipe_count;
		sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;

		sb->st_uid = kauth_getuid();
		sb->st_gid = kauth_getgid();

		sb->st_atimespec.tv_sec  = cpipe->st_atimespec.tv_sec;
		sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;

		sb->st_mtimespec.tv_sec  = cpipe->st_mtimespec.tv_sec;
		sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;

		sb->st_ctimespec.tv_sec  = cpipe->st_ctimespec.tv_sec;
		sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;

		/*
		 * Return a relatively unique inode number based on the current
		 * address of this pipe's struct pipe.  This number may be recycled
		 * relatively quickly.
		 */
		sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
	}
	PIPE_UNLOCK(cpipe);

	/*
	 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
	 * st_uid, st_gid.
	 *
	 * XXX (st_dev) should be unique, but there is no device driver that
	 * XXX is associated with pipes, since they are implemented via a
	 * XXX struct fileops indirection rather than as FS objects.
	 */
	return (0);
}

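/*
 * From user space, the fields filled in above surface through fstat(2):
 * st_size is the byte count currently buffered in the pipe and st_blksize
 * its buffer size.  A short sketch (illustrative only):
 */
#if 0
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd[2];
	struct stat st;

	pipe(fd);
	write(fd[1], "abc", 3);
	fstat(fd[0], &st);
	printf("size=%lld blksize=%d\n",
	    (long long)st.st_size, (int)st.st_blksize);	/* size=3 */
	return 0;
}
#endif
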
/*
 * Allocate kernel virtual address space for the pipe's circular buffer.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	vm_offset_t buffer;

	if (size <= 0)
		return (EINVAL);

	if ((buffer = (vm_offset_t)kalloc(size)) == 0)
		return (ENOMEM);

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = (caddr_t)buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;

	OSAddAtomic(1, &amountpipes);
	OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(struct pipe **cpipep)
{
	struct pipe *cpipe;
	cpipe = (struct pipe *)zalloc(pipe_zone);

	if ((*cpipep = cpipe) == NULL)
		return (ENOMEM);

	/*
	 * protect so pipespace or pipeclose don't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(cpipe, sizeof *cpipe);

	/* Initial times are all the time of creation of the pipe */
	pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
	return (0);
}

/*
 * lock a pipe for I/O, blocking other access
 */
static inline int
pipeio_lock(struct pipe *cpipe, int catch)
{
	int error;
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static inline void
pipeio_unlock(struct pipe *cpipe)
{
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * wake up anyone who's blocked in select
 */
static void
pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if (cpipe->pipe_state & PIPE_KNOTE)
		KNOTE(&cpipe->pipe_sel.si_note, 1);

	postpipeevent(cpipe, EV_RWBYTES);

	if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
		if (spipe->pipe_pgid < 0)
			gsignal(-spipe->pipe_pgid, SIGIO);
		else
			proc_signal(spipe->pipe_pgid, SIGIO);
	}
}

/*
 * Read n bytes from the buffer.  Semantics are similar to file read.
 * returns: number of bytes read from the buffer
 */
/* ARGSUSED */
static int
pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;

	error = pipeio_lock(rpipe, 1);
	if (error)
		goto unlocked_error;

#if CONFIG_MACF
	error = mac_pipe_check_read(kauth_cred_get(), rpipe);
	if (error)
		goto locked_error;
#endif

	while (uio_resid(uio)) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * # bytes to read is min( bytes from read pointer until end of buffer,
			 *                         total unread bytes,
			 *                         user requested byte count)
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			// LP64todo - fix this!
			if (size > (u_int) uio_resid(uio))
				size = (u_int) uio_resid(uio);

			PIPE_UNLOCK(rpipe); /* we still hold the io lock */
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read in previous iteration.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeio_unlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
				if (error == 0)
					error = pipeio_lock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#if CONFIG_MACF
locked_error:
#endif
	pipeio_unlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
		pipeselwakeup(rpipe, rpipe->pipe_peer);

	/* update last read time */
	pipe_touch(rpipe, PIPE_ATIME);

	PIPE_UNLOCK(rpipe);

	return (error);
}

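/*
 * The FNONBLOCK branch above is what a user-space caller sees as EAGAIN
 * on an empty pipe.  A short sketch (illustrative only):
 */
#if 0
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int
main(void)
{
	int fd[2];
	char c;

	pipe(fd);
	fcntl(fd[0], F_SETFL, O_NONBLOCK);	/* sets FNONBLOCK on the read end */
	if (read(fd[0], &c, 1) == -1 && errno == EAGAIN)
		printf("empty pipe, would block\n");
	return 0;
}
#endif
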
/*
 * perform a write of n bytes into the read side's buffer.  Since
 * pipes are unidirectional, a write is meant to be read by the other
 * side only.
 */
static int
pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	int error = 0;
	int orig_resid;
	int pipe_size;
	int space;
	struct pipe *wpipe, *rpipe;

	// LP64todo - fix this!
	orig_resid = uio_resid(uio);

	rpipe = (struct pipe *)fp->f_data;

	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#if CONFIG_MACF
	error = mac_pipe_check_write(kauth_cred_get(), wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	pipe_size = 0;

	/*
	 * need to allocate some storage... we delay the allocation
	 * until the first write on fd[0] to avoid allocating storage for both
	 * 'pipe ends'... most pipes are half-duplex with the writes targeting
	 * fd[1], so allocating space for both ends is a waste...
	 */

	if (wpipe->pipe_buffer.buffer == 0 || (
	    (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
	    amountpipekva < maxpipekva)) {

		pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
	}
	if (pipe_size) {
		/*
		 * need to do initial allocation or resizing of pipe
		 * holding both structure and io locks.
		 */
		if ((error = pipeio_lock(wpipe, 1)) == 0) {
			if (wpipe->pipe_buffer.cnt == 0)
				error = pipespace(wpipe, pipe_size);
			else
				error = expand_pipespace(wpipe, pipe_size);

			pipeio_unlock(wpipe);

			/* allocation failed */
			if (wpipe->pipe_buffer.buffer == 0)
				error = ENOMEM;
		}
		if (error) {
			/*
			 * If an error occurred, unbusy and return, waking up any
			 * pending readers.
			 */
			--wpipe->pipe_busy;
			if ((wpipe->pipe_busy == 0) &&
			    (wpipe->pipe_state & PIPE_WANT)) {
				wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
				wakeup(wpipe);
			}
			PIPE_UNLOCK(rpipe);
			return (error);
		}
	}

	while (uio_resid(uio)) {

retrywrite:
		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {

			if ((error = pipeio_lock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
					pipeio_unlock(wpipe);
					error = EPIPE;
					break;
				}
				/*
				 * If a process blocked in pipeio_lock, our
				 * value for space might be bad... the mutex
				 * is dropped while we're blocked
				 */
				if (space > (int)(wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt)) {
					pipeio_unlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				// LP64todo - fix this!
				if (space > uio_resid(uio))
					size = uio_resid(uio);
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
				    segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened. (State 3)
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
					    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				/*
				 * readers never know to read until count is updated.
				 */
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeio_unlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			/*
			 * don't block on non-blocking I/O
			 * we'll do the pipeselwakeup on the way out
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				error = EPIPE;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe, wpipe);

			wpipe->pipe_state |= PIPE_WANTW;

			error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);

			if (error != 0)
				break;
		}
	}
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	}
	if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If there are any characters in the buffer, we wake up
		 * the reader if it was blocked waiting for data.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		/*
		 * wake up thread blocked in select/poll or post the notification
		 */
		pipeselwakeup(wpipe, wpipe);
	}

	/* Update modification, status change (# of bytes in pipe) times */
	pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
	pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
	PIPE_UNLOCK(rpipe);

	return (error);
}

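/*
 * The "space = 0" clamp above is what makes writes of at most PIPE_BUF
 * bytes atomic: such a write is never split, so records from concurrent
 * writers do not interleave.  A hedged user-space sketch (illustrative
 * only; write_record is a hypothetical helper):
 */
#if 0
#include <unistd.h>
#include <limits.h>

/*
 * Each writer sends complete records of at most PIPE_BUF bytes; the
 * reader is guaranteed to see every record contiguously.
 */
static void
write_record(int wfd, const char *rec, size_t len)
{
	if (len <= PIPE_BUF)
		write(wfd, rec, len);	/* all-or-nothing w.r.t. other writers */
}
#endif
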
/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
/* ARGSUSED 3 */
static int
pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
    __unused vfs_context_t ctx)
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;
#if CONFIG_MACF
	int error;
#endif

	PIPE_LOCK(mpipe);

#if CONFIG_MACF
	error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
	if (error) {
		PIPE_UNLOCK(mpipe);

		return (error);
	}
#endif

	switch (cmd) {

	case FIONBIO:
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIONREAD:
		*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return (0);

	case TIOCSPGRP:
		mpipe->pipe_pgid = *(int *)data;

		PIPE_UNLOCK(mpipe);
		return (0);

	case TIOCGPGRP:
		*(int *)data = mpipe->pipe_pgid;

		PIPE_UNLOCK(mpipe);
		return (0);

	}
	PIPE_UNLOCK(mpipe);
	return (ENOTTY);
}

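/*
 * The FIONREAD case above is reachable from user space via ioctl(2) and
 * reports the bytes currently buffered.  A short sketch (illustrative only):
 */
#if 0
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd[2], nread = 0;

	pipe(fd);
	write(fd[1], "abcd", 4);
	ioctl(fd[0], FIONREAD, &nread);		/* returns pipe_buffer.cnt */
	printf("%d bytes buffered\n", nread);	/* prints 4 */
	return 0;
}
#endif
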
static int
pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int retnum = 0;

	if (rpipe == NULL || rpipe == (struct pipe *)-1)
		return (retnum);

	PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;

#if CONFIG_MACF
	/*
	 * XXX We should use a per thread credential here; minimally, the
	 * XXX process credential should have a persistent reference on it
	 * XXX before being passed in here.
	 */
	if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
		PIPE_UNLOCK(rpipe);
		return (0);
	}
#endif
	switch (which) {

	case FREAD:
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {

			retnum = 1;
		} else {
			rpipe->pipe_state |= PIPE_SEL;
			selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
		}
		break;

	case FWRITE:
		if (wpipe)
			wpipe->pipe_state |= PIPE_WSELECT;
		if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		    (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) > 0)) {

			retnum = 1;
		} else {
			wpipe->pipe_state |= PIPE_SEL;
			selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
		}
		break;
	case 0:
		rpipe->pipe_state |= PIPE_SEL;
		selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
		break;
	}
	PIPE_UNLOCK(rpipe);

	return (retnum);
}

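/*
 * From user space the FREAD case above means select(2) reports the read
 * end ready as soon as any bytes are buffered.  A short sketch
 * (illustrative only):
 */
#if 0
#include <sys/select.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd[2];
	fd_set rfds;
	struct timeval tv = { 1, 0 };	/* one-second timeout */

	pipe(fd);
	write(fd[1], "x", 1);

	FD_ZERO(&rfds);
	FD_SET(fd[0], &rfds);
	/* pipe_buffer.cnt > 0, so FREAD is immediately ready */
	if (select(fd[0] + 1, &rfds, NULL, NULL, &tv) > 0)
		printf("readable\n");
	return 0;
}
#endif
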
/* ARGSUSED 1 */
static int
pipe_close(struct fileglob *fg, vfs_context_t ctx)
{
	struct pipe *cpipe;

	proc_fdlock_spin(vfs_context_proc(ctx));
	cpipe = (struct pipe *)fg->fg_data;
	fg->fg_data = NULL;
	proc_fdunlock(vfs_context_proc(ctx));
	if (cpipe)
		pipeclose(cpipe);

	return (0);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
		OSAddAtomic(-1, &amountpipes);
		kfree((void *)cpipe->pipe_buffer.buffer,
		    cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.size = 0;
	}
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;
	/* partially created pipes won't have a valid mutex. */
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_LOCK(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state &= ~PIPE_DRAIN;
	cpipe->pipe_state |= PIPE_EOF;
	pipeselwakeup(cpipe, cpipe);

	while (cpipe->pipe_busy) {
		cpipe->pipe_state |= PIPE_WANT;

		wakeup(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
	}

#if CONFIG_MACF
	/*
	 * Free the shared pipe label only after the two ends are disconnected.
	 */
	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
		mac_pipe_label_destroy(cpipe);
#endif

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {

		ppipe->pipe_state &= ~(PIPE_DRAIN);
		ppipe->pipe_state |= PIPE_EOF;

		pipeselwakeup(ppipe, ppipe);
		wakeup(ppipe);

		if (cpipe->pipe_state & PIPE_KNOTE)
			KNOTE(&ppipe->pipe_sel.si_note, 1);

		postpipeevent(ppipe, EV_RCLOSED);

		ppipe->pipe_peer = NULL;
	}
	evpipefree(cpipe);

	/*
	 * free resources
	 */
	if (PIPE_MTX(cpipe) != NULL) {
		if (ppipe != NULL) {
			/*
			 * since the mutex is shared and the peer is still
			 * alive, we need to release the mutex, not free it
			 */
			PIPE_UNLOCK(cpipe);
		} else {
			/*
			 * peer is gone, so we're the sole party left with
			 * interest in this mutex... we can just free it
			 */
			lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
		}
	}
	pipe_free_kmem(cpipe);
	if (cpipe->pipe_state & PIPE_WSELECT) {
		pipe_garbage_collect(cpipe);
	} else {
		zfree(pipe_zone, cpipe);
		pipe_garbage_collect(NULL);
	}
}

/*ARGSUSED*/
static int
pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
{
	struct pipe *cpipe;

	cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);
#if CONFIG_MACF
	/*
	 * XXX We should use a per thread credential here; minimally, the
	 * XXX process credential should have a persistent reference on it
	 * XXX before being passed in here.
	 */
	if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
		PIPE_UNLOCK(cpipe);
		return (1);
	}
#endif

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;

		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;

		if (cpipe->pipe_peer == NULL) {
			/*
			 * other end of pipe has been closed
			 */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		if (cpipe->pipe_peer)
			cpipe = cpipe->pipe_peer;
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (1);
	}

	if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
		cpipe->pipe_state |= PIPE_KNOTE;

	PIPE_UNLOCK(cpipe);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);

	if (kn->kn_filter == EVFILT_WRITE) {
		if (cpipe->pipe_peer == NULL) {
			PIPE_UNLOCK(cpipe);
			return;
		}
		cpipe = cpipe->pipe_peer;
	}
	if (cpipe->pipe_state & PIPE_KNOTE) {
		if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
			cpipe->pipe_state &= ~PIPE_KNOTE;
	}
	PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe;
	int retval;

	/*
	 * if hint == 0, then we've been called from the kevent
	 * world directly and do not currently hold the pipe mutex...
	 * if hint == 1, we're being called back via the KNOTE post
	 * we made in pipeselwakeup, and we already hold the mutex...
	 */
	if (hint == 0)
		PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
	    (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		kn->kn_flags |= EV_EOF;
		retval = 1;
	} else {
		int64_t lowwat = 1;
		if (kn->kn_sfflags & NOTE_LOWAT) {
			if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
				lowwat = MAX_PIPESIZE(rpipe);
			else if (kn->kn_sdata > lowwat)
				lowwat = kn->kn_sdata;
		}
		retval = kn->kn_data >= lowwat;
	}

	if (hint == 0)
		PIPE_UNLOCK(rpipe);

	return (retval);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe;

	/*
	 * if hint == 0, then we've been called from the kevent
	 * world directly and do not currently hold the pipe mutex...
	 * if hint == 1, we're being called back via the KNOTE post
	 * we made in pipeselwakeup, and we already hold the mutex...
	 */
	if (hint == 0)
		PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;

		if (hint == 0)
			PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;

	int64_t lowwat = PIPE_BUF;
	if (kn->kn_sfflags & NOTE_LOWAT) {
		if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
			lowwat = MAX_PIPESIZE(wpipe);
		else if (kn->kn_sdata > lowwat)
			lowwat = kn->kn_sdata;
	}

	if (hint == 0)
		PIPE_UNLOCK(rpipe);

	return (kn->kn_data >= lowwat);
}

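/*
 * The lowwat logic above honors NOTE_LOWAT from user space: an
 * EVFILT_READ knote only fires once at least kn_sdata bytes are
 * buffered.  A kqueue sketch (illustrative only):
 */
#if 0
#include <sys/event.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd[2], kq = kqueue();
	struct kevent kev, out;

	pipe(fd);
	/* don't fire until at least 8 bytes are buffered */
	EV_SET(&kev, fd[0], EVFILT_READ, EV_ADD, NOTE_LOWAT, 8, NULL);
	kevent(kq, &kev, 1, NULL, 0, NULL);

	write(fd[1], "12345678", 8);
	if (kevent(kq, NULL, 0, &out, 1, NULL) == 1)
		printf("%lld bytes readable\n", (long long)out.data);
	return 0;
}
#endif
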
int
fill_pipeinfo(struct pipe *cpipe, struct pipe_info *pinfo)
{
#if CONFIG_MACF
	int error;
#endif
	struct timeval now;
	struct vinfo_stat *ub;
	int pipe_size = 0;
	int pipe_count;

	if (cpipe == NULL)
		return (EBADF);
	PIPE_LOCK(cpipe);

#if CONFIG_MACF
	error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
	if (error) {
		PIPE_UNLOCK(cpipe);
		return (error);
	}
#endif
	if (cpipe->pipe_buffer.buffer == 0) {
		/*
		 * must be stat'ing the write fd
		 */
		if (cpipe->pipe_peer) {
			/*
			 * the peer still exists, use its info
			 */
			pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
			pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
		} else {
			pipe_count = 0;
		}
	} else {
		pipe_size = MAX_PIPESIZE(cpipe);
		pipe_count = cpipe->pipe_buffer.cnt;
	}
	/*
	 * since the peer's buffer is set up outside of the lock
	 * we might catch it in a transient state
	 */
	if (pipe_size == 0)
		pipe_size = PIPE_SIZE;

	ub = &pinfo->pipe_stat;

	bzero(ub, sizeof(*ub));
	ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
	ub->vst_blksize = pipe_size;
	ub->vst_size = pipe_count;
	if (ub->vst_blksize != 0)
		ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
	ub->vst_nlink = 1;

	ub->vst_uid = kauth_getuid();
	ub->vst_gid = kauth_getgid();

	microtime(&now);
	ub->vst_atime = now.tv_sec;
	ub->vst_atimensec = now.tv_usec * 1000;

	ub->vst_mtime = now.tv_sec;
	ub->vst_mtimensec = now.tv_usec * 1000;

	ub->vst_ctime = now.tv_sec;
	ub->vst_ctimensec = now.tv_usec * 1000;

	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
	 * XXX (st_dev, st_ino) should be unique.
	 */

	pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe);
	pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer));
	pinfo->pipe_status = cpipe->pipe_state;

	PIPE_UNLOCK(cpipe);

	return (0);
}

static int
pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
	/* Note: fdlock already held */
	struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);

	if (cpipe) {
		PIPE_LOCK(cpipe);
		cpipe->pipe_state |= PIPE_DRAIN;
		cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
		wakeup(cpipe);

		/* Must wake up peer: a writer sleeps on the read side */
		if ((ppipe = cpipe->pipe_peer)) {
			ppipe->pipe_state |= PIPE_DRAIN;
			ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
			wakeup(ppipe);
		}

		PIPE_UNLOCK(cpipe);
		return 0;
	}

	return 1;
}

/*
 * When a thread sets a write-select on a pipe, it creates an implicit,
 * untracked dependency between that thread and the peer of the pipe
 * on which the select is set.  If the peer pipe is closed and freed
 * before the select()ing thread wakes up, the system will panic as
 * it attempts to unwind the dangling select().  To avoid that panic,
 * we notice whenever a dangerous select() is set on a pipe, and
 * defer the final deletion of the pipe until those select()s are all
 * resolved.  Since we can't currently detect exactly when that
 * resolution happens, we use a simple garbage collection queue to
 * reap the at-risk pipes 'later'.
 */
static void
pipe_garbage_collect(struct pipe *cpipe)
{
	uint64_t old, now;
	struct pipe_garbage *pgp;

	/* Convert msecs to nsecs and then to abstime */
	old = pipe_garbage_age_limit * 1000000;
	nanoseconds_to_absolutetime(old, &old);

	lck_mtx_lock(pipe_garbage_lock);

	/* Free anything that's been on the queue for <mumble> seconds */
	now = mach_absolute_time();
	old = now - old;
	while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
		pipe_garbage_head = pgp->pg_next;
		if (pipe_garbage_head == NULL)
			pipe_garbage_tail = NULL;
		pipe_garbage_count--;
		zfree(pipe_zone, pgp->pg_pipe);
		zfree(pipe_garbage_zone, pgp);
	}

	/* Add the new pipe (if any) to the tail of the garbage queue */
	if (cpipe) {
		cpipe->pipe_state = PIPE_DEAD;
		pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
		if (pgp == NULL) {
			/*
			 * We're too low on memory to garbage collect the
			 * pipe.  Freeing it runs the risk of panicking the
			 * system.  All we can do is leak it and leave
			 * a breadcrumb behind.  The good news, such as it
			 * is, is that this will probably never happen.
			 * We will probably hit the panic below first.
			 */
			printf("Leaking pipe %p - no room left in the queue",
			    cpipe);
			lck_mtx_unlock(pipe_garbage_lock);
			return;
		}

		pgp->pg_pipe = cpipe;
		pgp->pg_timestamp = now;
		pgp->pg_next = NULL;

		if (pipe_garbage_tail)
			pipe_garbage_tail->pg_next = pgp;
		pipe_garbage_tail = pgp;
		if (pipe_garbage_head == NULL)
			pipe_garbage_head = pipe_garbage_tail;

		if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
			panic("Length of pipe garbage queue exceeded %d",
			    PIPE_GARBAGE_QUEUE_LIMIT);
	}
	lck_mtx_unlock(pipe_garbage_lock);
}