]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/sys_pipe.c
xnu-2782.20.48.tar.gz
[apple/xnu.git] / bsd / kern / sys_pipe.c
1 /*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19 /*
20 * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
21 *
22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
23 *
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the License
28 * may not be used to create, or enable the creation or redistribution of,
29 * unlawful or unlicensed copies of an Apple operating system, or to
30 * circumvent, violate, or enable the circumvention or violation of, any
31 * terms of an Apple operating system software license agreement.
32 *
33 * Please obtain a copy of the License at
34 * http://www.opensource.apple.com/apsl/ and read it before using this file.
35 *
36 * The Original Code and all software distributed under the License are
37 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
38 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
40 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41 * Please see the License for the specific language governing rights and
42 * limitations under the License.
43 *
44 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45 */
46 /*
47 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
48 * support for mandatory and extensible security protections. This notice
49 * is included in support of clause 2.2 (b) of the Apple Public License,
50 * Version 2.0.
51 */
52
53 /*
54 * This file contains a high-performance replacement for the socket-based
55 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
56 * all features of sockets, but does do everything that pipes normally
57 * do.
58 *
59 * Pipes are implemented as circular buffers. Following are the valid states in pipes operations
60 *
61 * _________________________________
62 * 1. |_________________________________| r=w, c=0
63 *
64 * _________________________________
65 * 2. |__r:::::wc_______________________| r <= w , c > 0
66 *
67 * _________________________________
68 * 3. |::::wc_____r:::::::::::::::::::::| r>w , c > 0
69 *
70 * _________________________________
71 * 4. |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
72 *
73 *
74 * Nomenclature:-
75 * a-z define the steps in a program flow
76 * 1-4 are the states as defined above
77 * Action: is what file operation is done on the pipe
78 *
79 * Current:None Action: initialize with size M=200
80 * a. State 1 ( r=0, w=0, c=0)
81 *
82 * Current: a Action: write(100) (w < M)
83 * b. State 2 (r=0, w=100, c=100)
84 *
85 * Current: b Action: write(100) (w = M-w)
86 * c. State 4 (r=0,w=0,c=200)
87 *
88 * Current: b Action: read(70) ( r < c )
89 * d. State 2(r=70,w=100,c=30)
90 *
91 * Current: d Action: write(75) ( w < (m-w))
92 * e. State 2 (r=70,w=175,c=105)
93 *
94 * Current: d Action: write(110) ( w > (m-w))
95 * f. State 3 (r=70,w=10,c=140)
96 *
97 * Current: d Action: read(30) (r >= c )
98 * g. State 1 (r=100,w=100,c=0)
99 *
100 */
101
102 /*
103 * This code creates half-duplex pipe buffers to support file-like
104 * operations on pipes. The initial buffer is very small, but it can
105 * grow dynamically to larger sizes based on usage. The buffer size is never
106 * reduced. The total amount of kernel memory used is governed by maxpipekva.
107 * If the dynamic expansion limit is reached, the writing thread is blocked
108 * until the pipe buffer empties enough to continue.
109 *
110 * In order to limit the resource use of pipes, the following sysctl exists:
111 *
112 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
113 * address space available to us in pipe_map.
114 *
115 * Memory usage may be monitored through the sysctls
116 * kern.ipc.pipes, kern.ipc.pipekva.
117 *
118 */
119
120 #include <sys/param.h>
121 #include <sys/systm.h>
122 #include <sys/filedesc.h>
123 #include <sys/kernel.h>
124 #include <sys/vnode.h>
125 #include <sys/proc_internal.h>
126 #include <sys/kauth.h>
127 #include <sys/file_internal.h>
128 #include <sys/stat.h>
129 #include <sys/ioctl.h>
130 #include <sys/fcntl.h>
131 #include <sys/malloc.h>
132 #include <sys/syslog.h>
133 #include <sys/unistd.h>
134 #include <sys/resourcevar.h>
135 #include <sys/aio_kern.h>
136 #include <sys/signalvar.h>
137 #include <sys/pipe.h>
138 #include <sys/sysproto.h>
139 #include <sys/proc_info.h>
140
141 #include <security/audit/audit.h>
142
143 #include <sys/kdebug.h>
144
145 #include <kern/zalloc.h>
146 #include <kern/kalloc.h>
147 #include <vm/vm_kern.h>
148 #include <libkern/OSAtomic.h>
149
/*
 * Shorthand accessors: each f_* name expands to the matching fg_* member
 * reached through f_fglob, so the code below can write fp->f_data,
 * rf->f_flag, wf->f_ops, ... directly on a struct fileproc pointer.
 */
150 #define f_flag f_fglob->fg_flag
151 #define f_msgcount f_fglob->fg_msgcount
152 #define f_cred f_fglob->fg_cred
153 #define f_ops f_fglob->fg_ops
154 #define f_offset f_fglob->fg_offset
155 #define f_data f_fglob->fg_data
156
157 /*
158 * interfaces to the outside world exported through file operations
 *
 * Forward declarations of the handlers that are collected in the pipeops
 * table; pipe() installs that table on both descriptors it creates.
159 */
160 static int pipe_read(struct fileproc *fp, struct uio *uio,
161 int flags, vfs_context_t ctx);
162 static int pipe_write(struct fileproc *fp, struct uio *uio,
163 int flags, vfs_context_t ctx);
164 static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
165 static int pipe_select(struct fileproc *fp, int which, void * wql,
166 vfs_context_t ctx);
167 static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
168 vfs_context_t ctx);
169 static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
170 vfs_context_t ctx);
171 static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);
172
/*
 * File-operations table assigned to f_ops of both the read and the write
 * descriptor in pipe(). The initializer is positional: the DTYPE_PIPE type
 * tag comes first, followed by the read, write, ioctl, select, close,
 * kqfilter and drain handlers in that order.
 * NOTE(review): the member names of struct fileops are not visible in this
 * file; the order must match its declaration.
 */
173 static const struct fileops pipeops = {
174 DTYPE_PIPE,
175 pipe_read,
176 pipe_write,
177 pipe_ioctl,
178 pipe_select,
179 pipe_close,
180 pipe_kqfilter,
181 pipe_drain
182 };
183
/*
 * Knote filter callbacks and the two filter tables built from them.
 * Both tables set f_isfd and share filt_pipedetach as their detach handler;
 * they differ only in the event callback (filt_piperead for the read
 * filter, filt_pipewrite for the write filter).
 * NOTE(review): presumably pipe_kqfilter() picks one of these tables per
 * knote - that function is not visible in this part of the file.
 */
184 static void filt_pipedetach(struct knote *kn);
185 static int filt_piperead(struct knote *kn, long hint);
186 static int filt_pipewrite(struct knote *kn, long hint);
187
188 static struct filterops pipe_rfiltops = {
189 .f_isfd = 1,
190 .f_detach = filt_pipedetach,
191 .f_event = filt_piperead,
192 };
193
194 static struct filterops pipe_wfiltops = {
195 .f_isfd = 1,
196 .f_detach = filt_pipedetach,
197 .f_event = filt_pipewrite,
198 };
199
/*
 * Global pipe accounting.
 * amountpipes and amountpipekva are bumped with OSAddAtomic() each time
 * pipespace() installs a buffer; pipe_write() compares amountpipekva with
 * maxpipekva before it tries to grow a buffer. nbigpipe is only zeroed in
 * pipeinit() and exported by the optional sysctl.
 */
200 static int nbigpipe; /* for compatibility sake. no longer used */
201 static int amountpipes; /* total number of pipes in system */
202 static int amountpipekva; /* total memory used by pipes */
203
204 int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */
205
/*
 * Optional kern.ipc.* sysctl nodes exposing the pipe limits and counters;
 * compiled only when PIPE_SYSCTLS is defined. All nodes are read-only
 * except maxpipekvawired (CTLFLAG_RW).
 * NOTE(review): maxpipekvawired and amountpipekvawired are not declared
 * anywhere in the visible part of this file - confirm they exist (e.g. in a
 * header) before enabling PIPE_SYSCTLS, otherwise this section cannot
 * compile.
 */
206 #if PIPE_SYSCTLS
207 SYSCTL_DECL(_kern_ipc);
208
209 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
210 &maxpipekva, 0, "Pipe KVA limit");
211 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
212 &maxpipekvawired, 0, "Pipe KVA wired limit");
213 SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
214 &amountpipes, 0, "Current # of pipes");
215 SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
216 &nbigpipe, 0, "Current # of big pipes");
217 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
218 &amountpipekva, 0, "Pipe KVA usage");
219 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
220 &amountpipekvawired, 0, "Pipe wired KVA usage");
221 #endif
222
/*
 * Forward declarations of the file-private helpers.
 * pipeclose() and pipe_free_kmem() are used before their definitions
 * (pipe() calls pipeclose() on its failure path; pipespace() and
 * expand_pipespace() call pipe_free_kmem()).
 */
223 static void pipeclose(struct pipe *cpipe);
224 static void pipe_free_kmem(struct pipe *cpipe);
225 static int pipe_create(struct pipe **cpipep);
226 static int pipespace(struct pipe *cpipe, int size);
227 static int choose_pipespace(unsigned long current, unsigned long expected);
228 static int expand_pipespace(struct pipe *p, int target_size);
229 static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
230 static __inline int pipeio_lock(struct pipe *cpipe, int catch);
231 static __inline void pipeio_unlock(struct pipe *cpipe);
232
/* event hooks implemented outside this file */
233 extern int postpipeevent(struct pipe *, int);
234 extern void evpipefree(struct pipe *cpipe);
235
/* lock group and attributes shared by every pipe mutex; set up in pipeinit() */
236 static lck_grp_t *pipe_mtx_grp;
237 static lck_attr_t *pipe_mtx_attr;
238 static lck_grp_attr_t *pipe_mtx_grp_attr;
239
/* zone from which pipe_create() allocates struct pipe */
240 static zone_t pipe_zone;
241
/* Larger of PIPE_SIZE and the pipe's current buffer size; pipe_stat() reports it as st_blksize. */
242 #define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )
243
/* Limits for the dead-pipe garbage queue declared below. */
244 #define PIPE_GARBAGE_AGE_LIMIT 5000 /* In milliseconds */
245 #define PIPE_GARBAGE_QUEUE_LIMIT 32000
246
/*
 * One entry of the dead-pipe garbage list: the pipe itself, a link to the
 * next entry and a timestamp.
 * NOTE(review): the unit and clock of pg_timestamp are set by
 * pipe_garbage_collect(), which is not visible in this part of the file.
 */
247 struct pipe_garbage {
248 struct pipe *pg_pipe;
249 struct pipe_garbage *pg_next;
250 uint64_t pg_timestamp;
251 };
252
/*
 * State of the garbage list: backing zone, head/tail pointers, age limit
 * (initialised from PIPE_GARBAGE_AGE_LIMIT), entry count and the mutex
 * allocated for it in pipeinit().
 */
253 static zone_t pipe_garbage_zone;
254 static struct pipe_garbage *pipe_garbage_head = NULL;
255 static struct pipe_garbage *pipe_garbage_tail = NULL;
256 static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
257 static int pipe_garbage_count = 0;
258 static lck_mtx_t *pipe_garbage_lock;
259 static void pipe_garbage_collect(struct pipe *cpipe);
260
261 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
262
263 /* initial setup done at time of sysinit */
264 void
265 pipeinit(void)
266 {
267 nbigpipe=0;
268 vm_size_t zone_size;
269
270 zone_size = 8192 * sizeof(struct pipe);
271 pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");
272
273
274 /* allocate lock group attribute and group for pipe mutexes */
275 pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
276 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
277
278 /* allocate the lock attribute for pipe mutexes */
279 pipe_mtx_attr = lck_attr_alloc_init();
280
281 /*
282 * Set up garbage collection for dead pipes
283 */
284 zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
285 sizeof(struct pipe_garbage);
286 pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
287 zone_size, 4096, "pipe garbage zone");
288 pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
289
290 }
291
292 /* Bitmap for things to touch in pipe_touch() */
293 #define PIPE_ATIME 0x00000001 /* time of last access */
294 #define PIPE_MTIME 0x00000002 /* time of last modification */
295 #define PIPE_CTIME 0x00000004 /* time of last status change */
296
297 static void
298 pipe_touch(struct pipe *tpipe, int touch)
299 {
300 struct timeval now;
301
302 microtime(&now);
303
304 if (touch & PIPE_ATIME) {
305 tpipe->st_atimespec.tv_sec = now.tv_sec;
306 tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000;
307 }
308
309 if (touch & PIPE_MTIME) {
310 tpipe->st_mtimespec.tv_sec = now.tv_sec;
311 tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000;
312 }
313
314 if (touch & PIPE_CTIME) {
315 tpipe->st_ctimespec.tv_sec = now.tv_sec;
316 tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000;
317 }
318 }
319
/*
 * Candidate pipe buffer sizes. choose_pipespace() picks from this table and
 * its result is handed to pipespace(); pipe_stat() also uses entry 0 when it
 * computes its fallback block size.
 * NOTE(review): choose_pipespace() scans from the last entry downwards and
 * assumes the entries are in ascending order - confirm PIPE_SIZE > 4096 * 2.
 */
320 static const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 };
321
322 /*
323 * finds the right size from possible sizes in pipesize_blocks
324 * returns the size which matches max(current,expected)
325 */
326 static int
327 choose_pipespace(unsigned long current, unsigned long expected)
328 {
329 int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1;
330 unsigned long target;
331
332 if (expected > current)
333 target = expected;
334 else
335 target = current;
336
337 while ( i >0 && pipesize_blocks[i-1] > target) {
338 i=i-1;
339
340 }
341
342 return pipesize_blocks[i];
343 }
344
345
346 /*
347 * expand the size of pipe while there is data to be read,
348 * and then free the old buffer once the current buffered
349 * data has been transferred to new storage.
350 * Required: PIPE_LOCK and io lock to be held by caller.
351 * returns 0 on success or no expansion possible
352 */
353 static int
354 expand_pipespace(struct pipe *p, int target_size)
355 {
356 struct pipe tmp, oldpipe;
357 int error;
358 tmp.pipe_buffer.buffer = 0;
359
360 if (p->pipe_buffer.size >= (unsigned) target_size) {
361 return 0; /* the existing buffer is max size possible */
362 }
363
364 /* create enough space in the target */
365 error = pipespace(&tmp, target_size);
366 if (error != 0)
367 return (error);
368
369 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
370 oldpipe.pipe_buffer.size = p->pipe_buffer.size;
371
372 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
373 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out ){
374 /* we are in State 3 and need extra copying for read to be consistent */
375 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
376 p->pipe_buffer.in += p->pipe_buffer.size;
377 }
378
379 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
380 p->pipe_buffer.size = tmp.pipe_buffer.size;
381
382
383 pipe_free_kmem(&oldpipe);
384 return 0;
385 }
386
387 /*
388 * The pipe system call for the DTYPE_PIPE type of pipes
389 *
 * Creates two struct pipe endpoints that reference each other through
 * pipe_peer and share a single mutex, attaches each to a freshly allocated
 * file (read end first), and stores the two descriptors in retval[0] and
 * retval[1]. Only the read endpoint gets a buffer here.
 *
 * Returns 0 on success. On failure: ENOMEM if the mutex cannot be
 * allocated, ENFILE if either pipe_create() call fails, otherwise the
 * error reported by pipespace() or falloc().
 *
390 * returns:
391 * FREAD | fd0 | -->[struct rpipe] --> |~~buffer~~| \
392 * (pipe_mutex)
393 * FWRITE | fd1 | -->[struct wpipe] --X /
394 */
395
396 /* ARGSUSED */
397 int
398 pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
399 {
400 struct fileproc *rf, *wf;
401 struct pipe *rpipe, *wpipe;
402 lck_mtx_t *pmtx;
403 int fd, error;
404
405 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
406 return (ENOMEM);
407
/* NULL first so the freepipes path sees which endpoints were created */
408 rpipe = wpipe = NULL;
409 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
410 error = ENFILE;
411 goto freepipes;
412 }
413 /*
414 * allocate the space for the normal I/O direction up
415 * front... we'll delay the allocation for the other
416 * direction until a write actually occurs (most likely it won't)...
417 */
418 error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
419 if (error)
420 goto freepipes;
421
422 TAILQ_INIT(&rpipe->pipe_evlist);
423 TAILQ_INIT(&wpipe->pipe_evlist);
424
/* first descriptor: the read end */
425 error = falloc(p, &rf, &fd, vfs_context_current());
426 if (error) {
427 goto freepipes;
428 }
429 retval[0] = fd;
430
431 /*
432 * for now we'll create half-duplex pipes(refer returns section above).
433 * this is what we've always supported..
434 */
435 rf->f_flag = FREAD;
436 rf->f_data = (caddr_t)rpipe;
437 rf->f_ops = &pipeops;
438
/* second descriptor: the write end; undo the first one if this fails */
439 error = falloc(p, &wf, &fd, vfs_context_current());
440 if (error) {
441 fp_free(p, retval[0], rf);
442 goto freepipes;
443 }
444 wf->f_flag = FWRITE;
445 wf->f_data = (caddr_t)wpipe;
446 wf->f_ops = &pipeops;
447
448 rpipe->pipe_peer = wpipe;
449 wpipe->pipe_peer = rpipe;
450 /* both structures share the same mutex */
451 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
452
453 retval[1] = fd;
454 #if CONFIG_MACF
455 /*
456 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
457 *
458 * struct pipe represents a pipe endpoint. The MAC label is shared
459 * between the connected endpoints. As a result mac_pipe_label_init() and
460 * mac_pipe_label_associate() should only be called on one of the endpoints
461 * after they have been connected.
462 */
463 mac_pipe_label_init(rpipe);
464 mac_pipe_label_associate(kauth_cred_get(), rpipe);
465 wpipe->pipe_label = rpipe->pipe_label;
466 #endif
/*
 * With the process fd lock held, release both fd-table slots and drop the
 * references on both fileprocs.
 * NOTE(review): presumably this is what makes the descriptors visible to
 * the process - confirm against procfdtbl_releasefd()/fp_drop().
 */
467 proc_fdlock_spin(p);
468 procfdtbl_releasefd(p, retval[0], NULL);
469 procfdtbl_releasefd(p, retval[1], NULL);
470 fp_drop(p, retval[0], rf, 1);
471 fp_drop(p, retval[1], wf, 1);
472 proc_fdunlock(p);
473
474
475 return (0);
476
/*
 * Failure path: tear down whatever was created and free the mutex.
 * rpipe and/or wpipe can still be NULL here, and pipe_mtxp has not been set
 * on either endpoint yet.
 * NOTE(review): relies on pipeclose() tolerating both cases - confirm.
 */
477 freepipes:
478 pipeclose(rpipe);
479 pipeclose(wpipe);
480 lck_mtx_free(pmtx, pipe_mtx_grp);
481
482 return (error);
483 }
484
485 int
486 pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
487 {
488 #if CONFIG_MACF
489 int error;
490 #endif
491 int pipe_size = 0;
492 int pipe_count;
493 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
494 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
495
496 if (cpipe == NULL)
497 return (EBADF);
498 PIPE_LOCK(cpipe);
499
500 #if CONFIG_MACF
501 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
502 if (error) {
503 PIPE_UNLOCK(cpipe);
504 return (error);
505 }
506 #endif
507 if (cpipe->pipe_buffer.buffer == 0) {
508 /* must be stat'ing the write fd */
509 if (cpipe->pipe_peer) {
510 /* the peer still exists, use it's info */
511 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
512 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
513 } else {
514 pipe_count = 0;
515 }
516 } else {
517 pipe_size = MAX_PIPESIZE(cpipe);
518 pipe_count = cpipe->pipe_buffer.cnt;
519 }
520 /*
521 * since peer's buffer is setup ouside of lock
522 * we might catch it in transient state
523 */
524 if (pipe_size == 0)
525 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);
526
527 if (isstat64 != 0) {
528 sb64 = (struct stat64 *)ub;
529
530 bzero(sb64, sizeof(*sb64));
531 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
532 sb64->st_blksize = pipe_size;
533 sb64->st_size = pipe_count;
534 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;
535
536 sb64->st_uid = kauth_getuid();
537 sb64->st_gid = kauth_getgid();
538
539 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
540 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
541
542 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
543 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
544
545 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
546 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
547
548 /*
549 * Return a relatively unique inode number based on the current
550 * address of this pipe's struct pipe. This number may be recycled
551 * relatively quickly.
552 */
553 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
554 } else {
555 sb = (struct stat *)ub;
556
557 bzero(sb, sizeof(*sb));
558 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
559 sb->st_blksize = pipe_size;
560 sb->st_size = pipe_count;
561 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
562
563 sb->st_uid = kauth_getuid();
564 sb->st_gid = kauth_getgid();
565
566 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
567 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
568
569 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
570 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
571
572 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
573 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
574
575 /*
576 * Return a relatively unique inode number based on the current
577 * address of this pipe's struct pipe. This number may be recycled
578 * relatively quickly.
579 */
580 sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
581 }
582 PIPE_UNLOCK(cpipe);
583
584 /*
585 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
586 * st_uid, st_gid.
587 *
588 * XXX (st_dev) should be unique, but there is no device driver that
589 * XXX is associated with pipes, since they are implemented via a
590 * XXX struct fileops indirection rather than as FS objects.
591 */
592 return (0);
593 }
594
595
596 /*
597 * Allocate kva for pipe circular buffer, the space is pageable
598 * This routine will 'realloc' the size of a pipe safely, if it fails
599 * it will retain the old buffer.
600 * If it fails it will return ENOMEM.
601 */
602 static int
603 pipespace(struct pipe *cpipe, int size)
604 {
605 vm_offset_t buffer;
606
607 if (size <= 0)
608 return(EINVAL);
609
610 if ((buffer = (vm_offset_t)kalloc(size)) == 0 )
611 return(ENOMEM);
612
613 /* free old resources if we're resizing */
614 pipe_free_kmem(cpipe);
615 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
616 cpipe->pipe_buffer.size = size;
617 cpipe->pipe_buffer.in = 0;
618 cpipe->pipe_buffer.out = 0;
619 cpipe->pipe_buffer.cnt = 0;
620
621 OSAddAtomic(1, &amountpipes);
622 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
623
624 return (0);
625 }
626
627 /*
628 * initialize and allocate VM and memory for pipe
629 */
630 static int
631 pipe_create(struct pipe **cpipep)
632 {
633 struct pipe *cpipe;
634 cpipe = (struct pipe *)zalloc(pipe_zone);
635
636 if ((*cpipep = cpipe) == NULL)
637 return (ENOMEM);
638
639 /*
640 * protect so pipespace or pipeclose don't follow a junk pointer
641 * if pipespace() fails.
642 */
643 bzero(cpipe, sizeof *cpipe);
644
645 /* Initial times are all the time of creation of the pipe */
646 pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
647 return (0);
648 }
649
650
651 /*
652 * lock a pipe for I/O, blocking other access
653 */
654 static inline int
655 pipeio_lock(struct pipe *cpipe, int catch)
656 {
657 int error;
658 while (cpipe->pipe_state & PIPE_LOCKFL) {
659 cpipe->pipe_state |= PIPE_LWANT;
660 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
661 "pipelk", 0);
662 if (error != 0)
663 return (error);
664 }
665 cpipe->pipe_state |= PIPE_LOCKFL;
666 return (0);
667 }
668
669 /*
670 * unlock a pipe I/O lock
671 */
672 static inline void
673 pipeio_unlock(struct pipe *cpipe)
674 {
675 cpipe->pipe_state &= ~PIPE_LOCKFL;
676 if (cpipe->pipe_state & PIPE_LWANT) {
677 cpipe->pipe_state &= ~PIPE_LWANT;
678 wakeup(cpipe);
679 }
680 }
681
682 /*
683 * wakeup anyone whos blocked in select
684 */
685 static void
686 pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
687 {
688 if (cpipe->pipe_state & PIPE_SEL) {
689 cpipe->pipe_state &= ~PIPE_SEL;
690 selwakeup(&cpipe->pipe_sel);
691 }
692 if (cpipe->pipe_state & PIPE_KNOTE)
693 KNOTE(&cpipe->pipe_sel.si_note, 1);
694
695 postpipeevent(cpipe, EV_RWBYTES);
696
697 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
698 if (spipe->pipe_pgid < 0)
699 gsignal(-spipe->pipe_pgid, SIGIO);
700 else
701 proc_signal(spipe->pipe_pgid, SIGIO);
702 }
703 }
704
705 /*
706 * Read n bytes from the buffer. Semantics are similar to file read.
 *
 * fp  - file whose f_data is the struct pipe to read from; its f_flag is
 *       consulted for FNONBLOCK
 * uio - destination of the data; the loop runs while uio_resid(uio) is
 *       non-zero and data is copied out with uiomove()
 *
 * returns: 0 or an errno value (the local "error"). The byte count kept in
 * "nread" only decides when to stop waiting; it is not the return value.
 * An empty pipe in PIPE_DRAIN or PIPE_EOF state ends the loop with error
 * still 0. EAGAIN is returned for a non-blocking read that finds the pipe
 * empty before anything was read.
708 */
709 /* ARGSUSED */
710 static int
711 pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
712 __unused vfs_context_t ctx)
713 {
714 struct pipe *rpipe = (struct pipe *)fp->f_data;
715 int error;
716 int nread = 0;
717 u_int size;
718
/* pipe_busy stays raised for the whole call; it is dropped at unlocked_error */
719 PIPE_LOCK(rpipe);
720 ++rpipe->pipe_busy;
721
722 error = pipeio_lock(rpipe, 1);
723 if (error)
724 goto unlocked_error;
725
726 #if CONFIG_MACF
727 error = mac_pipe_check_read(kauth_cred_get(), rpipe);
728 if (error)
729 goto locked_error;
730 #endif
731
732
733 while (uio_resid(uio)) {
734 /*
735 * normal pipe buffer receive
736 */
737 if (rpipe->pipe_buffer.cnt > 0) {
738 /*
739 * # bytes to read is min( bytes from read pointer until end of buffer,
740 * total unread bytes,
741 * user requested byte count)
742 */
743 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
744 if (size > rpipe->pipe_buffer.cnt)
745 size = rpipe->pipe_buffer.cnt;
746 // LP64todo - fix this!
747 if (size > (u_int) uio_resid(uio))
748 size = (u_int) uio_resid(uio);
749
/* the mutex is dropped around the copy; the io lock (PIPE_LOCKFL) is still held */
750 PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
751 error = uiomove(
752 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
753 size, uio);
754 PIPE_LOCK(rpipe);
755 if (error)
756 break;
757
/* advance the read index, wrapping to 0 at the end of the buffer */
758 rpipe->pipe_buffer.out += size;
759 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
760 rpipe->pipe_buffer.out = 0;
761
762 rpipe->pipe_buffer.cnt -= size;
763
764 /*
765 * If there is no more to read in the pipe, reset
766 * its pointers to the beginning. This improves
767 * cache hit stats.
768 */
769 if (rpipe->pipe_buffer.cnt == 0) {
770 rpipe->pipe_buffer.in = 0;
771 rpipe->pipe_buffer.out = 0;
772 }
773 nread += size;
774 } else {
775 /*
776 * detect EOF condition
777 * read returns 0 on EOF, no need to set error
778 */
779 if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
780 break;
781 }
782
783 /*
784 * If the "write-side" has been blocked, wake it up now.
785 */
786 if (rpipe->pipe_state & PIPE_WANTW) {
787 rpipe->pipe_state &= ~PIPE_WANTW;
788 wakeup(rpipe);
789 }
790
791 /*
792 * Break if some data was read in previous iteration.
793 */
794 if (nread > 0)
795 break;
796
797 /*
798 * Unlock the pipe buffer for our remaining processing.
799 * We will either break out with an error or we will
800 * sleep and relock to loop.
801 */
802 pipeio_unlock(rpipe);
803
804 /*
805 * Handle non-blocking mode operation or
806 * wait for more data.
807 */
808 if (fp->f_flag & FNONBLOCK) {
809 error = EAGAIN;
810 } else {
811 rpipe->pipe_state |= PIPE_WANTR;
812 error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
813 if (error == 0)
814 error = pipeio_lock(rpipe, 1);
815 }
/* the io lock is not held on this path, so skip the pipeio_unlock() below */
816 if (error)
817 goto unlocked_error;
818 }
819 }
/* the label exists only with CONFIG_MACF because only the MAC check jumps to it */
820 #if CONFIG_MACF
821 locked_error:
822 #endif
823 pipeio_unlock(rpipe);
824
825 unlocked_error:
826 --rpipe->pipe_busy;
827
828 /*
829 * PIPE_WANT processing only makes sense if pipe_busy is 0.
830 */
831 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
832 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
833 wakeup(rpipe);
834 } else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
835 /*
836 * Handle write blocking hysteresis.
837 */
838 if (rpipe->pipe_state & PIPE_WANTW) {
839 rpipe->pipe_state &= ~PIPE_WANTW;
840 wakeup(rpipe);
841 }
842 }
843
/* there is free space in the buffer: notify select/knote/event waiters */
844 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
845 pipeselwakeup(rpipe, rpipe->pipe_peer);
846
847 /* update last read time */
848 pipe_touch(rpipe, PIPE_ATIME);
849
850 PIPE_UNLOCK(rpipe);
851
852 return (error);
853 }
854
855 /*
856 * perform a write of n bytes into the read side of buffer. Since
857 * pipes are unidirectional a write is meant to be read by the otherside only.
 *
 * fp  - file being written; its f_data is this descriptor's own struct pipe
 *       and its f_flag is consulted for FNONBLOCK
 * uio - source of the data; the loop runs while uio_resid(uio) is non-zero
 *
 * Naming: the local "rpipe" is the endpoint attached to fp, and "wpipe" is
 * its peer - the endpoint whose buffer actually receives the data and
 * whose state bits (PIPE_WANTR, PIPE_WANTW, PIPE_EOF, ...) are used.
 *
 * Returns 0 or an errno value: EPIPE when the peer is missing or in
 * PIPE_DRAIN/PIPE_EOF state, EAGAIN when a non-blocking write finds no
 * usable space, ENOMEM when no buffer could be set up, or an error from
 * the MAC check, pipeio_lock(), uiomove() or msleep().
858 */
859 static int
860 pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
861 __unused vfs_context_t ctx)
862 {
863 int error = 0;
864 int orig_resid;
865 int pipe_size;
866 struct pipe *wpipe, *rpipe;
867 // LP64todo - fix this!
868 orig_resid = uio_resid(uio);
869 int space;
870
871 rpipe = (struct pipe *)fp->f_data;
872
873 PIPE_LOCK(rpipe);
874 wpipe = rpipe->pipe_peer;
875
876 /*
877 * detect loss of pipe read side, issue SIGPIPE if lost.
 * (this function itself only returns EPIPE; no signal is sent here)
878 */
879 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
880 PIPE_UNLOCK(rpipe);
881 return (EPIPE);
882 }
883 #if CONFIG_MACF
884 error = mac_pipe_check_write(kauth_cred_get(), wpipe);
885 if (error) {
886 PIPE_UNLOCK(rpipe);
887 return (error);
888 }
889 #endif
/* pipe_busy on the peer stays raised until this call is done with it */
890 ++wpipe->pipe_busy;
891
892 pipe_size = 0;
893
894 /*
895 * need to allocate some storage... we delay the allocation
896 * until the first write on fd[0] to avoid allocating storage for both
897 * 'pipe ends'... most pipes are half-duplex with the writes targeting
898 * fd[1], so allocating space for both ends is a waste...
899 */
900
/*
 * Pick a (new) buffer size when the peer has no buffer at all, or when the
 * request does not fit in the free space and the global pipe memory total
 * is still below maxpipekva.
 */
901 if ( wpipe->pipe_buffer.buffer == 0 || (
902 (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
903 amountpipekva < maxpipekva ) ) {
904
905 pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
906 }
907 if (pipe_size) {
908 /*
909 * need to do initial allocation or resizing of pipe
910 * holding both structure and io locks.
911 */
912 if ((error = pipeio_lock(wpipe, 1)) == 0) {
/* empty pipe: plain (re)allocation; otherwise grow while preserving the data */
913 if (wpipe->pipe_buffer.cnt == 0)
914 error = pipespace(wpipe, pipe_size);
915 else
916 error = expand_pipespace(wpipe, pipe_size);
917
918 pipeio_unlock(wpipe);
919
920 /* allocation failed */
921 if (wpipe->pipe_buffer.buffer == 0)
922 error = ENOMEM;
923 }
924 if (error) {
925 /*
926 * If an error occurred unbusy and return, waking up any pending
927 * readers.
928 */
929 --wpipe->pipe_busy;
930 if ((wpipe->pipe_busy == 0) &&
931 (wpipe->pipe_state & PIPE_WANT)) {
932 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
933 wakeup(wpipe);
934 }
935 PIPE_UNLOCK(rpipe);
936 return(error);
937 }
938 }
939
940 while (uio_resid(uio)) {
941
/* re-entered by goto when the free space shrank while waiting for the io lock */
942 retrywrite:
943 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
944
945 /* Writes of size <= PIPE_BUF must be atomic. */
/* so a small request that does not fit entirely is treated as "no space" */
946 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
947 space = 0;
948
949 if (space > 0) {
950
951 if ((error = pipeio_lock(wpipe,1)) == 0) {
952 int size; /* Transfer size */
953 int segsize; /* first segment to transfer */
954
955 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
956 pipeio_unlock(wpipe);
957 error = EPIPE;
958 break;
959 }
960 /*
961 * If a process blocked in pipeio_lock, our
962 * value for space might be bad... the mutex
963 * is dropped while we're blocked
964 */
965 if (space > (int)(wpipe->pipe_buffer.size -
966 wpipe->pipe_buffer.cnt)) {
967 pipeio_unlock(wpipe);
968 goto retrywrite;
969 }
970
971 /*
972 * Transfer size is minimum of uio transfer
973 * and free space in pipe buffer.
974 */
975 // LP64todo - fix this!
976 if (space > uio_resid(uio))
977 size = uio_resid(uio);
978 else
979 size = space;
980 /*
981 * First segment to transfer is minimum of
982 * transfer size and contiguous space in
983 * pipe buffer. If first segment to transfer
984 * is less than the transfer size, we've got
985 * a wraparound in the buffer.
986 */
987 segsize = wpipe->pipe_buffer.size -
988 wpipe->pipe_buffer.in;
989 if (segsize > size)
990 segsize = size;
991
992 /* Transfer first segment */
993
/* the mutex is dropped around each copy; the io lock is still held */
994 PIPE_UNLOCK(rpipe);
995 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
996 segsize, uio);
997 PIPE_LOCK(rpipe);
998
999 if (error == 0 && segsize < size) {
1000 /*
1001 * Transfer remaining part now, to
1002 * support atomic writes. Wraparound
1003 * happened. (State 3)
1004 */
1005 if (wpipe->pipe_buffer.in + segsize !=
1006 wpipe->pipe_buffer.size)
1007 panic("Expected pipe buffer "
1008 "wraparound disappeared");
1009
1010 PIPE_UNLOCK(rpipe);
1011 error = uiomove(
1012 &wpipe->pipe_buffer.buffer[0],
1013 size - segsize, uio);
1014 PIPE_LOCK(rpipe);
1015 }
1016 /*
1017 * readers never know to read until count is updated.
1018 */
1019 if (error == 0) {
/*
 * Advance the write index. It wraps only when it moves strictly past the
 * end of the buffer, so it can be left equal to the buffer size.
 */
1020 wpipe->pipe_buffer.in += size;
1021 if (wpipe->pipe_buffer.in >
1022 wpipe->pipe_buffer.size) {
1023 if (wpipe->pipe_buffer.in !=
1024 size - segsize +
1025 wpipe->pipe_buffer.size)
1026 panic("Expected "
1027 "wraparound bad");
1028 wpipe->pipe_buffer.in = size -
1029 segsize;
1030 }
1031
1032 wpipe->pipe_buffer.cnt += size;
1033 if (wpipe->pipe_buffer.cnt >
1034 wpipe->pipe_buffer.size)
1035 panic("Pipe buffer overflow");
1036
1037 }
1038 pipeio_unlock(wpipe);
1039 }
1040 if (error)
1041 break;
1042
1043 } else {
1044 /*
1045 * If the "read-side" has been blocked, wake it up now.
1046 */
1047 if (wpipe->pipe_state & PIPE_WANTR) {
1048 wpipe->pipe_state &= ~PIPE_WANTR;
1049 wakeup(wpipe);
1050 }
1051 /*
1052 * don't block on non-blocking I/O
1053 * we'll do the pipeselwakeup on the way out
1054 */
1055 if (fp->f_flag & FNONBLOCK) {
1056 error = EAGAIN;
1057 break;
1058 }
1059
1060 /*
1061 * If read side wants to go away, we just issue a signal
1062 * to ourselves.
 * (here that means failing with EPIPE; no signal is sent by this code)
1063 */
1064 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
1065 error = EPIPE;
1066 break;
1067 }
1068
1069 /*
1070 * We have no more space and have something to offer,
1071 * wake up select/poll.
1072 */
1073 pipeselwakeup(wpipe, wpipe);
1074
1075 wpipe->pipe_state |= PIPE_WANTW;
1076
/* sleep on the peer until woken; the mutex is handed to msleep() */
1077 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);
1078
1079 if (error != 0)
1080 break;
1081 }
1082 }
1083 --wpipe->pipe_busy;
1084
1085 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1086 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1087 wakeup(wpipe);
1088 }
1089 if (wpipe->pipe_buffer.cnt > 0) {
1090 /*
1091 * If there are any characters in the buffer, we wake up
1092 * the reader if it was blocked waiting for data.
1093 */
1094 if (wpipe->pipe_state & PIPE_WANTR) {
1095 wpipe->pipe_state &= ~PIPE_WANTR;
1096 wakeup(wpipe);
1097 }
1098 /*
1099 * wake up thread blocked in select/poll or post the notification
1100 */
1101 pipeselwakeup(wpipe, wpipe);
1102 }
1103
1104 /* Update modification, status change (# of bytes in pipe) times */
1105 pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
1106 pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
1107 PIPE_UNLOCK(rpipe);
1108
1109 return (error);
1110 }
1111
1112 /*
1113 * we implement a very minimal set of ioctls for compatibility with sockets.
1114 */
1115 /* ARGSUSED 3 */
1116 static int
1117 pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
1118 __unused vfs_context_t ctx)
1119 {
1120 struct pipe *mpipe = (struct pipe *)fp->f_data;
1121 #if CONFIG_MACF
1122 int error;
1123 #endif
1124
1125 PIPE_LOCK(mpipe);
1126
1127 #if CONFIG_MACF
1128 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
1129 if (error) {
1130 PIPE_UNLOCK(mpipe);
1131
1132 return (error);
1133 }
1134 #endif
1135
1136 switch (cmd) {
1137
1138 case FIONBIO:
1139 PIPE_UNLOCK(mpipe);
1140 return (0);
1141
1142 case FIOASYNC:
1143 if (*(int *)data) {
1144 mpipe->pipe_state |= PIPE_ASYNC;
1145 } else {
1146 mpipe->pipe_state &= ~PIPE_ASYNC;
1147 }
1148 PIPE_UNLOCK(mpipe);
1149 return (0);
1150
1151 case FIONREAD:
1152 *(int *)data = mpipe->pipe_buffer.cnt;
1153 PIPE_UNLOCK(mpipe);
1154 return (0);
1155
1156 case TIOCSPGRP:
1157 mpipe->pipe_pgid = *(int *)data;
1158
1159 PIPE_UNLOCK(mpipe);
1160 return (0);
1161
1162 case TIOCGPGRP:
1163 *(int *)data = mpipe->pipe_pgid;
1164
1165 PIPE_UNLOCK(mpipe);
1166 return (0);
1167
1168 }
1169 PIPE_UNLOCK(mpipe);
1170 return (ENOTTY);
1171 }
1172
1173
1174 static int
1175 pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
1176 {
1177 struct pipe *rpipe = (struct pipe *)fp->f_data;
1178 struct pipe *wpipe;
1179 int retnum = 0;
1180
1181 if (rpipe == NULL || rpipe == (struct pipe *)-1)
1182 return (retnum);
1183
1184 PIPE_LOCK(rpipe);
1185
1186 wpipe = rpipe->pipe_peer;
1187
1188
1189 #if CONFIG_MACF
1190 /*
1191 * XXX We should use a per thread credential here; minimally, the
1192 * XXX process credential should have a persistent reference on it
1193 * XXX before being passed in here.
1194 */
1195 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
1196 PIPE_UNLOCK(rpipe);
1197 return (0);
1198 }
1199 #endif
1200 switch (which) {
1201
1202 case FREAD:
1203 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1204 (rpipe->pipe_buffer.cnt > 0) ||
1205 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1206
1207 retnum = 1;
1208 } else {
1209 rpipe->pipe_state |= PIPE_SEL;
1210 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1211 }
1212 break;
1213
1214 case FWRITE:
1215 if (wpipe)
1216 wpipe->pipe_state |= PIPE_WSELECT;
1217 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1218 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1219 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
1220
1221 retnum = 1;
1222 } else {
1223 wpipe->pipe_state |= PIPE_SEL;
1224 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
1225 }
1226 break;
1227 case 0:
1228 rpipe->pipe_state |= PIPE_SEL;
1229 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1230 break;
1231 }
1232 PIPE_UNLOCK(rpipe);
1233
1234 return (retnum);
1235 }
1236
1237
1238 /* ARGSUSED 1 */
1239 static int
1240 pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
1241 {
1242 struct pipe *cpipe;
1243
1244 proc_fdlock_spin(vfs_context_proc(ctx));
1245 cpipe = (struct pipe *)fg->fg_data;
1246 fg->fg_data = NULL;
1247 proc_fdunlock(vfs_context_proc(ctx));
1248 if (cpipe)
1249 pipeclose(cpipe);
1250
1251 return (0);
1252 }
1253
1254 static void
1255 pipe_free_kmem(struct pipe *cpipe)
1256 {
1257 if (cpipe->pipe_buffer.buffer != NULL) {
1258 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
1259 OSAddAtomic(-1, &amountpipes);
1260 kfree((void *)cpipe->pipe_buffer.buffer,
1261 cpipe->pipe_buffer.size);
1262 cpipe->pipe_buffer.buffer = NULL;
1263 cpipe->pipe_buffer.size = 0;
1264 }
1265 }
1266
1267 /*
1268 * shutdown the pipe
1269 */
1270 static void
1271 pipeclose(struct pipe *cpipe)
1272 {
1273 struct pipe *ppipe;
1274
1275 if (cpipe == NULL)
1276 return;
1277 /* partially created pipes won't have a valid mutex. */
1278 if (PIPE_MTX(cpipe) != NULL)
1279 PIPE_LOCK(cpipe);
1280
1281
1282 /*
1283 * If the other side is blocked, wake it up saying that
1284 * we want to close it down.
1285 */
1286 cpipe->pipe_state &= ~PIPE_DRAIN;
1287 cpipe->pipe_state |= PIPE_EOF;
1288 pipeselwakeup(cpipe, cpipe);
1289
1290 while (cpipe->pipe_busy) {
1291 cpipe->pipe_state |= PIPE_WANT;
1292
1293 wakeup(cpipe);
1294 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1295 }
1296
1297 #if CONFIG_MACF
1298 /*
1299 * Free the shared pipe label only after the two ends are disconnected.
1300 */
1301 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1302 mac_pipe_label_destroy(cpipe);
1303 #endif
1304
1305 /*
1306 * Disconnect from peer
1307 */
1308 if ((ppipe = cpipe->pipe_peer) != NULL) {
1309
1310 ppipe->pipe_state &= ~(PIPE_DRAIN);
1311 ppipe->pipe_state |= PIPE_EOF;
1312
1313 pipeselwakeup(ppipe, ppipe);
1314 wakeup(ppipe);
1315
1316 if (cpipe->pipe_state & PIPE_KNOTE)
1317 KNOTE(&ppipe->pipe_sel.si_note, 1);
1318
1319 postpipeevent(ppipe, EV_RCLOSED);
1320
1321 ppipe->pipe_peer = NULL;
1322 }
1323 evpipefree(cpipe);
1324
1325 /*
1326 * free resources
1327 */
1328 if (PIPE_MTX(cpipe) != NULL) {
1329 if (ppipe != NULL) {
1330 /*
1331 * since the mutex is shared and the peer is still
1332 * alive, we need to release the mutex, not free it
1333 */
1334 PIPE_UNLOCK(cpipe);
1335 } else {
1336 /*
1337 * peer is gone, so we're the sole party left with
1338 * interest in this mutex... unlock and free it
1339 */
1340 PIPE_UNLOCK(cpipe);
1341 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
1342 }
1343 }
1344 pipe_free_kmem(cpipe);
1345 if (cpipe->pipe_state & PIPE_WSELECT) {
1346 pipe_garbage_collect(cpipe);
1347 } else {
1348 zfree(pipe_zone, cpipe);
1349 pipe_garbage_collect(NULL);
1350 }
1351
1352 }
1353
1354 /*ARGSUSED*/
1355 static int
1356 pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx)
1357 {
1358 struct pipe *cpipe;
1359
1360 cpipe = (struct pipe *)kn->kn_fp->f_data;
1361
1362 PIPE_LOCK(cpipe);
1363 #if CONFIG_MACF
1364 /*
1365 * XXX We should use a per thread credential here; minimally, the
1366 * XXX process credential should have a persistent reference on it
1367 * XXX before being passed in here.
1368 */
1369 if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
1370 PIPE_UNLOCK(cpipe);
1371 return (1);
1372 }
1373 #endif
1374
1375 switch (kn->kn_filter) {
1376 case EVFILT_READ:
1377 kn->kn_fop = &pipe_rfiltops;
1378
1379 break;
1380 case EVFILT_WRITE:
1381 kn->kn_fop = &pipe_wfiltops;
1382
1383 if (cpipe->pipe_peer == NULL) {
1384 /*
1385 * other end of pipe has been closed
1386 */
1387 PIPE_UNLOCK(cpipe);
1388 return (EPIPE);
1389 }
1390 if (cpipe->pipe_peer)
1391 cpipe = cpipe->pipe_peer;
1392 break;
1393 default:
1394 PIPE_UNLOCK(cpipe);
1395 return (1);
1396 }
1397
1398 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
1399 cpipe->pipe_state |= PIPE_KNOTE;
1400
1401 PIPE_UNLOCK(cpipe);
1402 return (0);
1403 }
1404
1405 static void
1406 filt_pipedetach(struct knote *kn)
1407 {
1408 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1409
1410 PIPE_LOCK(cpipe);
1411
1412 if (kn->kn_filter == EVFILT_WRITE) {
1413 if (cpipe->pipe_peer == NULL) {
1414 PIPE_UNLOCK(cpipe);
1415 return;
1416 }
1417 cpipe = cpipe->pipe_peer;
1418 }
1419 if (cpipe->pipe_state & PIPE_KNOTE) {
1420 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
1421 cpipe->pipe_state &= ~PIPE_KNOTE;
1422 }
1423 PIPE_UNLOCK(cpipe);
1424 }
1425
1426 /*ARGSUSED*/
1427 static int
1428 filt_piperead(struct knote *kn, long hint)
1429 {
1430 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1431 struct pipe *wpipe;
1432 int retval;
1433
1434 /*
1435 * if hint == 0, then we've been called from the kevent
1436 * world directly and do not currently hold the pipe mutex...
1437 * if hint == 1, we're being called back via the KNOTE post
1438 * we made in pipeselwakeup, and we already hold the mutex...
1439 */
1440 if (hint == 0)
1441 PIPE_LOCK(rpipe);
1442
1443 wpipe = rpipe->pipe_peer;
1444 kn->kn_data = rpipe->pipe_buffer.cnt;
1445 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1446 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1447 kn->kn_flags |= EV_EOF;
1448 retval = 1;
1449 } else {
1450 int64_t lowwat = 1;
1451 if (kn->kn_sfflags & NOTE_LOWAT) {
1452 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
1453 lowwat = MAX_PIPESIZE(rpipe);
1454 else if (kn->kn_sdata > lowwat)
1455 lowwat = kn->kn_sdata;
1456 }
1457 retval = kn->kn_data >= lowwat;
1458 }
1459
1460 if (hint == 0)
1461 PIPE_UNLOCK(rpipe);
1462
1463 return (retval);
1464 }
1465
1466 /*ARGSUSED*/
1467 static int
1468 filt_pipewrite(struct knote *kn, long hint)
1469 {
1470 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1471 struct pipe *wpipe;
1472
1473 /*
1474 * if hint == 0, then we've been called from the kevent
1475 * world directly and do not currently hold the pipe mutex...
1476 * if hint == 1, we're being called back via the KNOTE post
1477 * we made in pipeselwakeup, and we already hold the mutex...
1478 */
1479 if (hint == 0)
1480 PIPE_LOCK(rpipe);
1481
1482 wpipe = rpipe->pipe_peer;
1483
1484 if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1485 kn->kn_data = 0;
1486 kn->kn_flags |= EV_EOF;
1487
1488 if (hint == 0)
1489 PIPE_UNLOCK(rpipe);
1490 return (1);
1491 }
1492 kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;
1493
1494 int64_t lowwat = PIPE_BUF;
1495 if (kn->kn_sfflags & NOTE_LOWAT) {
1496 if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
1497 lowwat = MAX_PIPESIZE(wpipe);
1498 else if (kn->kn_sdata > lowwat)
1499 lowwat = kn->kn_sdata;
1500 }
1501
1502 if (hint == 0)
1503 PIPE_UNLOCK(rpipe);
1504
1505 return (kn->kn_data >= lowwat);
1506 }
1507
1508 int
1509 fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
1510 {
1511 #if CONFIG_MACF
1512 int error;
1513 #endif
1514 struct timeval now;
1515 struct vinfo_stat * ub;
1516 int pipe_size = 0;
1517 int pipe_count;
1518
1519 if (cpipe == NULL)
1520 return (EBADF);
1521 PIPE_LOCK(cpipe);
1522
1523 #if CONFIG_MACF
1524 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
1525 if (error) {
1526 PIPE_UNLOCK(cpipe);
1527 return (error);
1528 }
1529 #endif
1530 if (cpipe->pipe_buffer.buffer == 0) {
1531 /*
1532 * must be stat'ing the write fd
1533 */
1534 if (cpipe->pipe_peer) {
1535 /*
1536 * the peer still exists, use it's info
1537 */
1538 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
1539 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
1540 } else {
1541 pipe_count = 0;
1542 }
1543 } else {
1544 pipe_size = MAX_PIPESIZE(cpipe);
1545 pipe_count = cpipe->pipe_buffer.cnt;
1546 }
1547 /*
1548 * since peer's buffer is setup ouside of lock
1549 * we might catch it in transient state
1550 */
1551 if (pipe_size == 0)
1552 pipe_size = PIPE_SIZE;
1553
1554 ub = &pinfo->pipe_stat;
1555
1556 bzero(ub, sizeof(*ub));
1557 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
1558 ub->vst_blksize = pipe_size;
1559 ub->vst_size = pipe_count;
1560 if (ub->vst_blksize != 0)
1561 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
1562 ub->vst_nlink = 1;
1563
1564 ub->vst_uid = kauth_getuid();
1565 ub->vst_gid = kauth_getgid();
1566
1567 microtime(&now);
1568 ub->vst_atime = now.tv_sec;
1569 ub->vst_atimensec = now.tv_usec * 1000;
1570
1571 ub->vst_mtime = now.tv_sec;
1572 ub->vst_mtimensec = now.tv_usec * 1000;
1573
1574 ub->vst_ctime = now.tv_sec;
1575 ub->vst_ctimensec = now.tv_usec * 1000;
1576
1577 /*
1578 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
1579 * XXX (st_dev, st_ino) should be unique.
1580 */
1581
1582 pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
1583 pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)(cpipe->pipe_peer));
1584 pinfo->pipe_status = cpipe->pipe_state;
1585
1586 PIPE_UNLOCK(cpipe);
1587
1588 return (0);
1589 }
1590
1591
1592 static int
1593 pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
1594 {
1595
1596 /* Note: fdlock already held */
1597 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);
1598
1599 if (cpipe) {
1600 PIPE_LOCK(cpipe);
1601 cpipe->pipe_state |= PIPE_DRAIN;
1602 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1603 wakeup(cpipe);
1604
1605 /* Must wake up peer: a writer sleeps on the read side */
1606 if ((ppipe = cpipe->pipe_peer)) {
1607 ppipe->pipe_state |= PIPE_DRAIN;
1608 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1609 wakeup(ppipe);
1610 }
1611
1612 PIPE_UNLOCK(cpipe);
1613 return 0;
1614 }
1615
1616 return 1;
1617 }
1618
1619
1620 /*
1621 * When a thread sets a write-select on a pipe, it creates an implicit,
1622 * untracked dependency between that thread and the peer of the pipe
1623 * on which the select is set. If the peer pipe is closed and freed
1624 * before the select()ing thread wakes up, the system will panic as
1625 * it attempts to unwind the dangling select(). To avoid that panic,
1626 * we notice whenever a dangerous select() is set on a pipe, and
1627 * defer the final deletion of the pipe until that select()s are all
1628 * resolved. Since we can't currently detect exactly when that
1629 * resolution happens, we use a simple garbage collection queue to
1630 * reap the at-risk pipes 'later'.
1631 */
1632 static void
1633 pipe_garbage_collect(struct pipe *cpipe)
1634 {
1635 uint64_t old, now;
1636 struct pipe_garbage *pgp;
1637
1638 /* Convert msecs to nsecs and then to abstime */
1639 old = pipe_garbage_age_limit * 1000000;
1640 nanoseconds_to_absolutetime(old, &old);
1641
1642 lck_mtx_lock(pipe_garbage_lock);
1643
1644 /* Free anything that's been on the queue for <mumble> seconds */
1645 now = mach_absolute_time();
1646 old = now - old;
1647 while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
1648 pipe_garbage_head = pgp->pg_next;
1649 if (pipe_garbage_head == NULL)
1650 pipe_garbage_tail = NULL;
1651 pipe_garbage_count--;
1652 zfree(pipe_zone, pgp->pg_pipe);
1653 zfree(pipe_garbage_zone, pgp);
1654 }
1655
1656 /* Add the new pipe (if any) to the tail of the garbage queue */
1657 if (cpipe) {
1658 cpipe->pipe_state = PIPE_DEAD;
1659 pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
1660 if (pgp == NULL) {
1661 /*
1662 * We're too low on memory to garbage collect the
1663 * pipe. Freeing it runs the risk of panicing the
1664 * system. All we can do is leak it and leave
1665 * a breadcrumb behind. The good news, such as it
1666 * is, is that this will probably never happen.
1667 * We will probably hit the panic below first.
1668 */
1669 printf("Leaking pipe %p - no room left in the queue",
1670 cpipe);
1671 lck_mtx_unlock(pipe_garbage_lock);
1672 return;
1673 }
1674
1675 pgp->pg_pipe = cpipe;
1676 pgp->pg_timestamp = now;
1677 pgp->pg_next = NULL;
1678
1679 if (pipe_garbage_tail)
1680 pipe_garbage_tail->pg_next = pgp;
1681 pipe_garbage_tail = pgp;
1682 if (pipe_garbage_head == NULL)
1683 pipe_garbage_head = pipe_garbage_tail;
1684
1685 if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
1686 panic("Length of pipe garbage queue exceeded %d",
1687 PIPE_GARBAGE_QUEUE_LIMIT);
1688 }
1689 lck_mtx_unlock(pipe_garbage_lock);
1690 }
1691