/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */
/*
 * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers. The following are the valid
 * states of pipe operations:
 *
 *      _________________________________
 * 1.  |_________________________________|  r=w, c=0
 *
 *      _________________________________
 * 2.  |__r:::::wc_______________________|  r <= w, c > 0
 *
 *      _________________________________
 * 3.  |::::wc_____r:::::::::::::::::::::|  r > w, c > 0
 *
 *      _________________________________
 * 4.  |:::::::wrc:::::::::::::::::::::::|  w=r, c = Max size
 *
 *
 * Nomenclature:
 *      a-z define the steps in a program flow
 *      1-4 are the states as defined above
 *      Action: is what file operation is done on the pipe
 *
 * Current: None    Action: initialize with size M=200
 * a. State 1 (r=0, w=0, c=0)
 *
 * Current: a       Action: write(100)   (w < M)
 * b. State 2 (r=0, w=100, c=100)
 *
 * Current: b       Action: write(100)   (w = M-w)
 * c. State 4 (r=0, w=0, c=200)
 *
 * Current: b       Action: read(70)     (r < c)
 * d. State 2 (r=70, w=100, c=30)
 *
 * Current: d       Action: write(75)    (w < (M-w))
 * e. State 2 (r=70, w=175, c=105)
 *
 * Current: d       Action: write(110)   (w > (M-w))
 * f. State 3 (r=70, w=10, c=140)
 *
 * Current: d       Action: read(30)     (r >= c)
 * g. State 1 (r=100, w=100, c=0)
 *
 */
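
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the state machine above is driven by three fields of the pipe buffer --
 * `in` (the write index w), `out` (the read index r) and `cnt` (the byte
 * count c) -- over a buffer of `size` bytes (M). A hypothetical helper
 * showing the wraparound rule; the real read/write paths below split
 * transfers at the end of the buffer instead of using modulo arithmetic.
 */
#if 0 /* example only, never compiled */
static unsigned int
example_advance_index(unsigned int idx, unsigned int nbytes, unsigned int size)
{
	/* e.g. write(110) in step d: w = (100 + 110) % 200 = 10 => State 3 */
	return (idx + nbytes) % size;
}
#endif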

/*
 * This code creates half-duplex pipe buffers for facilitating file-like
 * operations on pipes. The initial buffer is very small, but this can
 * dynamically change to larger sizes based on usage. The buffer size is
 * never reduced. The total amount of kernel memory used is governed by
 * maxpipekva. If the dynamic expansion limit is reached, the output
 * thread is blocked until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, a sysctl exists:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes and kern.ipc.pipekva.
 *
 */
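
/*
 * Illustrative sketch (editor's addition): reading the monitoring sysctls
 * above from user space with sysctlbyname(3). This is a hypothetical
 * stand-alone program, not kernel code, and it assumes the sysctls were
 * compiled in (see the PIPE_SYSCTLS block below).
 */
#if 0 /* example only, never compiled */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int pipes = 0, kva = 0;
	size_t len = sizeof(pipes);

	if (sysctlbyname("kern.ipc.pipes", &pipes, &len, NULL, 0) == 0 &&
	    sysctlbyname("kern.ipc.pipekva", &kva, &len, NULL, 0) == 0) {
		printf("%d pipes, %d bytes of pipe KVA\n", pipes, kva);
	}
	return 0;
}
#endif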

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/aio_kern.h>
#include <sys/signalvar.h>
#include <sys/pipe.h>
#include <sys/sysproto.h>
#include <sys/proc_info.h>

#include <security/audit/audit.h>

#include <sys/kdebug.h>

#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <vm/vm_kern.h>
#include <libkern/OSAtomic.h>
#include <libkern/section_keywords.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

#define f_flag f_fglob->fg_flag
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

struct pipepair {
	lck_mtx_t pp_mtx;
	struct pipe pp_rpipe;
	struct pipe pp_wpipe;
};

#define PIPE_PAIR(pipe) \
	__container_of(PIPE_MTX(pipe), struct pipepair, pp_mtx)

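/*
 * Illustrative sketch (editor's addition): PIPE_PAIR() recovers the
 * enclosing pipepair from the address of its embedded mutex, in the
 * classic container_of style. Roughly equivalent to the hypothetical
 * expansion below (offsetof is from <stddef.h>).
 */
#if 0 /* example only, never compiled */
#define EXAMPLE_PIPE_PAIR(pipe) \
	((struct pipepair *)((char *)PIPE_MTX(pipe) - \
	offsetof(struct pipepair, pp_mtx)))
#endif
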
/*
 * interfaces to the outside world exported through file operations
 */
static int pipe_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
static int pipe_select(struct fileproc *fp, int which, void * wql,
    vfs_context_t ctx);
static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_qos_s *kev);
static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
    vfs_context_t ctx);
static int pipe_drain(struct fileproc *fp, vfs_context_t ctx);

static const struct fileops pipeops = {
	.fo_type = DTYPE_PIPE,
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_select = pipe_select,
	.fo_close = pipe_close,
	.fo_drain = pipe_drain,
	.fo_kqfilter = pipe_kqfilter,
};

static void filt_pipedetach(struct knote *kn);

static int filt_pipenotsup(struct knote *kn, long hint);
static int filt_pipenotsuptouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipenotsupprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_piperead(struct knote *kn, long hint);
static int filt_pipereadtouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipereadprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_pipewrite(struct knote *kn, long hint);
static int filt_pipewritetouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipewriteprocess(struct knote *kn, struct kevent_qos_s *kev);

SECURITY_READ_ONLY_EARLY(struct filterops) pipe_nfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipenotsup,
	.f_touch = filt_pipenotsuptouch,
	.f_process = filt_pipenotsupprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) pipe_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead,
	.f_touch = filt_pipereadtouch,
	.f_process = filt_pipereadprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) pipe_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite,
	.f_touch = filt_pipewritetouch,
	.f_process = filt_pipewriteprocess,
};

static int nbigpipe;      /* for compatibility's sake; no longer used */
static int amountpipes;   /* total number of pipes in system */
static int amountpipekva; /* total memory used by pipes */

int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */

#if PIPE_SYSCTLS
SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD | CTLFLAG_LOCKED,
    &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW | CTLFLAG_LOCKED,
    &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD | CTLFLAG_LOCKED,
    &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD | CTLFLAG_LOCKED,
    &amountpipekvawired, 0, "Pipe wired KVA usage");
#endif

static int pipepair_alloc(struct pipe **rpipe, struct pipe **wpipe);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipespace(struct pipe *cpipe, int size);
static int choose_pipespace(unsigned long current, unsigned long expected);
static int expand_pipespace(struct pipe *p, int target_size);
static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
static __inline int pipeio_lock(struct pipe *cpipe, int catch);
static __inline void pipeio_unlock(struct pipe *cpipe);

extern int postpipeevent(struct pipe *, int);
extern void evpipefree(struct pipe *cpipe);

static lck_grp_t *pipe_mtx_grp;
static lck_attr_t *pipe_mtx_attr;
static lck_grp_attr_t *pipe_mtx_grp_attr;

static zone_t pipe_zone;

#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/* initial setup done at time of sysinit */
void
pipeinit(void)
{
	nbigpipe = 0;
	vm_size_t zone_size;

	zone_size = 8192 * sizeof(struct pipepair);
	pipe_zone = zinit(sizeof(struct pipepair), zone_size, 4096, "pipe zone");

	/* allocate lock group attribute and group for pipe mutexes */
	pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
	pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);

	/* allocate the lock attribute for pipe mutexes */
	pipe_mtx_attr = lck_attr_alloc_init();
}

#ifndef CONFIG_EMBEDDED
/* Bitmap for things to touch in pipe_touch() */
#define PIPE_ATIME 0x00000001 /* time of last access */
#define PIPE_MTIME 0x00000002 /* time of last modification */
#define PIPE_CTIME 0x00000004 /* time of last status change */

static void
pipe_touch(struct pipe *tpipe, int touch)
{
	struct timespec now;

	nanotime(&now);

	if (touch & PIPE_ATIME) {
		tpipe->st_atimespec.tv_sec = now.tv_sec;
		tpipe->st_atimespec.tv_nsec = now.tv_nsec;
	}

	if (touch & PIPE_MTIME) {
		tpipe->st_mtimespec.tv_sec = now.tv_sec;
		tpipe->st_mtimespec.tv_nsec = now.tv_nsec;
	}

	if (touch & PIPE_CTIME) {
		tpipe->st_ctimespec.tv_sec = now.tv_sec;
		tpipe->st_ctimespec.tv_nsec = now.tv_nsec;
	}
}
#endif

static const unsigned int pipesize_blocks[] = {512, 1024, 2048, 4096, 4096 * 2, PIPE_SIZE, PIPE_SIZE * 4 };

/*
 * finds the right size from the possible sizes in pipesize_blocks:
 * the smallest block size strictly greater than max(current, expected),
 * or the largest block if the target exceeds every block size.
 */
static int
choose_pipespace(unsigned long current, unsigned long expected)
{
	int i = sizeof(pipesize_blocks) / sizeof(unsigned int) - 1;
	unsigned long target;

	/*
	 * assert that we always get an atomic transaction sized pipe buffer,
	 * even if the system pipe buffer high-water mark has been crossed.
	 */
	assert(PIPE_BUF == pipesize_blocks[0]);

	if (expected > current) {
		target = expected;
	} else {
		target = current;
	}

	while (i > 0 && pipesize_blocks[i - 1] > target) {
		i = i - 1;
	}

	return pipesize_blocks[i];
}

/*
 * expands the pipe buffer while there is data to be read; the old
 * buffer is freed once the currently buffered data has been copied
 * to the new storage.
 * Required: PIPE_LOCK and io lock to be held by caller.
 * returns 0 on success or when no expansion is needed
 */
static int
expand_pipespace(struct pipe *p, int target_size)
{
	struct pipe tmp, oldpipe;
	int error;
	tmp.pipe_buffer.buffer = 0;

	if (p->pipe_buffer.size >= (unsigned) target_size) {
		return 0; /* the existing buffer is max size possible */
	}

	/* create enough space in the target */
	error = pipespace(&tmp, target_size);
	if (error != 0) {
		return error;
	}

	oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
	oldpipe.pipe_buffer.size = p->pipe_buffer.size;

	memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
	if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out) {
		/* we are in State 3 and need extra copying for read to be consistent */
		memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
		p->pipe_buffer.in += p->pipe_buffer.size;
	}

	p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
	p->pipe_buffer.size = tmp.pipe_buffer.size;

	pipe_free_kmem(&oldpipe);
	return 0;
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * returns:
 *  FREAD  | fd0 | -->[struct rpipe] --> |~~buffer~~| \
 *                                                     (pipe_mutex)
 *  FWRITE | fd1 | -->[struct wpipe] --X              /
 */
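
/*
 * Illustrative sketch (editor's addition): the user-space view of the
 * half-duplex pair pictured above -- fd[0] is the FREAD end backed by
 * rpipe, fd[1] the FWRITE end backed by wpipe. Hypothetical snippet,
 * not kernel code.
 */
#if 0 /* example only, never compiled */
#include <unistd.h>

static void
example_pipe_roundtrip(void)
{
	int fd[2];
	char buf[5];

	if (pipe(fd) == 0) {
		(void)write(fd[1], "hello", 5); /* write end (wpipe) */
		(void)read(fd[0], buf, 5);      /* read end (rpipe) */
		close(fd[0]);
		close(fd[1]);
	}
}
#endif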

/* ARGSUSED */
int
pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
	struct fileproc *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int error;

	error = pipepair_alloc(&rpipe, &wpipe);
	if (error) {
		return error;
	}

	/*
	 * for now we'll create half-duplex pipes (see the returns diagram
	 * above); this is what we've always supported..
	 */

	error = falloc(p, &rf, &retval[0], vfs_context_current());
	if (error) {
		goto freepipes;
	}
	rf->f_flag = FREAD;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &retval[1], vfs_context_current());
	if (error) {
		fp_free(p, retval[0], rf);
		goto freepipes;
	}
	wf->f_flag = FWRITE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

#if CONFIG_MACF
	/*
	 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
	 *
	 * struct pipe represents a pipe endpoint. The MAC label is shared
	 * between the connected endpoints. As a result mac_pipe_label_init() and
	 * mac_pipe_label_associate() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_pipe_label_init(rpipe);
	mac_pipe_label_associate(kauth_cred_get(), rpipe);
	wpipe->pipe_label = rpipe->pipe_label;
#endif
	proc_fdlock_spin(p);
	procfdtbl_releasefd(p, retval[0], NULL);
	procfdtbl_releasefd(p, retval[1], NULL);
	fp_drop(p, retval[0], rf, 1);
	fp_drop(p, retval[1], wf, 1);
	proc_fdunlock(p);
	return 0;

freepipes:
	pipeclose(rpipe);
	pipeclose(wpipe);
	return error;
}

int
pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
{
#if CONFIG_MACF
	int error;
#endif
	int pipe_size = 0;
	int pipe_count;
	struct stat *sb = (struct stat *)0;        /* warning avoidance; protected by isstat64 */
	struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance; protected by isstat64 */

	if (cpipe == NULL) {
		return EBADF;
	}
	PIPE_LOCK(cpipe);

#if CONFIG_MACF
	error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
	if (error) {
		PIPE_UNLOCK(cpipe);
		return error;
	}
#endif
	if (cpipe->pipe_buffer.buffer == 0) {
		/* must be stat'ing the write fd */
		if (cpipe->pipe_peer) {
			/* the peer still exists, use its info */
			pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
			pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
		} else {
			pipe_count = 0;
		}
	} else {
		pipe_size = MAX_PIPESIZE(cpipe);
		pipe_count = cpipe->pipe_buffer.cnt;
	}
	/*
	 * since the peer's buffer is set up outside of the lock
	 * we might catch it in a transient state
	 */
	if (pipe_size == 0) {
		pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);
	}

	if (isstat64 != 0) {
		sb64 = (struct stat64 *)ub;

		bzero(sb64, sizeof(*sb64));
		sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
		sb64->st_blksize = pipe_size;
		sb64->st_size = pipe_count;
		sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;

		sb64->st_uid = kauth_getuid();
		sb64->st_gid = kauth_getgid();

		sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
		sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;

		sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
		sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;

		sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
		sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;

		/*
		 * Return a relatively unique inode number based on the current
		 * address of this pipe's struct pipe. This number may be recycled
		 * relatively quickly.
		 */
		sb64->st_ino = (ino64_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
	} else {
		sb = (struct stat *)ub;

		bzero(sb, sizeof(*sb));
		sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
		sb->st_blksize = pipe_size;
		sb->st_size = pipe_count;
		sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;

		sb->st_uid = kauth_getuid();
		sb->st_gid = kauth_getgid();

		sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
		sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;

		sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
		sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;

		sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
		sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;

		/*
		 * Return a relatively unique inode number based on the current
		 * address of this pipe's struct pipe. This number may be recycled
		 * relatively quickly.
		 */
		sb->st_ino = (ino_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
	}
	PIPE_UNLOCK(cpipe);

	/*
	 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
	 * st_uid, st_gid.
	 *
	 * XXX (st_dev) should be unique, but there is no device driver that
	 * XXX is associated with pipes, since they are implemented via a
	 * XXX struct fileops indirection rather than as FS objects.
	 */
	return 0;
}

/*
 * Allocates kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely; if it fails
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	vm_offset_t buffer;

	if (size <= 0) {
		return EINVAL;
	}

	if ((buffer = (vm_offset_t)kalloc(size)) == 0) {
		return ENOMEM;
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = (caddr_t)buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;

	OSAddAtomic(1, &amountpipes);
	OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);

	return 0;
}

/*
 * initialize and allocate the memory for a pipe pair
 */
static int
pipepair_alloc(struct pipe **rp_out, struct pipe **wp_out)
{
	struct pipepair *pp = zalloc(pipe_zone);
	struct pipe *rpipe = &pp->pp_rpipe;
	struct pipe *wpipe = &pp->pp_wpipe;

	if (pp == NULL) {
		return ENOMEM;
	}

	/*
	 * protect so pipespace or pipeclose don't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(pp, sizeof(struct pipepair));
	lck_mtx_init(&pp->pp_mtx, pipe_mtx_grp, pipe_mtx_attr);

	rpipe->pipe_mtxp = &pp->pp_mtx;
	wpipe->pipe_mtxp = &pp->pp_mtx;

	TAILQ_INIT(&rpipe->pipe_evlist);
	TAILQ_INIT(&wpipe->pipe_evlist);

#ifndef CONFIG_EMBEDDED
	/* Initial times are all the time of creation of the pipe */
	pipe_touch(rpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
	pipe_touch(wpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
#endif

	/*
	 * allocate the space for the normal I/O direction up
	 * front... we'll delay the allocation for the other
	 * direction until a write actually occurs (most likely it won't)...
	 */
	int error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
	if (__improbable(error)) {
		lck_mtx_destroy(&pp->pp_mtx, pipe_mtx_grp);
		zfree(pipe_zone, pp);
		return error;
	}

	*rp_out = rpipe;
	*wp_out = wpipe;
	return 0;
}

static void
pipepair_destroy_pipe(struct pipepair *pp, struct pipe *cpipe)
{
	bool can_free;

	pipe_free_kmem(cpipe);

	lck_mtx_lock(&pp->pp_mtx);
	if (__improbable(cpipe->pipe_state & PIPE_DEAD)) {
		panic("double free of pipe %p in pair %p", cpipe, pp);
	}

	cpipe->pipe_state |= PIPE_DEAD;

	can_free = (pp->pp_rpipe.pipe_state & PIPE_DEAD) &&
	    (pp->pp_wpipe.pipe_state & PIPE_DEAD);
	lck_mtx_unlock(&pp->pp_mtx);

	if (can_free) {
		lck_mtx_destroy(&pp->pp_mtx, pipe_mtx_grp);
		zfree(pipe_zone, pp);
	}
}

/*
 * lock a pipe for I/O, blocking other access
 */
static inline int
pipeio_lock(struct pipe *cpipe, int catch)
{
	int error;
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0) {
			return error;
		}
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return 0;
}

/*
 * unlock a pipe I/O lock
 */
static inline void
pipeio_unlock(struct pipe *cpipe)
{
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * wake up anyone who's blocked in select
 */
static void
pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}

	KNOTE(&cpipe->pipe_sel.si_note, 1);

	postpipeevent(cpipe, EV_RWBYTES);

	if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
		if (spipe->pipe_pgid < 0) {
			gsignal(-spipe->pipe_pgid, SIGIO);
		} else {
			proc_signal(spipe->pipe_pgid, SIGIO);
		}
	}
}

/*
 * Read n bytes from the buffer. Semantics are similar to file read.
 * returns: number of bytes read from the buffer
 */
/* ARGSUSED */
static int
pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;

	error = pipeio_lock(rpipe, 1);
	if (error) {
		goto unlocked_error;
	}

#if CONFIG_MACF
	error = mac_pipe_check_read(kauth_cred_get(), rpipe);
	if (error) {
		goto locked_error;
	}
#endif

	while (uio_resid(uio)) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * # bytes to read is min( bytes from read pointer until end of buffer,
			 *                         total unread bytes,
			 *                         user requested byte count)
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt) {
				size = rpipe->pipe_buffer.cnt;
			}
			// LP64todo - fix this!
			if (size > (u_int) uio_resid(uio)) {
				size = (u_int) uio_resid(uio);
			}

			PIPE_UNLOCK(rpipe); /* we still hold the io lock */
			error = uiomove(
				&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
				size, uio);
			PIPE_LOCK(rpipe);
			if (error) {
				break;
			}

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) {
				rpipe->pipe_buffer.out = 0;
			}

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning. This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
			    (fileproc_get_vflags(fp) & FPV_DRAIN)) {
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read in a previous iteration.
			 */
			if (nread > 0) {
				break;
			}

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeio_unlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
				if (error == 0) {
					error = pipeio_lock(rpipe, 1);
				}
			}
			if (error) {
				goto unlocked_error;
			}
		}
	}
#if CONFIG_MACF
locked_error:
#endif
	pipeio_unlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0) {
		pipeselwakeup(rpipe, rpipe->pipe_peer);
	}

#ifndef CONFIG_EMBEDDED
	/* update last read time */
	pipe_touch(rpipe, PIPE_ATIME);
#endif

	PIPE_UNLOCK(rpipe);

	return error;
}

/*
 * Perform a write of n bytes into the buffer. Since pipes are
 * unidirectional, a write is meant to be read by the other side only.
 */
static int
pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	int error = 0;
	int orig_resid;
	int pipe_size;
	struct pipe *wpipe, *rpipe;
	// LP64todo - fix this!
	orig_resid = uio_resid(uio);
	int space;

	rpipe = (struct pipe *)fp->f_data;

	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
	    (fileproc_get_vflags(fp) & FPV_DRAIN)) {
		PIPE_UNLOCK(rpipe);
		return EPIPE;
	}
#if CONFIG_MACF
	error = mac_pipe_check_write(kauth_cred_get(), wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return error;
	}
#endif
	++wpipe->pipe_busy;

	pipe_size = 0;

	/*
	 * need to allocate some storage... we delay the allocation
	 * until the first write on fd[0] to avoid allocating storage for both
	 * 'pipe ends'... most pipes are half-duplex with the writes targeting
	 * fd[1], so allocating space for both ends is a waste...
	 */

	if (wpipe->pipe_buffer.buffer == 0 || (
		    (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
		    amountpipekva < maxpipekva)) {
		pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
	}
	if (pipe_size) {
		/*
		 * need to do initial allocation or resizing of pipe
		 * holding both structure and io locks.
		 */
		if ((error = pipeio_lock(wpipe, 1)) == 0) {
			if (wpipe->pipe_buffer.cnt == 0) {
				error = pipespace(wpipe, pipe_size);
			} else {
				error = expand_pipespace(wpipe, pipe_size);
			}

			pipeio_unlock(wpipe);

			/* allocation failed */
			if (wpipe->pipe_buffer.buffer == 0) {
				error = ENOMEM;
			}
		}
		if (error) {
			/*
			 * If an error occurred, unbusy and return, waking up any
			 * pending readers.
			 */
			--wpipe->pipe_busy;
			if ((wpipe->pipe_busy == 0) &&
			    (wpipe->pipe_state & PIPE_WANT)) {
				wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
				wakeup(wpipe);
			}
			PIPE_UNLOCK(rpipe);
			return error;
		}
	}

	while (uio_resid(uio)) {
retrywrite:
		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF)) {
			space = 0;
		}

		if (space > 0) {
			if ((error = pipeio_lock(wpipe, 1)) == 0) {
				int size;    /* Transfer size */
				int segsize; /* first segment to transfer */

				if ((wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
				    (fileproc_get_vflags(fp) & FPV_DRAIN)) {
					pipeio_unlock(wpipe);
					error = EPIPE;
					break;
				}
				/*
				 * If a process blocked in pipeio_lock, our
				 * value for space might be bad... the mutex
				 * is dropped while we're blocked
				 */
				if (space > (int)(wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt)) {
					pipeio_unlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				// LP64todo - fix this!
				if (space > uio_resid(uio)) {
					size = uio_resid(uio);
				} else {
					size = space;
				}
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer. If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.in;
				if (segsize > size) {
					segsize = size;
				}

				/* Transfer first segment */

				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
				    segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes. Wraparound
					 * happened. (State 3)
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size) {
						panic("Expected pipe buffer "
						    "wraparound disappeared");
					}

					PIPE_UNLOCK(rpipe);
					error = uiomove(
						&wpipe->pipe_buffer.buffer[0],
						size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				/*
				 * readers never know to read until count is updated.
				 */
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size) {
							panic("Expected "
							    "wraparound bad");
						}
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size) {
						panic("Pipe buffer overflow");
					}
				}
				pipeio_unlock(wpipe);
			}
			if (error) {
				break;
			}
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * If the read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if ((wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
			    (fileproc_get_vflags(fp) & FPV_DRAIN)) {
				error = EPIPE;
				break;
			}

			/*
			 * don't block on non-blocking I/O;
			 * we'll do the pipeselwakeup on the way out
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe, wpipe);

			wpipe->pipe_state |= PIPE_WANTW;

			error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);

			if (error != 0) {
				break;
			}
		}
	}
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	}
	if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If there are any characters in the buffer, we wake up
		 * the reader if it was blocked waiting for data.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		/*
		 * wake up the thread blocked in select/poll or post the notification
		 */
		pipeselwakeup(wpipe, wpipe);
	}

#ifndef CONFIG_EMBEDDED
	/* Update modification, status change (# of bytes in pipe) times */
	pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
	pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
#endif
	PIPE_UNLOCK(rpipe);

	return error;
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
/* ARGSUSED 3 */
static int
pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
    __unused vfs_context_t ctx)
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;
#if CONFIG_MACF
	int error;
#endif

	PIPE_LOCK(mpipe);

#if CONFIG_MACF
	error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
	if (error) {
		PIPE_UNLOCK(mpipe);

		return error;
	}
#endif

	switch (cmd) {
	case FIONBIO:
		PIPE_UNLOCK(mpipe);
		return 0;

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return 0;

	case FIONREAD:
		*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return 0;

	case TIOCSPGRP:
		mpipe->pipe_pgid = *(int *)data;

		PIPE_UNLOCK(mpipe);
		return 0;

	case TIOCGPGRP:
		*(int *)data = mpipe->pipe_pgid;

		PIPE_UNLOCK(mpipe);
		return 0;
	}
	PIPE_UNLOCK(mpipe);
	return ENOTTY;
}
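
/*
 * Illustrative sketch (editor's addition): how the socket-compatible
 * ioctls above look from user space. FIONREAD reports pipe_buffer.cnt,
 * the number of unread bytes. Hypothetical snippet, not kernel code.
 */
#if 0 /* example only, never compiled */
#include <sys/ioctl.h>
#include <sys/filio.h>

static int
example_bytes_pending(int readfd)
{
	int nread = 0;

	/* mirrors the FIONREAD case in pipe_ioctl() above */
	if (ioctl(readfd, FIONREAD, &nread) == -1) {
		return -1;
	}
	return nread;
}
#endif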

static int
pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int retnum = 0;

	if (rpipe == NULL || rpipe == (struct pipe *)-1) {
		return retnum;
	}

	PIPE_LOCK(rpipe);

	wpipe = rpipe->pipe_peer;

#if CONFIG_MACF
	/*
	 * XXX We should use a per thread credential here; minimally, the
	 * XXX process credential should have a persistent reference on it
	 * XXX before being passed in here.
	 */
	if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
		PIPE_UNLOCK(rpipe);
		return 0;
	}
#endif
	switch (which) {
	case FREAD:
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
		    (fileproc_get_vflags(fp) & FPV_DRAIN)) {
			retnum = 1;
		} else {
			rpipe->pipe_state |= PIPE_SEL;
			selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
		}
		break;

	case FWRITE:
		if (wpipe) {
			wpipe->pipe_state |= PIPE_WSELECT;
		}
		if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
		    (fileproc_get_vflags(fp) & FPV_DRAIN) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		    (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
			retnum = 1;
		} else {
			wpipe->pipe_state |= PIPE_SEL;
			selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
		}
		break;
	case 0:
		rpipe->pipe_state |= PIPE_SEL;
		selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
		break;
	}
	PIPE_UNLOCK(rpipe);

	return retnum;
}

/* ARGSUSED 1 */
static int
pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
	struct pipe *cpipe;

	proc_fdlock_spin(vfs_context_proc(ctx));
	cpipe = (struct pipe *)fg->fg_data;
	fg->fg_data = NULL;
	proc_fdunlock(vfs_context_proc(ctx));
	if (cpipe) {
		pipeclose(cpipe);
	}

	return 0;
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
		OSAddAtomic(-1, &amountpipes);
		kfree(cpipe->pipe_buffer.buffer,
		    cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.size = 0;
	}
}

/*
 * shut down the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	struct pipe *ppipe;

	PIPE_LOCK(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state &= ~PIPE_DRAIN;
	cpipe->pipe_state |= PIPE_EOF;
	pipeselwakeup(cpipe, cpipe);

	while (cpipe->pipe_busy) {
		cpipe->pipe_state |= PIPE_WANT;

		wakeup(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
	}

#if CONFIG_MACF
	/*
	 * Free the shared pipe label only after the two ends are disconnected.
	 */
	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) {
		mac_pipe_label_destroy(cpipe);
	}
#endif

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		ppipe->pipe_state &= ~(PIPE_DRAIN);
		ppipe->pipe_state |= PIPE_EOF;

		pipeselwakeup(ppipe, ppipe);
		wakeup(ppipe);

		KNOTE(&ppipe->pipe_sel.si_note, 1);

		postpipeevent(ppipe, EV_RCLOSED);

		ppipe->pipe_peer = NULL;
	}
	evpipefree(cpipe);

	/*
	 * free resources
	 */

	PIPE_UNLOCK(cpipe);

	pipepair_destroy_pipe(PIPE_PAIR(cpipe), cpipe);
}

static int64_t
filt_pipelowwat(struct knote *kn, struct pipe *rpipe, int64_t def_lowwat)
{
	if ((kn->kn_sfflags & NOTE_LOWAT) == 0) {
		return def_lowwat;
	}
	if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe)) {
		return MAX_PIPESIZE(rpipe);
	}
	return MAX(kn->kn_sdata, def_lowwat);
}
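
/*
 * Illustrative sketch (editor's addition): registering a read filter with
 * a low-water mark from user space. filt_pipelowwat() above clamps the
 * requested threshold (kn_sdata) to the pipe's MAX_PIPESIZE. Hypothetical
 * snippet, not kernel code.
 */
#if 0 /* example only, never compiled */
#include <sys/event.h>

static void
example_register_lowat(int kq, int readfd)
{
	struct kevent kev;

	/* don't fire EVFILT_READ until at least 128 bytes are buffered */
	EV_SET(&kev, readfd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
	(void)kevent(kq, &kev, 1, NULL, 0, NULL);
}
#endif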

static int
filt_pipe_draincommon(struct knote *kn, struct pipe *rpipe)
{
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
	    (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		kn->kn_flags |= EV_EOF;
		return 1;
	}

	return 0;
}

static int
filt_pipenotsup(struct knote *kn, long hint)
{
#pragma unused(hint)
	struct pipe *rpipe = kn->kn_hook;

	return filt_pipe_draincommon(kn, rpipe);
}

static int
filt_pipenotsuptouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = kn->kn_hook;
	int res;

	PIPE_LOCK(rpipe);

	/* accept new kevent data (and save off lowat threshold and flag) */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* determine if any event is now deemed fired */
	res = filt_pipe_draincommon(kn, rpipe);

	PIPE_UNLOCK(rpipe);

	return res;
}

static int
filt_pipenotsupprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = kn->kn_hook;
	int res;

	PIPE_LOCK(rpipe);
	res = filt_pipe_draincommon(kn, rpipe);
	if (res) {
		knote_fill_kevent(kn, kev, 0);
	}
	PIPE_UNLOCK(rpipe);

	return res;
}

/*ARGSUSED*/
static int
filt_piperead_common(struct knote *kn, struct kevent_qos_s *kev, struct pipe *rpipe)
{
	int64_t data = rpipe->pipe_buffer.cnt;
	int res = 0;

	if (filt_pipe_draincommon(kn, rpipe)) {
		res = 1;
	} else {
		res = data >= filt_pipelowwat(kn, rpipe, 1);
	}
	if (res && kev) {
		knote_fill_kevent(kn, kev, data);
	}
	return res;
}

static int
filt_piperead(struct knote *kn, long hint)
{
#pragma unused(hint)
	struct pipe *rpipe = kn->kn_hook;

	return filt_piperead_common(kn, NULL, rpipe);
}

static int
filt_pipereadtouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = kn->kn_hook;
	int retval;

	PIPE_LOCK(rpipe);

	/* accept new inputs (and save the low water threshold and flag) */
	kn->kn_sdata = kev->data;
	kn->kn_sfflags = kev->fflags;

	/* identify if any events are now fired */
	retval = filt_piperead_common(kn, NULL, rpipe);

	PIPE_UNLOCK(rpipe);

	return retval;
}

static int
filt_pipereadprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = kn->kn_hook;
	int retval;

	PIPE_LOCK(rpipe);
	retval = filt_piperead_common(kn, kev, rpipe);
	PIPE_UNLOCK(rpipe);

	return retval;
}

/*ARGSUSED*/
static int
filt_pipewrite_common(struct knote *kn, struct kevent_qos_s *kev, struct pipe *rpipe)
{
	int64_t data = 0;
	int res = 0;

	if (filt_pipe_draincommon(kn, rpipe)) {
		res = 1;
	} else {
		data = MAX_PIPESIZE(rpipe) - rpipe->pipe_buffer.cnt;
		res = data >= filt_pipelowwat(kn, rpipe, PIPE_BUF);
	}
	if (res && kev) {
		knote_fill_kevent(kn, kev, data);
	}
	return res;
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
#pragma unused(hint)
	struct pipe *rpipe = kn->kn_hook;

	return filt_pipewrite_common(kn, NULL, rpipe);
}

static int
filt_pipewritetouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = kn->kn_hook;
	int res;

	PIPE_LOCK(rpipe);

	/* accept new kevent data (and save off lowat threshold and flag) */
	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;

	/* determine if any event is now deemed fired */
	res = filt_pipewrite_common(kn, NULL, rpipe);

	PIPE_UNLOCK(rpipe);

	return res;
}

static int
filt_pipewriteprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = kn->kn_hook;
	int res;

	PIPE_LOCK(rpipe);
	res = filt_pipewrite_common(kn, kev, rpipe);
	PIPE_UNLOCK(rpipe);

	return res;
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct fileproc *fp, struct knote *kn,
    __unused struct kevent_qos_s *kev)
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;
	struct pipe *rpipe = &PIPE_PAIR(cpipe)->pp_rpipe;
	int res;

	PIPE_LOCK(cpipe);
#if CONFIG_MACF
	/*
	 * XXX We should use a per thread credential here; minimally, the
	 * XXX process credential should have a persistent reference on it
	 * XXX before being passed in here.
	 */
	kauth_cred_t cred = vfs_context_ucred(vfs_context_current());
	if (mac_pipe_check_kqfilter(cred, kn, cpipe) != 0) {
		PIPE_UNLOCK(cpipe);
		knote_set_error(kn, EPERM);
		return 0;
	}
#endif

	/*
	 * FreeBSD will fail the attach with EPIPE if the peer pipe is
	 * detached; however, this isn't a programming error, as the other
	 * side closing could race with the kevent registration.
	 *
	 * Attach should only fail for programming mistakes, else it will
	 * break libdispatch.
	 *
	 * Like FreeBSD, have a "Neutered" filter that will not fire until
	 * the pipe dies if the wrong filter is attached to the wrong end.
	 *
	 * Knotes are always attached to the "rpipe".
	 */
	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (fp->f_flag & FREAD) {
			kn->kn_filtid = EVFILTID_PIPE_R;
			res = filt_piperead_common(kn, NULL, rpipe);
		} else {
			kn->kn_filtid = EVFILTID_PIPE_N;
			res = filt_pipe_draincommon(kn, rpipe);
		}
		break;

	case EVFILT_WRITE:
		if (fp->f_flag & FWRITE) {
			kn->kn_filtid = EVFILTID_PIPE_W;
			res = filt_pipewrite_common(kn, NULL, rpipe);
		} else {
			kn->kn_filtid = EVFILTID_PIPE_N;
			res = filt_pipe_draincommon(kn, rpipe);
		}
		break;

	default:
		PIPE_UNLOCK(cpipe);
		knote_set_error(kn, EINVAL);
		return 0;
	}

	kn->kn_hook = rpipe;
	KNOTE_ATTACH(&rpipe->pipe_sel.si_note, kn);

	PIPE_UNLOCK(cpipe);
	return res;
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *rpipe = &PIPE_PAIR(cpipe)->pp_rpipe;

	PIPE_LOCK(cpipe);
	KNOTE_DETACH(&rpipe->pipe_sel.si_note, kn);
	PIPE_UNLOCK(cpipe);
}

int
fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
{
#if CONFIG_MACF
	int error;
#endif
	struct timespec now;
	struct vinfo_stat * ub;
	int pipe_size = 0;
	int pipe_count;

	if (cpipe == NULL) {
		return EBADF;
	}
	PIPE_LOCK(cpipe);

#if CONFIG_MACF
	error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
	if (error) {
		PIPE_UNLOCK(cpipe);
		return error;
	}
#endif
	if (cpipe->pipe_buffer.buffer == 0) {
		/*
		 * must be stat'ing the write fd
		 */
		if (cpipe->pipe_peer) {
			/*
			 * the peer still exists, use its info
			 */
			pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
			pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
		} else {
			pipe_count = 0;
		}
	} else {
		pipe_size = MAX_PIPESIZE(cpipe);
		pipe_count = cpipe->pipe_buffer.cnt;
	}
	/*
	 * since the peer's buffer is set up outside of the lock
	 * we might catch it in a transient state
	 */
	if (pipe_size == 0) {
		pipe_size = PIPE_SIZE;
	}

	ub = &pinfo->pipe_stat;

	bzero(ub, sizeof(*ub));
	ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
	ub->vst_blksize = pipe_size;
	ub->vst_size = pipe_count;
	if (ub->vst_blksize != 0) {
		ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
	}
	ub->vst_nlink = 1;

	ub->vst_uid = kauth_getuid();
	ub->vst_gid = kauth_getgid();

	nanotime(&now);
	ub->vst_atime = now.tv_sec;
	ub->vst_atimensec = now.tv_nsec;

	ub->vst_mtime = now.tv_sec;
	ub->vst_mtimensec = now.tv_nsec;

	ub->vst_ctime = now.tv_sec;
	ub->vst_ctimensec = now.tv_nsec;

	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
	 * XXX (st_dev, st_ino) should be unique.
	 */

	pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
	pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRHASH((uintptr_t)(cpipe->pipe_peer));
	pinfo->pipe_status = cpipe->pipe_state;

	PIPE_UNLOCK(cpipe);

	return 0;
}

static int
pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
{
	/* Note: fdlock already held */
	struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);
	boolean_t drain_pipe = FALSE;

	/* Check if the pipe is going away */
	lck_mtx_lock_spin(&fp->f_fglob->fg_lock);
	if (fp->f_fglob->fg_count == 1) {
		drain_pipe = TRUE;
	}
	lck_mtx_unlock(&fp->f_fglob->fg_lock);

	if (cpipe) {
		PIPE_LOCK(cpipe);

		if (drain_pipe) {
			cpipe->pipe_state |= PIPE_DRAIN;
			cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
		}
		wakeup(cpipe);

		/* Must wake up peer: a writer sleeps on the read side */
		if ((ppipe = cpipe->pipe_peer)) {
			if (drain_pipe) {
				ppipe->pipe_state |= PIPE_DRAIN;
				ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
			}
			wakeup(ppipe);
		}

		PIPE_UNLOCK(cpipe);
		return 0;
	}

	return 1;
}