]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/sys_pipe.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / kern / sys_pipe.c
1 /*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19 /*
20 * Copyright (c) 2003-2020 Apple Inc. All rights reserved.
21 *
22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
23 *
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the License
28 * may not be used to create, or enable the creation or redistribution of,
29 * unlawful or unlicensed copies of an Apple operating system, or to
30 * circumvent, violate, or enable the circumvention or violation of, any
31 * terms of an Apple operating system software license agreement.
32 *
33 * Please obtain a copy of the License at
34 * http://www.opensource.apple.com/apsl/ and read it before using this file.
35 *
36 * The Original Code and all software distributed under the License are
37 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
38 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
40 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41 * Please see the License for the specific language governing rights and
42 * limitations under the License.
43 *
44 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45 */
46 /*
47 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
48 * support for mandatory and extensible security protections. This notice
49 * is included in support of clause 2.2 (b) of the Apple Public License,
50 * Version 2.0.
51 */
52
53 /*
54 * This file contains a high-performance replacement for the socket-based
55 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
56 * all features of sockets, but does do everything that pipes normally
57 * do.
58 *
59 * Pipes are implemented as circular buffers. Following are the valid states in pipes operations
60 *
61 * _________________________________
62 * 1. |_________________________________| r=w, c=0
63 *
64 * _________________________________
65 * 2. |__r:::::wc_______________________| r <= w , c > 0
66 *
67 * _________________________________
68 * 3. |::::wc_____r:::::::::::::::::::::| r>w , c > 0
69 *
70 * _________________________________
71 * 4. |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
72 *
73 *
74 * Nomenclature:-
75 * a-z define the steps in a program flow
76 * 1-4 are the states as defined above
77 * Action: is what file operation is done on the pipe
78 *
79 * Current:None Action: initialize with size M=200
80 * a. State 1 ( r=0, w=0, c=0)
81 *
82 * Current: a Action: write(100) (w < M)
83 * b. State 2 (r=0, w=100, c=100)
84 *
85 * Current: b Action: write(100) (w = M-w)
86 * c. State 4 (r=0,w=0,c=200)
87 *
88 * Current: b Action: read(70) ( r < c )
89 * d. State 2(r=70,w=100,c=30)
90 *
91 * Current: d Action: write(75) ( w < (m-w))
92 * e. State 2 (r=70,w=175,c=105)
93 *
94 * Current: d Action: write(110) ( w > (m-w))
95 * f. State 3 (r=70,w=10,c=140)
96 *
97 * Current: d Action: read(30) (r >= c )
98 * g. State 1 (r=100,w=100,c=0)
99 *
100 */
101
102 /*
103 * This code creates half-duplex pipe buffers to facilitate file-like
104 * operations on pipes. The initial buffer is very small, but this can
105 * dynamically change to larger sizes based on usage. The buffer size is never
106 * reduced. The total amount of kernel memory used is governed by maxpipekva.
107 * If the dynamic expansion limit is reached, the output thread is blocked
108 * until the pipe buffer empties enough to continue.
109 *
110 * In order to limit the resource use of pipes, two sysctls exist:
111 *
112 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
113 * address space available to us in pipe_map.
114 *
115 * Memory usage may be monitored through the sysctls
116 * kern.ipc.pipes, kern.ipc.pipekva.
117 *
118 */
119
120 #include <sys/param.h>
121 #include <sys/systm.h>
122 #include <sys/filedesc.h>
123 #include <sys/kernel.h>
124 #include <sys/vnode.h>
125 #include <sys/proc_internal.h>
126 #include <sys/kauth.h>
127 #include <sys/file_internal.h>
128 #include <sys/stat.h>
129 #include <sys/ioctl.h>
130 #include <sys/fcntl.h>
131 #include <sys/malloc.h>
132 #include <sys/syslog.h>
133 #include <sys/unistd.h>
134 #include <sys/resourcevar.h>
135 #include <sys/aio_kern.h>
136 #include <sys/signalvar.h>
137 #include <sys/pipe.h>
138 #include <sys/sysproto.h>
139 #include <sys/proc_info.h>
140
141 #include <security/audit/audit.h>
142
143 #include <sys/kdebug.h>
144
145 #include <kern/zalloc.h>
146 #include <kern/kalloc.h>
147 #include <vm/vm_kern.h>
148 #include <libkern/OSAtomic.h>
149 #include <libkern/section_keywords.h>
150
151 #if CONFIG_MACF
152 #include <security/mac_framework.h>
153 #endif
154
/*
 * Shorthand accessors: with these in place, fp->f_flag, fp->f_ops and
 * fp->f_data expand to the corresponding fields reached through
 * fp->fp_glob.
 */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops
#define f_data fp_glob->fg_data

/*
 * Both ends of a pipe, and the single mutex they share, live in one
 * allocation.  pipepair_alloc() points the pipe_mtxp of both ends at
 * pp_mtx.
 */
struct pipepair {
	lck_mtx_t     pp_mtx;           /* mutex shared by both pipe ends */
	struct pipe   pp_rpipe;         /* end handed out with FREAD by pipe() */
	struct pipe   pp_wpipe;         /* end handed out with FWRITE by pipe() */
	uint64_t      pp_pipe_id;       /* unique ID shared by both pipe ends */
};

/*
 * Recover the enclosing pipepair from either end, going through the
 * mutex pointer yielded by PIPE_MTX(pipe).
 * NOTE(review): PIPE_MTX is defined outside this file; presumably it
 * yields pipe->pipe_mtxp -- confirm.
 */
#define PIPE_PAIR(pipe) \
	__container_of(PIPE_MTX(pipe), struct pipepair, pp_mtx)
168
/*
 * interfaces to the outside world exported through file operations
 */
static int pipe_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
static int pipe_select(struct fileproc *fp, int which, void * wql,
    vfs_context_t ctx);
static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_qos_s *kev);
static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
    vfs_context_t ctx);
static int pipe_drain(struct fileproc *fp, vfs_context_t ctx);

/*
 * File-operations table for DTYPE_PIPE descriptors.  pipe() installs a
 * pointer to this table in the f_ops of both descriptors it creates.
 */
static const struct fileops pipeops = {
	.fo_type     = DTYPE_PIPE,
	.fo_read     = pipe_read,
	.fo_write    = pipe_write,
	.fo_ioctl    = pipe_ioctl,
	.fo_select   = pipe_select,
	.fo_close    = pipe_close,
	.fo_drain    = pipe_drain,
	.fo_kqfilter = pipe_kqfilter,
};
195
/*
 * knote filter callbacks.  All three filterops tables below share the
 * same detach routine and differ only in their event/touch/process
 * callbacks.
 */
static void filt_pipedetach(struct knote *kn);

static int filt_pipenotsup(struct knote *kn, long hint);
static int filt_pipenotsuptouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipenotsupprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_piperead(struct knote *kn, long hint);
static int filt_pipereadtouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipereadprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_pipewrite(struct knote *kn, long hint);
static int filt_pipewritetouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipewriteprocess(struct knote *kn, struct kevent_qos_s *kev);

/*
 * Filter table wired to the "notsup" callbacks.
 * NOTE(review): presumably used for filter types a pipe does not
 * support -- confirm against pipe_kqfilter().
 */
SECURITY_READ_ONLY_EARLY(struct filterops) pipe_nfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipenotsup,
	.f_touch = filt_pipenotsuptouch,
	.f_process = filt_pipenotsupprocess,
};

/* Filter table wired to the read-side callbacks. */
SECURITY_READ_ONLY_EARLY(struct filterops) pipe_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead,
	.f_touch = filt_pipereadtouch,
	.f_process = filt_pipereadprocess,
};

/* Filter table wired to the write-side callbacks. */
SECURITY_READ_ONLY_EARLY(struct filterops) pipe_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite,
	.f_touch = filt_pipewritetouch,
	.f_process = filt_pipewriteprocess,
};
233
#if PIPE_SYSCTLS
static int nbigpipe;      /* for compatibility sake. no longer used */
#endif
/*
 * Usage counters.  pipespace() bumps amountpipes by one and
 * amountpipekva by the buffer size for every buffer it allocates.
 */
static int amountpipes;   /* total number of pipes in system */
static int amountpipekva; /* total memory used by pipes */

/* Source of pp_pipe_id values; pipepair_alloc() takes the next one atomically. */
static _Atomic uint64_t pipe_unique_id = 1;

int maxpipekva __attribute__((used)) = PIPE_KVAMAX;  /* allowing 16MB max. */

/*
 * Sysctl exports of the counters and limits above.
 * NOTE(review): maxpipekvawired and amountpipekvawired are referenced
 * below but are not declared anywhere in the visible part of this
 * file; this section is only compiled when PIPE_SYSCTLS is set --
 * confirm it still builds with that option enabled.
 */
#if PIPE_SYSCTLS
SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD | CTLFLAG_LOCKED,
    &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW | CTLFLAG_LOCKED,
    &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD | CTLFLAG_LOCKED,
    &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD | CTLFLAG_LOCKED,
    &amountpipekvawired, 0, "Pipe wired KVA usage");
#endif
260
/* Forward declarations of the file-local helpers. */
static int pipepair_alloc(struct pipe **rpipe, struct pipe **wpipe);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipespace(struct pipe *cpipe, int size);
static int choose_pipespace(unsigned long current, unsigned long expected);
static int expand_pipespace(struct pipe *p, int target_size);
static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
static __inline int pipeio_lock(struct pipe *cpipe, int catch);
static __inline void pipeio_unlock(struct pipe *cpipe);

/* Lock group used for every pipepair mutex, and the zone pipepairs come from. */
static LCK_GRP_DECLARE(pipe_mtx_grp, "pipe");
static ZONE_DECLARE(pipe_zone, "pipe zone", sizeof(struct pipepair), ZC_NONE);

/* Reported size of a pipe: its buffer size, but never less than PIPE_SIZE. */
#define MAX_PIPESIZE(pipe)              ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )

/* NOTE(review): pipeinit is not defined in the visible part of this file -- confirm it exists. */
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
277
#if defined(XNU_TARGET_OS_OSX)
/* Selector bits for pipe_touch(): which timestamps to refresh */
#define PIPE_ATIME              0x00000001      /* time of last access */
#define PIPE_MTIME              0x00000002      /* time of last modification */
#define PIPE_CTIME              0x00000004      /* time of last status change */

/*
 * Refresh the timestamps of `tpipe' selected by the PIPE_*TIME bits in
 * `touch'.  The clock is sampled once with nanotime(), so every
 * selected timestamp receives the same value.
 */
static void
pipe_touch(struct pipe *tpipe, int touch)
{
	struct timespec stamp;

	nanotime(&stamp);

	if ((touch & PIPE_ATIME) != 0) {
		tpipe->st_atimespec.tv_sec  = stamp.tv_sec;
		tpipe->st_atimespec.tv_nsec = stamp.tv_nsec;
	}

	if ((touch & PIPE_MTIME) != 0) {
		tpipe->st_mtimespec.tv_sec  = stamp.tv_sec;
		tpipe->st_mtimespec.tv_nsec = stamp.tv_nsec;
	}

	if ((touch & PIPE_CTIME) != 0) {
		tpipe->st_ctimespec.tv_sec  = stamp.tv_sec;
		tpipe->st_ctimespec.tv_nsec = stamp.tv_nsec;
	}
}
#endif
307
308 static const unsigned int pipesize_blocks[] = {512, 1024, 2048, 4096, 4096 * 2, PIPE_SIZE, PIPE_SIZE * 4 };
309
310 /*
311 * finds the right size from possible sizes in pipesize_blocks
312 * returns the size which matches max(current,expected)
313 */
314 static int
315 choose_pipespace(unsigned long current, unsigned long expected)
316 {
317 int i = sizeof(pipesize_blocks) / sizeof(unsigned int) - 1;
318 unsigned long target;
319
320 /*
321 * assert that we always get an atomic transaction sized pipe buffer,
322 * even if the system pipe buffer high-water mark has been crossed.
323 */
324 assert(PIPE_BUF == pipesize_blocks[0]);
325
326 if (expected > current) {
327 target = expected;
328 } else {
329 target = current;
330 }
331
332 while (i > 0 && pipesize_blocks[i - 1] > target) {
333 i = i - 1;
334 }
335
336 return pipesize_blocks[i];
337 }
338
339
340 /*
341 * expand the size of pipe while there is data to be read,
342 * and then free the old buffer once the current buffered
343 * data has been transferred to new storage.
344 * Required: PIPE_LOCK and io lock to be held by caller.
345 * returns 0 on success or no expansion possible
346 */
347 static int
348 expand_pipespace(struct pipe *p, int target_size)
349 {
350 struct pipe tmp, oldpipe;
351 int error;
352 tmp.pipe_buffer.buffer = 0;
353
354 if (p->pipe_buffer.size >= (unsigned) target_size) {
355 return 0; /* the existing buffer is max size possible */
356 }
357
358 /* create enough space in the target */
359 error = pipespace(&tmp, target_size);
360 if (error != 0) {
361 return error;
362 }
363
364 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
365 oldpipe.pipe_buffer.size = p->pipe_buffer.size;
366
367 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
368 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out) {
369 /* we are in State 3 and need extra copying for read to be consistent */
370 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
371 p->pipe_buffer.in += p->pipe_buffer.size;
372 }
373
374 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
375 p->pipe_buffer.size = tmp.pipe_buffer.size;
376
377
378 pipe_free_kmem(&oldpipe);
379 return 0;
380 }
381
/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * On success the two new descriptor numbers are stored in retval[0]
 * (opened FREAD) and retval[1] (opened FWRITE) and 0 is returned.
 * On failure the error from pipepair_alloc() or falloc() is returned.
 *
 * returns:
 *  FREAD  | fd0 | -->[struct rpipe] --> |~~buffer~~| \
 *                                                  (pipe_mutex)
 *  FWRITE | fd1 | -->[struct wpipe] --X              /
 */

/* ARGSUSED */
int
pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
	struct fileproc *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int error;

	/* both ends come out of a single pipepair allocation */
	error = pipepair_alloc(&rpipe, &wpipe);
	if (error) {
		return error;
	}

	/*
	 * for now we'll create half-duplex pipes(refer returns section above).
	 * this is what we've always supported..
	 */

	error = falloc(p, &rf, &retval[0], vfs_context_current());
	if (error) {
		goto freepipes;
	}
	rf->f_flag = FREAD;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &retval[1], vfs_context_current());
	if (error) {
		/* undo the first descriptor before tearing the pipes down */
		fp_free(p, retval[0], rf);
		goto freepipes;
	}
	wf->f_flag = FWRITE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	/* connect the ends only once both descriptors exist */
	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

#if CONFIG_MACF
	/*
	 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
	 *
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_pipe_label_init() and
	 * mac_pipe_label_associate() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_pipe_label_init(rpipe);
	mac_pipe_label_associate(kauth_cred_get(), rpipe);
	wpipe->pipe_label = rpipe->pipe_label;
#endif
	/* release both descriptors and drop our references under the fd lock */
	proc_fdlock_spin(p);
	procfdtbl_releasefd(p, retval[0], NULL);
	procfdtbl_releasefd(p, retval[1], NULL);
	fp_drop(p, retval[0], rf, 1);
	fp_drop(p, retval[1], wf, 1);
	proc_fdunlock(p);
	return 0;

freepipes:
	/* reached before pipe_peer is set, so each end is closed on its own */
	pipeclose(rpipe);
	pipeclose(wpipe);
	return error;
}
455
456 int
457 pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
458 {
459 #if CONFIG_MACF
460 int error;
461 #endif
462 int pipe_size = 0;
463 int pipe_count;
464 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
465 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
466
467 if (cpipe == NULL) {
468 return EBADF;
469 }
470 PIPE_LOCK(cpipe);
471
472 #if CONFIG_MACF
473 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
474 if (error) {
475 PIPE_UNLOCK(cpipe);
476 return error;
477 }
478 #endif
479 if (cpipe->pipe_buffer.buffer == 0) {
480 /* must be stat'ing the write fd */
481 if (cpipe->pipe_peer) {
482 /* the peer still exists, use it's info */
483 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
484 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
485 } else {
486 pipe_count = 0;
487 }
488 } else {
489 pipe_size = MAX_PIPESIZE(cpipe);
490 pipe_count = cpipe->pipe_buffer.cnt;
491 }
492 /*
493 * since peer's buffer is setup ouside of lock
494 * we might catch it in transient state
495 */
496 if (pipe_size == 0) {
497 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);
498 }
499
500 if (isstat64 != 0) {
501 sb64 = (struct stat64 *)ub;
502
503 bzero(sb64, sizeof(*sb64));
504 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
505 sb64->st_blksize = pipe_size;
506 sb64->st_size = pipe_count;
507 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;
508
509 sb64->st_uid = kauth_getuid();
510 sb64->st_gid = kauth_getgid();
511
512 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
513 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
514
515 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
516 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
517
518 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
519 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
520
521 /*
522 * Return a relatively unique inode number based on the current
523 * address of this pipe's struct pipe. This number may be recycled
524 * relatively quickly.
525 */
526 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
527 } else {
528 sb = (struct stat *)ub;
529
530 bzero(sb, sizeof(*sb));
531 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
532 sb->st_blksize = pipe_size;
533 sb->st_size = pipe_count;
534 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
535
536 sb->st_uid = kauth_getuid();
537 sb->st_gid = kauth_getgid();
538
539 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
540 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
541
542 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
543 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
544
545 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
546 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
547
548 /*
549 * Return a relatively unique inode number based on the current
550 * address of this pipe's struct pipe. This number may be recycled
551 * relatively quickly.
552 */
553 sb->st_ino = (ino_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
554 }
555 PIPE_UNLOCK(cpipe);
556
557 /*
558 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
559 * st_uid, st_gid.
560 *
561 * XXX (st_dev) should be unique, but there is no device driver that
562 * XXX is associated with pipes, since they are implemented via a
563 * XXX struct fileops indirection rather than as FS objects.
564 */
565 return 0;
566 }
567
568 uint64_t
569 pipe_id(struct pipe *p)
570 {
571 return PIPE_PAIR(p)->pp_pipe_id;
572 }
573
574 /*
575 * Allocate kva for pipe circular buffer, the space is pageable
576 * This routine will 'realloc' the size of a pipe safely, if it fails
577 * it will retain the old buffer.
578 * If it fails it will return ENOMEM.
579 */
580 static int
581 pipespace(struct pipe *cpipe, int size)
582 {
583 vm_offset_t buffer;
584
585 if (size <= 0) {
586 return EINVAL;
587 }
588
589 buffer = (vm_offset_t)kheap_alloc(KHEAP_DATA_BUFFERS, size, Z_WAITOK);
590 if (!buffer) {
591 return ENOMEM;
592 }
593
594 /* free old resources if we're resizing */
595 pipe_free_kmem(cpipe);
596 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
597 cpipe->pipe_buffer.size = size;
598 cpipe->pipe_buffer.in = 0;
599 cpipe->pipe_buffer.out = 0;
600 cpipe->pipe_buffer.cnt = 0;
601
602 OSAddAtomic(1, &amountpipes);
603 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
604
605 return 0;
606 }
607
608 /*
609 * initialize and allocate VM and memory for pipe
610 */
611 static int
612 pipepair_alloc(struct pipe **rp_out, struct pipe **wp_out)
613 {
614 struct pipepair *pp = zalloc(pipe_zone);
615 struct pipe *rpipe = &pp->pp_rpipe;
616 struct pipe *wpipe = &pp->pp_wpipe;
617
618 if (pp == NULL) {
619 return ENOMEM;
620 }
621
622 /*
623 * protect so pipespace or pipeclose don't follow a junk pointer
624 * if pipespace() fails.
625 */
626 bzero(pp, sizeof(struct pipepair));
627 pp->pp_pipe_id = os_atomic_inc_orig(&pipe_unique_id, relaxed);
628 lck_mtx_init(&pp->pp_mtx, &pipe_mtx_grp, LCK_ATTR_NULL);
629
630 rpipe->pipe_mtxp = &pp->pp_mtx;
631 wpipe->pipe_mtxp = &pp->pp_mtx;
632
633 #if defined(XNU_TARGET_OS_OSX)
634 /* Initial times are all the time of creation of the pipe */
635 pipe_touch(rpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
636 pipe_touch(wpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
637 #endif
638
639 /*
640 * allocate the space for the normal I/O direction up
641 * front... we'll delay the allocation for the other
642 * direction until a write actually occurs (most likely it won't)...
643 */
644 int error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
645 if (__improbable(error)) {
646 lck_mtx_destroy(&pp->pp_mtx, &pipe_mtx_grp);
647 zfree(pipe_zone, pp);
648 return error;
649 }
650
651 *rp_out = rpipe;
652 *wp_out = wpipe;
653 return 0;
654 }
655
656 static void
657 pipepair_destroy_pipe(struct pipepair *pp, struct pipe *cpipe)
658 {
659 bool can_free;
660
661 pipe_free_kmem(cpipe);
662
663 lck_mtx_lock(&pp->pp_mtx);
664 if (__improbable(cpipe->pipe_state & PIPE_DEAD)) {
665 panic("double free of pipe %p in pair %p", cpipe, pp);
666 }
667
668 cpipe->pipe_state |= PIPE_DEAD;
669
670 can_free = (pp->pp_rpipe.pipe_state & PIPE_DEAD) &&
671 (pp->pp_wpipe.pipe_state & PIPE_DEAD);
672 lck_mtx_unlock(&pp->pp_mtx);
673
674 if (can_free) {
675 lck_mtx_destroy(&pp->pp_mtx, &pipe_mtx_grp);
676 zfree(pipe_zone, pp);
677 }
678 }
679
680 /*
681 * lock a pipe for I/O, blocking other access
682 */
683 static inline int
684 pipeio_lock(struct pipe *cpipe, int catch)
685 {
686 int error;
687 while (cpipe->pipe_state & PIPE_LOCKFL) {
688 cpipe->pipe_state |= PIPE_LWANT;
689 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
690 "pipelk", 0);
691 if (error != 0) {
692 return error;
693 }
694 }
695 cpipe->pipe_state |= PIPE_LOCKFL;
696 return 0;
697 }
698
699 /*
700 * unlock a pipe I/O lock
701 */
702 static inline void
703 pipeio_unlock(struct pipe *cpipe)
704 {
705 cpipe->pipe_state &= ~PIPE_LOCKFL;
706 if (cpipe->pipe_state & PIPE_LWANT) {
707 cpipe->pipe_state &= ~PIPE_LWANT;
708 wakeup(cpipe);
709 }
710 }
711
712 /*
713 * wakeup anyone whos blocked in select
714 */
715 static void
716 pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
717 {
718 if (cpipe->pipe_state & PIPE_SEL) {
719 cpipe->pipe_state &= ~PIPE_SEL;
720 selwakeup(&cpipe->pipe_sel);
721 }
722
723 KNOTE(&cpipe->pipe_sel.si_note, 1);
724
725 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
726 if (spipe->pipe_pgid < 0) {
727 gsignal(-spipe->pipe_pgid, SIGIO);
728 } else {
729 proc_signal(spipe->pipe_pgid, SIGIO);
730 }
731 }
732 }
733
/*
 * Read from the pipe buffer into `uio'.  Semantics are similar to file
 * read.
 *
 * fp:  fileproc whose f_data is the pipe end to read from.
 * uio: destination; the loop runs while uio_resid(uio) is non-zero.
 *
 * Returns 0 on success (including EOF) or an error: EAGAIN when the
 * pipe is empty and the descriptor is non-blocking, or whatever
 * pipeio_lock(), msleep(), uiomove() or (with CONFIG_MACF)
 * mac_pipe_check_read() reported.  The number of bytes read is NOT the
 * return value; it is tracked locally in nread only to decide when to
 * stop.
 * NOTE(review): the caller presumably derives the byte count from the
 * change in uio_resid(uio) -- confirm.
 */
/* ARGSUSED */
static int
pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	/* pipe_busy is dropped again at unlocked_error below */
	++rpipe->pipe_busy;

	error = pipeio_lock(rpipe, 1);
	if (error) {
		goto unlocked_error;
	}

#if CONFIG_MACF
	error = mac_pipe_check_read(kauth_cred_get(), rpipe);
	if (error) {
		goto locked_error;
	}
#endif


	while (uio_resid(uio)) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * # bytes to read is min( bytes from read pointer until end of buffer,
			 *                         total unread bytes,
			 *                         user requested byte count)
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt) {
				size = rpipe->pipe_buffer.cnt;
			}

			size = (u_int) MIN(INT_MAX, MIN((user_size_t)size,
			    (user_size_t)uio_resid(uio)));

			/* the mutex is dropped across the copy; only the io lock is kept */
			PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
			error = uiomove(
				&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
				size, uio);
			PIPE_LOCK(rpipe);
			if (error) {
				break;
			}

			/* advance the read index, wrapping at the end of the buffer */
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) {
				rpipe->pipe_buffer.out = 0;
			}

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
			    (fileproc_get_vflags(fp) & FPV_DRAIN)) {
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read in previous iteration.
			 */
			if (nread > 0) {
				break;
			}

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeio_unlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
				if (error == 0) {
					error = pipeio_lock(rpipe, 1);
				}
			}
			/* the io lock is not held here, so skip the pipeio_unlock() below */
			if (error) {
				goto unlocked_error;
			}
		}
	}
#if CONFIG_MACF
locked_error:
#endif
	pipeio_unlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/* there is room in the buffer: notify select/knote/SIGIO listeners */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0) {
		pipeselwakeup(rpipe, rpipe->pipe_peer);
	}

#if defined(XNU_TARGET_OS_OSX)
	/* update last read time */
	pipe_touch(rpipe, PIPE_ATIME);
#endif

	PIPE_UNLOCK(rpipe);

	return error;
}
895
896 /*
897 * perform a write of n bytes into the read side of buffer. Since
898 * pipes are unidirectional a write is meant to be read by the otherside only.
899 */
900 static int
901 pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
902 __unused vfs_context_t ctx)
903 {
904 int error = 0;
905 size_t orig_resid;
906 int pipe_size;
907 struct pipe *wpipe, *rpipe;
908 // LP64todo - fix this!
909 orig_resid = (size_t)uio_resid(uio);
910 if (orig_resid > LONG_MAX) {
911 return EINVAL;
912 }
913 int space;
914
915 rpipe = (struct pipe *)fp->f_data;
916
917 PIPE_LOCK(rpipe);
918 wpipe = rpipe->pipe_peer;
919
920 /*
921 * detect loss of pipe read side, issue SIGPIPE if lost.
922 */
923 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
924 (fileproc_get_vflags(fp) & FPV_DRAIN)) {
925 PIPE_UNLOCK(rpipe);
926 return EPIPE;
927 }
928 #if CONFIG_MACF
929 error = mac_pipe_check_write(kauth_cred_get(), wpipe);
930 if (error) {
931 PIPE_UNLOCK(rpipe);
932 return error;
933 }
934 #endif
935 ++wpipe->pipe_busy;
936
937 pipe_size = 0;
938
939 /*
940 * need to allocate some storage... we delay the allocation
941 * until the first write on fd[0] to avoid allocating storage for both
942 * 'pipe ends'... most pipes are half-duplex with the writes targeting
943 * fd[1], so allocating space for both ends is a waste...
944 */
945
946 if (wpipe->pipe_buffer.buffer == 0 || (
947 (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
948 amountpipekva < maxpipekva)) {
949 pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
950 }
951 if (pipe_size) {
952 /*
953 * need to do initial allocation or resizing of pipe
954 * holding both structure and io locks.
955 */
956 if ((error = pipeio_lock(wpipe, 1)) == 0) {
957 if (wpipe->pipe_buffer.cnt == 0) {
958 error = pipespace(wpipe, pipe_size);
959 } else {
960 error = expand_pipespace(wpipe, pipe_size);
961 }
962
963 pipeio_unlock(wpipe);
964
965 /* allocation failed */
966 if (wpipe->pipe_buffer.buffer == 0) {
967 error = ENOMEM;
968 }
969 }
970 if (error) {
971 /*
972 * If an error occurred unbusy and return, waking up any pending
973 * readers.
974 */
975 --wpipe->pipe_busy;
976 if ((wpipe->pipe_busy == 0) &&
977 (wpipe->pipe_state & PIPE_WANT)) {
978 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
979 wakeup(wpipe);
980 }
981 PIPE_UNLOCK(rpipe);
982 return error;
983 }
984 }
985
986 while (uio_resid(uio)) {
987 retrywrite:
988 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
989
990 /* Writes of size <= PIPE_BUF must be atomic. */
991 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF)) {
992 space = 0;
993 }
994
995 if (space > 0) {
996 if ((error = pipeio_lock(wpipe, 1)) == 0) {
997 size_t size; /* Transfer size */
998 size_t segsize; /* first segment to transfer */
999
1000 if ((wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1001 (fileproc_get_vflags(fp) & FPV_DRAIN)) {
1002 pipeio_unlock(wpipe);
1003 error = EPIPE;
1004 break;
1005 }
1006 /*
1007 * If a process blocked in pipeio_lock, our
1008 * value for space might be bad... the mutex
1009 * is dropped while we're blocked
1010 */
1011 if (space > (int)(wpipe->pipe_buffer.size -
1012 wpipe->pipe_buffer.cnt)) {
1013 pipeio_unlock(wpipe);
1014 goto retrywrite;
1015 }
1016
1017 /*
1018 * Transfer size is minimum of uio transfer
1019 * and free space in pipe buffer.
1020 */
1021 // LP64todo - fix this!
1022 if (space > uio_resid(uio)) {
1023 size = (size_t)uio_resid(uio);
1024 if (size > LONG_MAX) {
1025 panic("size greater than LONG_MAX");
1026 }
1027 } else {
1028 size = space;
1029 }
1030 /*
1031 * First segment to transfer is minimum of
1032 * transfer size and contiguous space in
1033 * pipe buffer. If first segment to transfer
1034 * is less than the transfer size, we've got
1035 * a wraparound in the buffer.
1036 */
1037 segsize = wpipe->pipe_buffer.size -
1038 wpipe->pipe_buffer.in;
1039 if (segsize > size) {
1040 segsize = size;
1041 }
1042
1043 /* Transfer first segment */
1044
1045 PIPE_UNLOCK(rpipe);
1046 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1047 (int)segsize, uio);
1048 PIPE_LOCK(rpipe);
1049
1050 if (error == 0 && segsize < size) {
1051 /*
1052 * Transfer remaining part now, to
1053 * support atomic writes. Wraparound
1054 * happened. (State 3)
1055 */
1056 if (wpipe->pipe_buffer.in + segsize !=
1057 wpipe->pipe_buffer.size) {
1058 panic("Expected pipe buffer "
1059 "wraparound disappeared");
1060 }
1061
1062 PIPE_UNLOCK(rpipe);
1063 error = uiomove(
1064 &wpipe->pipe_buffer.buffer[0],
1065 (int)(size - segsize), uio);
1066 PIPE_LOCK(rpipe);
1067 }
1068 /*
1069 * readers never know to read until count is updated.
1070 */
1071 if (error == 0) {
1072 wpipe->pipe_buffer.in += size;
1073 if (wpipe->pipe_buffer.in >
1074 wpipe->pipe_buffer.size) {
1075 if (wpipe->pipe_buffer.in !=
1076 size - segsize +
1077 wpipe->pipe_buffer.size) {
1078 panic("Expected "
1079 "wraparound bad");
1080 }
1081 wpipe->pipe_buffer.in = (unsigned int)(size -
1082 segsize);
1083 }
1084
1085 wpipe->pipe_buffer.cnt += size;
1086 if (wpipe->pipe_buffer.cnt >
1087 wpipe->pipe_buffer.size) {
1088 panic("Pipe buffer overflow");
1089 }
1090 }
1091 pipeio_unlock(wpipe);
1092 }
1093 if (error) {
1094 break;
1095 }
1096 } else {
1097 /*
1098 * If the "read-side" has been blocked, wake it up now.
1099 */
1100 if (wpipe->pipe_state & PIPE_WANTR) {
1101 wpipe->pipe_state &= ~PIPE_WANTR;
1102 wakeup(wpipe);
1103 }
1104
1105 /*
1106 * If read side wants to go away, we just issue a signal
1107 * to ourselves.
1108 */
1109 if ((wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1110 (fileproc_get_vflags(fp) & FPV_DRAIN)) {
1111 error = EPIPE;
1112 break;
1113 }
1114
1115 /*
1116 * don't block on non-blocking I/O
1117 * we'll do the pipeselwakeup on the way out
1118 */
1119 if (fp->f_flag & FNONBLOCK) {
1120 error = EAGAIN;
1121 break;
1122 }
1123
1124 /*
1125 * We have no more space and have something to offer,
1126 * wake up select/poll.
1127 */
1128 pipeselwakeup(wpipe, wpipe);
1129
1130 wpipe->pipe_state |= PIPE_WANTW;
1131
1132 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);
1133
1134 if (error != 0) {
1135 break;
1136 }
1137 }
1138 }
1139 --wpipe->pipe_busy;
1140
1141 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1142 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1143 wakeup(wpipe);
1144 }
1145 if (wpipe->pipe_buffer.cnt > 0) {
1146 /*
1147 * If there are any characters in the buffer, we wake up
1148 * the reader if it was blocked waiting for data.
1149 */
1150 if (wpipe->pipe_state & PIPE_WANTR) {
1151 wpipe->pipe_state &= ~PIPE_WANTR;
1152 wakeup(wpipe);
1153 }
1154 /*
1155 * wake up thread blocked in select/poll or post the notification
1156 */
1157 pipeselwakeup(wpipe, wpipe);
1158 }
1159
1160 #if defined(XNU_TARGET_OS_OSX)
1161 /* Update modification, status change (# of bytes in pipe) times */
1162 pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
1163 pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
1164 #endif
1165 PIPE_UNLOCK(rpipe);
1166
1167 return error;
1168 }
1169
1170 /*
1171 * we implement a very minimal set of ioctls for compatibility with sockets.
1172 */
1173 /* ARGSUSED 3 */
1174 static int
1175 pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
1176 __unused vfs_context_t ctx)
1177 {
1178 struct pipe *mpipe = (struct pipe *)fp->f_data;
1179 #if CONFIG_MACF
1180 int error;
1181 #endif
1182
1183 PIPE_LOCK(mpipe);
1184
1185 #if CONFIG_MACF
1186 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
1187 if (error) {
1188 PIPE_UNLOCK(mpipe);
1189
1190 return error;
1191 }
1192 #endif
1193
1194 switch (cmd) {
1195 case FIONBIO:
1196 PIPE_UNLOCK(mpipe);
1197 return 0;
1198
1199 case FIOASYNC:
1200 if (*(int *)data) {
1201 mpipe->pipe_state |= PIPE_ASYNC;
1202 } else {
1203 mpipe->pipe_state &= ~PIPE_ASYNC;
1204 }
1205 PIPE_UNLOCK(mpipe);
1206 return 0;
1207
1208 case FIONREAD:
1209 *(int *)data = mpipe->pipe_buffer.cnt;
1210 PIPE_UNLOCK(mpipe);
1211 return 0;
1212
1213 case TIOCSPGRP:
1214 mpipe->pipe_pgid = *(int *)data;
1215
1216 PIPE_UNLOCK(mpipe);
1217 return 0;
1218
1219 case TIOCGPGRP:
1220 *(int *)data = mpipe->pipe_pgid;
1221
1222 PIPE_UNLOCK(mpipe);
1223 return 0;
1224 }
1225 PIPE_UNLOCK(mpipe);
1226 return ENOTTY;
1227 }
1228
1229
1230 static int
1231 pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
1232 {
1233 struct pipe *rpipe = (struct pipe *)fp->f_data;
1234 struct pipe *wpipe;
1235 int retnum = 0;
1236
1237 if (rpipe == NULL || rpipe == (struct pipe *)-1) {
1238 return retnum;
1239 }
1240
1241 PIPE_LOCK(rpipe);
1242
1243 wpipe = rpipe->pipe_peer;
1244
1245
1246 #if CONFIG_MACF
1247 /*
1248 * XXX We should use a per thread credential here; minimally, the
1249 * XXX process credential should have a persistent reference on it
1250 * XXX before being passed in here.
1251 */
1252 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
1253 PIPE_UNLOCK(rpipe);
1254 return 0;
1255 }
1256 #endif
1257 switch (which) {
1258 case FREAD:
1259 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1260 (rpipe->pipe_buffer.cnt > 0) ||
1261 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1262 (fileproc_get_vflags(fp) & FPV_DRAIN)) {
1263 retnum = 1;
1264 } else {
1265 rpipe->pipe_state |= PIPE_SEL;
1266 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1267 }
1268 break;
1269
1270 case FWRITE:
1271 if (wpipe) {
1272 wpipe->pipe_state |= PIPE_WSELECT;
1273 }
1274 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1275 (fileproc_get_vflags(fp) & FPV_DRAIN) ||
1276 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1277 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
1278 retnum = 1;
1279 } else {
1280 wpipe->pipe_state |= PIPE_SEL;
1281 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
1282 }
1283 break;
1284 case 0:
1285 rpipe->pipe_state |= PIPE_SEL;
1286 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1287 break;
1288 }
1289 PIPE_UNLOCK(rpipe);
1290
1291 return retnum;
1292 }
1293
1294
1295 /* ARGSUSED 1 */
1296 static int
1297 pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
1298 {
1299 struct pipe *cpipe;
1300
1301 proc_fdlock_spin(vfs_context_proc(ctx));
1302 cpipe = (struct pipe *)fg->fg_data;
1303 fg->fg_data = NULL;
1304 proc_fdunlock(vfs_context_proc(ctx));
1305 if (cpipe) {
1306 pipeclose(cpipe);
1307 }
1308
1309 return 0;
1310 }
1311
1312 static void
1313 pipe_free_kmem(struct pipe *cpipe)
1314 {
1315 if (cpipe->pipe_buffer.buffer != NULL) {
1316 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
1317 OSAddAtomic(-1, &amountpipes);
1318 kheap_free(KHEAP_DATA_BUFFERS, cpipe->pipe_buffer.buffer,
1319 cpipe->pipe_buffer.size);
1320 cpipe->pipe_buffer.buffer = NULL;
1321 cpipe->pipe_buffer.size = 0;
1322 }
1323 }
1324
1325 /*
1326 * shutdown the pipe
1327 */
1328 static void
1329 pipeclose(struct pipe *cpipe)
1330 {
1331 struct pipe *ppipe;
1332
1333 PIPE_LOCK(cpipe);
1334
1335 /*
1336 * If the other side is blocked, wake it up saying that
1337 * we want to close it down.
1338 */
1339 cpipe->pipe_state &= ~PIPE_DRAIN;
1340 cpipe->pipe_state |= PIPE_EOF;
1341 pipeselwakeup(cpipe, cpipe);
1342
1343 while (cpipe->pipe_busy) {
1344 cpipe->pipe_state |= PIPE_WANT;
1345
1346 wakeup(cpipe);
1347 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1348 }
1349
1350 #if CONFIG_MACF
1351 /*
1352 * Free the shared pipe label only after the two ends are disconnected.
1353 */
1354 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) {
1355 mac_pipe_label_destroy(cpipe);
1356 }
1357 #endif
1358
1359 /*
1360 * Disconnect from peer
1361 */
1362 if ((ppipe = cpipe->pipe_peer) != NULL) {
1363 ppipe->pipe_state &= ~(PIPE_DRAIN);
1364 ppipe->pipe_state |= PIPE_EOF;
1365
1366 pipeselwakeup(ppipe, ppipe);
1367 wakeup(ppipe);
1368
1369 KNOTE(&ppipe->pipe_sel.si_note, 1);
1370
1371 ppipe->pipe_peer = NULL;
1372 }
1373
1374 /*
1375 * free resources
1376 */
1377
1378 PIPE_UNLOCK(cpipe);
1379
1380 pipepair_destroy_pipe(PIPE_PAIR(cpipe), cpipe);
1381 }
1382
1383 static int64_t
1384 filt_pipelowwat(struct knote *kn, struct pipe *rpipe, int64_t def_lowwat)
1385 {
1386 if ((kn->kn_sfflags & NOTE_LOWAT) == 0) {
1387 return def_lowwat;
1388 }
1389 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe)) {
1390 return MAX_PIPESIZE(rpipe);
1391 }
1392 return MAX(kn->kn_sdata, def_lowwat);
1393 }
1394
1395 static int
1396 filt_pipe_draincommon(struct knote *kn, struct pipe *rpipe)
1397 {
1398 struct pipe *wpipe = rpipe->pipe_peer;
1399
1400 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1401 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1402 kn->kn_flags |= EV_EOF;
1403 return 1;
1404 }
1405
1406 return 0;
1407 }
1408
1409 static int
1410 filt_pipenotsup(struct knote *kn, long hint)
1411 {
1412 #pragma unused(hint)
1413 struct pipe *rpipe = kn->kn_hook;
1414
1415 return filt_pipe_draincommon(kn, rpipe);
1416 }
1417
1418 static int
1419 filt_pipenotsuptouch(struct knote *kn, struct kevent_qos_s *kev)
1420 {
1421 struct pipe *rpipe = kn->kn_hook;
1422 int res;
1423
1424 PIPE_LOCK(rpipe);
1425
1426 /* accept new kevent data (and save off lowat threshold and flag) */
1427 kn->kn_sfflags = kev->fflags;
1428 kn->kn_sdata = kev->data;
1429
1430 /* determine if any event is now deemed fired */
1431 res = filt_pipe_draincommon(kn, rpipe);
1432
1433 PIPE_UNLOCK(rpipe);
1434
1435 return res;
1436 }
1437
1438 static int
1439 filt_pipenotsupprocess(struct knote *kn, struct kevent_qos_s *kev)
1440 {
1441 struct pipe *rpipe = kn->kn_hook;
1442 int res;
1443
1444 PIPE_LOCK(rpipe);
1445 res = filt_pipe_draincommon(kn, rpipe);
1446 if (res) {
1447 knote_fill_kevent(kn, kev, 0);
1448 }
1449 PIPE_UNLOCK(rpipe);
1450
1451 return res;
1452 }
1453
1454 /*ARGSUSED*/
1455 static int
1456 filt_piperead_common(struct knote *kn, struct kevent_qos_s *kev, struct pipe *rpipe)
1457 {
1458 int64_t data = rpipe->pipe_buffer.cnt;
1459 int res = 0;
1460
1461 if (filt_pipe_draincommon(kn, rpipe)) {
1462 res = 1;
1463 } else {
1464 res = data >= filt_pipelowwat(kn, rpipe, 1);
1465 }
1466 if (res && kev) {
1467 knote_fill_kevent(kn, kev, data);
1468 }
1469 return res;
1470 }
1471
1472 static int
1473 filt_piperead(struct knote *kn, long hint)
1474 {
1475 #pragma unused(hint)
1476 struct pipe *rpipe = kn->kn_hook;
1477
1478 return filt_piperead_common(kn, NULL, rpipe);
1479 }
1480
1481 static int
1482 filt_pipereadtouch(struct knote *kn, struct kevent_qos_s *kev)
1483 {
1484 struct pipe *rpipe = kn->kn_hook;
1485 int retval;
1486
1487 PIPE_LOCK(rpipe);
1488
1489 /* accept new inputs (and save the low water threshold and flag) */
1490 kn->kn_sdata = kev->data;
1491 kn->kn_sfflags = kev->fflags;
1492
1493 /* identify if any events are now fired */
1494 retval = filt_piperead_common(kn, NULL, rpipe);
1495
1496 PIPE_UNLOCK(rpipe);
1497
1498 return retval;
1499 }
1500
1501 static int
1502 filt_pipereadprocess(struct knote *kn, struct kevent_qos_s *kev)
1503 {
1504 struct pipe *rpipe = kn->kn_hook;
1505 int retval;
1506
1507 PIPE_LOCK(rpipe);
1508 retval = filt_piperead_common(kn, kev, rpipe);
1509 PIPE_UNLOCK(rpipe);
1510
1511 return retval;
1512 }
1513
1514 /*ARGSUSED*/
1515 static int
1516 filt_pipewrite_common(struct knote *kn, struct kevent_qos_s *kev, struct pipe *rpipe)
1517 {
1518 int64_t data = 0;
1519 int res = 0;
1520
1521 if (filt_pipe_draincommon(kn, rpipe)) {
1522 res = 1;
1523 } else {
1524 data = MAX_PIPESIZE(rpipe) - rpipe->pipe_buffer.cnt;
1525 res = data >= filt_pipelowwat(kn, rpipe, PIPE_BUF);
1526 }
1527 if (res && kev) {
1528 knote_fill_kevent(kn, kev, data);
1529 }
1530 return res;
1531 }
1532
1533 /*ARGSUSED*/
1534 static int
1535 filt_pipewrite(struct knote *kn, long hint)
1536 {
1537 #pragma unused(hint)
1538 struct pipe *rpipe = kn->kn_hook;
1539
1540 return filt_pipewrite_common(kn, NULL, rpipe);
1541 }
1542
1543
1544 static int
1545 filt_pipewritetouch(struct knote *kn, struct kevent_qos_s *kev)
1546 {
1547 struct pipe *rpipe = kn->kn_hook;
1548 int res;
1549
1550 PIPE_LOCK(rpipe);
1551
1552 /* accept new kevent data (and save off lowat threshold and flag) */
1553 kn->kn_sfflags = kev->fflags;
1554 kn->kn_sdata = kev->data;
1555
1556 /* determine if any event is now deemed fired */
1557 res = filt_pipewrite_common(kn, NULL, rpipe);
1558
1559 PIPE_UNLOCK(rpipe);
1560
1561 return res;
1562 }
1563
1564 static int
1565 filt_pipewriteprocess(struct knote *kn, struct kevent_qos_s *kev)
1566 {
1567 struct pipe *rpipe = kn->kn_hook;
1568 int res;
1569
1570 PIPE_LOCK(rpipe);
1571 res = filt_pipewrite_common(kn, kev, rpipe);
1572 PIPE_UNLOCK(rpipe);
1573
1574 return res;
1575 }
1576
1577 /*ARGSUSED*/
1578 static int
1579 pipe_kqfilter(struct fileproc *fp, struct knote *kn,
1580 __unused struct kevent_qos_s *kev)
1581 {
1582 struct pipe *cpipe = (struct pipe *)fp->f_data;
1583 struct pipe *rpipe = &PIPE_PAIR(cpipe)->pp_rpipe;
1584 int res;
1585
1586 PIPE_LOCK(cpipe);
1587 #if CONFIG_MACF
1588 /*
1589 * XXX We should use a per thread credential here; minimally, the
1590 * XXX process credential should have a persistent reference on it
1591 * XXX before being passed in here.
1592 */
1593 kauth_cred_t cred = vfs_context_ucred(vfs_context_current());
1594 if (mac_pipe_check_kqfilter(cred, kn, cpipe) != 0) {
1595 PIPE_UNLOCK(cpipe);
1596 knote_set_error(kn, EPERM);
1597 return 0;
1598 }
1599 #endif
1600
1601 /*
1602 * FreeBSD will fail the attach with EPIPE if the peer pipe is detached,
1603 * however, this isn't a programming error as the other side closing
1604 * could race with the kevent registration.
1605 *
1606 * Attach should only fail for programming mistakes else it will break
1607 * libdispatch.
1608 *
1609 * Like FreeBSD, have a "Neutered" filter that will not fire until
1610 * the pipe dies if the wrong filter is attached to the wrong end.
1611 *
1612 * Knotes are always attached to the "rpipe".
1613 */
1614 switch (kn->kn_filter) {
1615 case EVFILT_READ:
1616 if (fp->f_flag & FREAD) {
1617 kn->kn_filtid = EVFILTID_PIPE_R;
1618 res = filt_piperead_common(kn, NULL, rpipe);
1619 } else {
1620 kn->kn_filtid = EVFILTID_PIPE_N;
1621 res = filt_pipe_draincommon(kn, rpipe);
1622 }
1623 break;
1624
1625 case EVFILT_WRITE:
1626 if (fp->f_flag & FWRITE) {
1627 kn->kn_filtid = EVFILTID_PIPE_W;
1628 res = filt_pipewrite_common(kn, NULL, rpipe);
1629 } else {
1630 kn->kn_filtid = EVFILTID_PIPE_N;
1631 res = filt_pipe_draincommon(kn, rpipe);
1632 }
1633 break;
1634
1635 default:
1636 PIPE_UNLOCK(cpipe);
1637 knote_set_error(kn, EINVAL);
1638 return 0;
1639 }
1640
1641 kn->kn_hook = rpipe;
1642 KNOTE_ATTACH(&rpipe->pipe_sel.si_note, kn);
1643
1644 PIPE_UNLOCK(cpipe);
1645 return res;
1646 }
1647
1648 static void
1649 filt_pipedetach(struct knote *kn)
1650 {
1651 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1652 struct pipe *rpipe = &PIPE_PAIR(cpipe)->pp_rpipe;
1653
1654 PIPE_LOCK(cpipe);
1655 KNOTE_DETACH(&rpipe->pipe_sel.si_note, kn);
1656 PIPE_UNLOCK(cpipe);
1657 }
1658
1659 int
1660 fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
1661 {
1662 #if CONFIG_MACF
1663 int error;
1664 #endif
1665 struct timespec now;
1666 struct vinfo_stat * ub;
1667 int pipe_size = 0;
1668 int pipe_count;
1669
1670 if (cpipe == NULL) {
1671 return EBADF;
1672 }
1673 PIPE_LOCK(cpipe);
1674
1675 #if CONFIG_MACF
1676 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
1677 if (error) {
1678 PIPE_UNLOCK(cpipe);
1679 return error;
1680 }
1681 #endif
1682 if (cpipe->pipe_buffer.buffer == 0) {
1683 /*
1684 * must be stat'ing the write fd
1685 */
1686 if (cpipe->pipe_peer) {
1687 /*
1688 * the peer still exists, use it's info
1689 */
1690 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
1691 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
1692 } else {
1693 pipe_count = 0;
1694 }
1695 } else {
1696 pipe_size = MAX_PIPESIZE(cpipe);
1697 pipe_count = cpipe->pipe_buffer.cnt;
1698 }
1699 /*
1700 * since peer's buffer is setup ouside of lock
1701 * we might catch it in transient state
1702 */
1703 if (pipe_size == 0) {
1704 pipe_size = PIPE_SIZE;
1705 }
1706
1707 ub = &pinfo->pipe_stat;
1708
1709 bzero(ub, sizeof(*ub));
1710 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
1711 ub->vst_blksize = pipe_size;
1712 ub->vst_size = pipe_count;
1713 if (ub->vst_blksize != 0) {
1714 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
1715 }
1716 ub->vst_nlink = 1;
1717
1718 ub->vst_uid = kauth_getuid();
1719 ub->vst_gid = kauth_getgid();
1720
1721 nanotime(&now);
1722 ub->vst_atime = now.tv_sec;
1723 ub->vst_atimensec = now.tv_nsec;
1724
1725 ub->vst_mtime = now.tv_sec;
1726 ub->vst_mtimensec = now.tv_nsec;
1727
1728 ub->vst_ctime = now.tv_sec;
1729 ub->vst_ctimensec = now.tv_nsec;
1730
1731 /*
1732 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
1733 * XXX (st_dev, st_ino) should be unique.
1734 */
1735
1736 pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
1737 pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRHASH((uintptr_t)(cpipe->pipe_peer));
1738 pinfo->pipe_status = cpipe->pipe_state;
1739
1740 PIPE_UNLOCK(cpipe);
1741
1742 return 0;
1743 }
1744
1745
1746 static int
1747 pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
1748 {
1749 /* Note: fdlock already held */
1750 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->fp_glob->fg_data);
1751 boolean_t drain_pipe = FALSE;
1752
1753 /* Check if the pipe is going away */
1754 lck_mtx_lock_spin(&fp->fp_glob->fg_lock);
1755 if (os_ref_get_count_raw(&fp->fp_glob->fg_count) == 1) {
1756 drain_pipe = TRUE;
1757 }
1758 lck_mtx_unlock(&fp->fp_glob->fg_lock);
1759
1760 if (cpipe) {
1761 PIPE_LOCK(cpipe);
1762
1763 if (drain_pipe) {
1764 cpipe->pipe_state |= PIPE_DRAIN;
1765 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1766 }
1767 wakeup(cpipe);
1768
1769 /* Must wake up peer: a writer sleeps on the read side */
1770 if ((ppipe = cpipe->pipe_peer)) {
1771 if (drain_pipe) {
1772 ppipe->pipe_state |= PIPE_DRAIN;
1773 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1774 }
1775 wakeup(ppipe);
1776 }
1777
1778 PIPE_UNLOCK(cpipe);
1779 return 0;
1780 }
1781
1782 return 1;
1783 }