1 /*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19 /*
20 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
21 *
22 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
23 *
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the
28 * License may not be used to create, or enable the creation or
29 * redistribution of, unlawful or unlicensed copies of an Apple operating
30 * system, or to circumvent, violate, or enable the circumvention or
31 * violation of, any terms of an Apple operating system software license
32 * agreement.
33 *
34 * Please obtain a copy of the License at
35 * http://www.opensource.apple.com/apsl/ and read it before using this
36 * file.
37 *
38 * The Original Code and all software distributed under the License are
39 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
40 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
41 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
42 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
43 * Please see the License for the specific language governing rights and
44 * limitations under the License.
45 *
46 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
47 */
48
49 /*
50 * This file contains a high-performance replacement for the socket-based
51 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
52 * all features of sockets, but does do everything that pipes normally
53 * do.
54 */
55
56 /*
57 * This code has two modes of operation, a small write mode and a large
58 * write mode. The small write mode acts like conventional pipes with
59 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the
60 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT
61 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
62 * the receiving process can copy it directly from the pages in the sending
63 * process.
64 *
65 * If the sending process receives a signal, it is possible that it will
66 * go away, and certainly its address space can change, because control
67 * is returned back to the user-mode side. In that case, the pipe code
68 * arranges to copy the buffer supplied by the user process, to a pageable
69 * kernel buffer, and the receiving process will grab the data from the
70 * pageable kernel buffer. Since signals don't happen all that often,
71 * the copy operation is normally eliminated.
72 *
73 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
74 * happen for small transfers so that the system will not spend all of
75 * its time context switching.
76 *
77 * In order to limit the resource use of pipes, two sysctls exist:
78 *
79 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
80 * address space available to us in pipe_map. Whenever the amount in use
81 * exceeds half of this value, all new pipes will be created with size
82 * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited
83 * as well. This value is loader tunable only.
84 *
85 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
86 * be wired in order to facilitate direct copies using page flipping.
87 * Whenever this value is exceeded, pipes will fall back to using regular
88 * copies. This value is sysctl controllable at all times.
89 *
90 * These values are autotuned in subr_param.c.
91 *
92 * Memory usage may be monitored through the sysctls
93 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
94 *
95 */
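/*
 * For reference only, a userspace sketch (not part of this file) of reading
 * the monitoring sysctls named above, assuming PIPE_SYSCTLS is enabled so
 * that kern.ipc.pipes and kern.ipc.pipekva are actually registered:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int npipes = 0;
 *	size_t len = sizeof(npipes);
 *	if (sysctlbyname("kern.ipc.pipes", &npipes, &len, NULL, 0) == 0)
 *		printf("pipes currently allocated: %d\n", npipes);
 */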
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/filedesc.h>
100 #include <sys/kernel.h>
101 #include <sys/vnode.h>
102 #include <sys/proc_internal.h>
103 #include <sys/kauth.h>
104 #include <sys/file_internal.h>
105 #include <sys/stat.h>
106 #include <sys/ioctl.h>
107 #include <sys/fcntl.h>
108 #include <sys/malloc.h>
109 #include <sys/syslog.h>
110 #include <sys/unistd.h>
111 #include <sys/resourcevar.h>
112 #include <sys/aio_kern.h>
113 #include <sys/signalvar.h>
114 #include <sys/pipe.h>
115 #include <sys/sysproto.h>
116
117 #include <bsm/audit_kernel.h>
118
119 #include <sys/kdebug.h>
120
121 #include <kern/zalloc.h>
122 #include <vm/vm_kern.h>
123 #include <libkern/OSAtomic.h>
124
125 #define f_flag f_fglob->fg_flag
126 #define f_type f_fglob->fg_type
127 #define f_msgcount f_fglob->fg_msgcount
128 #define f_cred f_fglob->fg_cred
129 #define f_ops f_fglob->fg_ops
130 #define f_offset f_fglob->fg_offset
131 #define f_data f_fglob->fg_data
132 /*
133 * Use this define if you want to disable *fancy* VM things. Expect an
134 * approx 30% decrease in transfer rate. This could be useful for
135 * NetBSD or OpenBSD.
136 *
137 * This needs to be ported to OS X and the performance measured
138 * before committing to supporting it.
139 */
140 #define PIPE_NODIRECT 1
141
142 #ifndef PIPE_NODIRECT
143
144 #include <vm/vm.h>
145 #include <vm/vm_param.h>
146 #include <vm/vm_object.h>
147 #include <vm/vm_kern.h>
148 #include <vm/vm_extern.h>
149 #include <vm/pmap.h>
150 #include <vm/vm_map.h>
151 #include <vm/vm_page.h>
152 #include <vm/uma.h>
153
154 #endif
155
156
157 /*
158 * interfaces to the outside world
159 */
160 static int pipe_read(struct fileproc *fp, struct uio *uio,
161 kauth_cred_t cred, int flags, struct proc *p);
162
163 static int pipe_write(struct fileproc *fp, struct uio *uio,
164 kauth_cred_t cred, int flags, struct proc *p);
165
166 static int pipe_close(struct fileglob *fg, struct proc *p);
167
168 static int pipe_select(struct fileproc *fp, int which, void * wql, struct proc *p);
169
170 static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
171
172 static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p);
173
174
175 struct fileops pipeops =
176 { pipe_read,
177 pipe_write,
178 pipe_ioctl,
179 pipe_select,
180 pipe_close,
181 pipe_kqfilter,
182 0 };
183
184
185 static void filt_pipedetach(struct knote *kn);
186 static int filt_piperead(struct knote *kn, long hint);
187 static int filt_pipewrite(struct knote *kn, long hint);
188
189 static struct filterops pipe_rfiltops =
190 { 1, NULL, filt_pipedetach, filt_piperead };
191 static struct filterops pipe_wfiltops =
192 { 1, NULL, filt_pipedetach, filt_pipewrite };
193
194 /*
195 * Default pipe buffer size(s); this can be kind of large now because pipe
196 * space is pageable. The pipe code will try to maintain locality of
197 * reference for performance reasons, so small amounts of outstanding I/O
198 * will not wipe the cache.
199 */
200 #define MINPIPESIZE (PIPE_SIZE/3)
201
202 /*
203 * Limit the number of "big" pipes
204 */
205 #define LIMITBIGPIPES 32
206 static int nbigpipe;
207
208 static int amountpipes;
209 static int amountpipekva;
210
211 #ifndef PIPE_NODIRECT
212 static int amountpipekvawired;
213 #endif
214 int maxpipekva = 1024 * 1024 * 16;
215
216 #if PIPE_SYSCTLS
217 SYSCTL_DECL(_kern_ipc);
218
219 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
220 &maxpipekva, 0, "Pipe KVA limit");
221 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
222 &maxpipekvawired, 0, "Pipe KVA wired limit");
223 SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
224 &amountpipes, 0, "Current # of pipes");
225 SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
226 &nbigpipe, 0, "Current # of big pipes");
227 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
228 &amountpipekva, 0, "Pipe KVA usage");
229 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
230 &amountpipekvawired, 0, "Pipe wired KVA usage");
231 #endif
232
233 void pipeinit(void *dummy __unused);
234 static void pipeclose(struct pipe *cpipe);
235 static void pipe_free_kmem(struct pipe *cpipe);
236 static int pipe_create(struct pipe **cpipep);
237 static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
238 static __inline int pipelock(struct pipe *cpipe, int catch);
239 static __inline void pipeunlock(struct pipe *cpipe);
240
241 #ifndef PIPE_NODIRECT
242 static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
243 static void pipe_destroy_write_buffer(struct pipe *wpipe);
244 static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
245 static void pipe_clone_write_buffer(struct pipe *wpipe);
246 #endif
247
248 extern int postpipeevent(struct pipe *, int);
249 extern void evpipefree(struct pipe *cpipe);
250
251
252 static int pipespace(struct pipe *cpipe, int size);
253
254 static lck_grp_t *pipe_mtx_grp;
255 static lck_attr_t *pipe_mtx_attr;
256 static lck_grp_attr_t *pipe_mtx_grp_attr;
257
258 static zone_t pipe_zone;
259
260 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
261
262 void
263 pipeinit(void *dummy __unused)
264 {
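/*
 * create the zone that pipe structures are allocated from: element size,
 * maximum zone size (room for 8192 pipes), allocation chunk size, and a
 * name for debugging
 */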
265 pipe_zone = (zone_t)zinit(sizeof(struct pipe), 8192 * sizeof(struct pipe), 4096, "pipe zone");
266
267 /*
268 * allocate lock group attribute and group for pipe mutexes
269 */
270 pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
271 //lck_grp_attr_setstat(pipe_mtx_grp_attr);
272 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
273
274 /*
275 * allocate the lock attribute for pipe mutexes
276 */
277 pipe_mtx_attr = lck_attr_alloc_init();
278 //lck_attr_setdebug(pipe_mtx_attr);
279 }
280
281
282
283 /*
284 * The pipe system call for the DTYPE_PIPE type of pipes
285 */
286
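/*
 * For reference only, the userspace view of this call (a sketch, not part
 * of this file): on success the descriptors land in the caller's array,
 * fd[0] open for reading and fd[1] open for writing, matching the
 * half-duplex setup performed below.
 *
 *	#include <unistd.h>
 *
 *	int fd[2];
 *	if (pipe(fd) == 0) {
 *		char buf[2];
 *		write(fd[1], "hi", 2);
 *		read(fd[0], buf, sizeof(buf));
 *	}
 */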
287 /* ARGSUSED */
288 int
289 pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval)
290 {
291 struct fileproc *rf, *wf;
292 struct pipe *rpipe, *wpipe;
293 lck_mtx_t *pmtx;
294 int fd, error;
295
296 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
297 return (ENOMEM);
298
299 rpipe = wpipe = NULL;
300 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
301 error = ENFILE;
302 goto freepipes;
303 }
304 /*
305 * allocate the space for the normal I/O direction up
306 * front... we'll delay the allocation for the other
307 * direction until a write actually occurs (most
308 * likely it won't)...
309 *
310 * Reduce to 1/4th pipe size if we're over our global max.
311 */
312 if (amountpipekva > maxpipekva / 2)
313 error = pipespace(rpipe, SMALL_PIPE_SIZE);
314 else
315 error = pipespace(rpipe, PIPE_SIZE);
316 if (error)
317 goto freepipes;
318
319 #ifndef PIPE_NODIRECT
320 rpipe->pipe_state |= PIPE_DIRECTOK;
321 wpipe->pipe_state |= PIPE_DIRECTOK;
322 #endif
323 TAILQ_INIT(&rpipe->pipe_evlist);
324 TAILQ_INIT(&wpipe->pipe_evlist);
325
326 error = falloc(p, &rf, &fd);
327 if (error) {
328 goto freepipes;
329 }
330 retval[0] = fd;
331
332 /*
333 * for now we'll create half-duplex
334 * pipes... this is what we've always
335 * supported..
336 */
337 rf->f_flag = FREAD;
338 rf->f_type = DTYPE_PIPE;
339 rf->f_data = (caddr_t)rpipe;
340 rf->f_ops = &pipeops;
341
342 error = falloc(p, &wf, &fd);
343 if (error) {
344 fp_free(p, retval[0], rf);
345 goto freepipes;
346 }
347 wf->f_flag = FWRITE;
348 wf->f_type = DTYPE_PIPE;
349 wf->f_data = (caddr_t)wpipe;
350 wf->f_ops = &pipeops;
351
352 retval[1] = fd;
353 #ifdef MAC
354 /*
355 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
356 *
357 * struct pipe represents a pipe endpoint. The MAC label is shared
358 * between the connected endpoints. As a result mac_init_pipe() and
359 * mac_create_pipe() should only be called on one of the endpoints
360 * after they have been connected.
361 */
362 mac_init_pipe(rpipe);
363 mac_create_pipe(td->td_ucred, rpipe);
364 #endif
365 proc_fdlock(p);
366 *fdflags(p, retval[0]) &= ~UF_RESERVED;
367 *fdflags(p, retval[1]) &= ~UF_RESERVED;
368 fp_drop(p, retval[0], rf, 1);
369 fp_drop(p, retval[1], wf, 1);
370 proc_fdunlock(p);
371
372 rpipe->pipe_peer = wpipe;
373 wpipe->pipe_peer = rpipe;
374
375 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
376
377 return (0);
378
379 freepipes:
380 pipeclose(rpipe);
381 pipeclose(wpipe);
382 lck_mtx_free(pmtx, pipe_mtx_grp);
383
384 return (error);
385 }
386
387
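/*
 * Fill in a stat structure for a pipe endpoint; if the endpoint being
 * stat'ed has no buffer (i.e. it is the write side), report on its peer
 * instead.
 */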
388 int
389 pipe_stat(struct pipe *cpipe, struct stat *ub)
390 {
391 #ifdef MAC
392 int error;
393 #endif
394 struct timeval now;
395
396 if (cpipe == NULL)
397 return (EBADF);
398 #ifdef MAC
399 PIPE_LOCK(cpipe);
400 error = mac_check_pipe_stat(active_cred, cpipe);
401 PIPE_UNLOCK(cpipe);
402 if (error)
403 return (error);
404 #endif
405 if (cpipe->pipe_buffer.buffer == 0) {
406 /*
407 * must be stat'ing the write fd
408 */
409 cpipe = cpipe->pipe_peer;
410
411 if (cpipe == NULL)
412 return (EBADF);
413 }
414 bzero(ub, sizeof(*ub));
415 ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
416 ub->st_blksize = cpipe->pipe_buffer.size;
417 ub->st_size = cpipe->pipe_buffer.cnt;
418 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
419 ub->st_nlink = 1;
420
421 ub->st_uid = kauth_getuid();
422 ub->st_gid = kauth_getgid();
423
424 microtime(&now);
425 ub->st_atimespec.tv_sec = now.tv_sec;
426 ub->st_atimespec.tv_nsec = now.tv_usec * 1000;
427
428 ub->st_mtimespec.tv_sec = now.tv_sec;
429 ub->st_mtimespec.tv_nsec = now.tv_usec * 1000;
430
431 ub->st_ctimespec.tv_sec = now.tv_sec;
432 ub->st_ctimespec.tv_nsec = now.tv_usec * 1000;
433
434 /*
435 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
436 * XXX (st_dev, st_ino) should be unique.
437 */
438 return (0);
439 }
440
441
442 /*
443 * Allocate kva for the pipe circular buffer; the space is pageable.
444 * This routine will 'realloc' the size of a pipe safely: if it fails,
445 * it will retain the old buffer.
446 * If it fails, it will return ENOMEM.
447 */
448 static int
449 pipespace(struct pipe *cpipe, int size)
450 {
451 vm_offset_t buffer;
452
453 size = round_page(size);
454
455 if (kmem_alloc(kernel_map, &buffer, size) != KERN_SUCCESS)
456 return(ENOMEM);
457
458 /* free old resources if we're resizing */
459 pipe_free_kmem(cpipe);
460 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
461 cpipe->pipe_buffer.size = size;
462 cpipe->pipe_buffer.in = 0;
463 cpipe->pipe_buffer.out = 0;
464 cpipe->pipe_buffer.cnt = 0;
465
466 OSAddAtomic(1, (SInt32 *)&amountpipes);
467 OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva);
468
469 return (0);
470 }
471
472 /*
473 * initialize and allocate VM and memory for pipe
474 */
475 static int
476 pipe_create(struct pipe **cpipep)
477 {
478 struct pipe *cpipe;
479
480 cpipe = (struct pipe *)zalloc(pipe_zone);
481
482 if ((*cpipep = cpipe) == NULL)
483 return (ENOMEM);
484
485 /*
486 * protect so pipespace or pipeclose don't follow a junk pointer
487 * if pipespace() fails.
488 */
489 bzero(cpipe, sizeof *cpipe);
490
491 return (0);
492 }
493
494
495 /*
496 * lock a pipe for I/O, blocking other access
497 */
498 static __inline int
499 pipelock(cpipe, catch)
500 struct pipe *cpipe;
501 int catch;
502 {
503 int error;
504
505 while (cpipe->pipe_state & PIPE_LOCKFL) {
506 cpipe->pipe_state |= PIPE_LWANT;
507
508 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
509 "pipelk", 0);
510 if (error != 0)
511 return (error);
512 }
513 cpipe->pipe_state |= PIPE_LOCKFL;
514
515 return (0);
516 }
517
518 /*
519 * unlock a pipe I/O lock
520 */
521 static __inline void
522 pipeunlock(cpipe)
523 struct pipe *cpipe;
524 {
525
526 cpipe->pipe_state &= ~PIPE_LOCKFL;
527
528 if (cpipe->pipe_state & PIPE_LWANT) {
529 cpipe->pipe_state &= ~PIPE_LWANT;
530 wakeup(cpipe);
531 }
532 }
533
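/*
 * Wake up anyone waiting for activity on 'cpipe': select/poll waiters,
 * attached knotes and pipe event watchers.  If 'spipe' has async I/O
 * enabled, also deliver SIGIO to its owning process or process group.
 */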
534 static void
535 pipeselwakeup(cpipe, spipe)
536 struct pipe *cpipe;
537 struct pipe *spipe;
538 {
539
540 if (cpipe->pipe_state & PIPE_SEL) {
541 cpipe->pipe_state &= ~PIPE_SEL;
542 selwakeup(&cpipe->pipe_sel);
543 }
544 if (cpipe->pipe_state & PIPE_KNOTE)
545 KNOTE(&cpipe->pipe_sel.si_note, 1);
546
547 postpipeevent(cpipe, EV_RWBYTES);
548
549 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
550 struct proc *p;
551
552 if (spipe->pipe_pgid < 0)
553 gsignal(-spipe->pipe_pgid, SIGIO);
554 else if ((p = pfind(spipe->pipe_pgid)) != (struct proc *)0)
555 psignal(p, SIGIO);
556 }
557 }
558
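/*
 * Read up to uio_resid(uio) bytes out of the pipe's circular buffer,
 * blocking (unless FNONBLOCK is set) until data arrives or the write
 * side reaches EOF.
 */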
559 /* ARGSUSED */
560 static int
561 pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p)
562 {
563 struct pipe *rpipe = (struct pipe *)fp->f_data;
564 int error;
565 int nread = 0;
566 u_int size;
567
568 PIPE_LOCK(rpipe);
569 ++rpipe->pipe_busy;
570
571 error = pipelock(rpipe, 1);
572 if (error)
573 goto unlocked_error;
574
575 #ifdef MAC
576 error = mac_check_pipe_read(active_cred, rpipe);
577 if (error)
578 goto locked_error;
579 #endif
580
581 while (uio_resid(uio)) {
582 /*
583 * normal pipe buffer receive
584 */
585 if (rpipe->pipe_buffer.cnt > 0) {
586 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
587 if (size > rpipe->pipe_buffer.cnt)
588 size = rpipe->pipe_buffer.cnt;
589 // LP64todo - fix this!
590 if (size > (u_int) uio_resid(uio))
591 size = (u_int) uio_resid(uio);
592
593 PIPE_UNLOCK(rpipe);
594 error = uiomove(
595 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
596 size, uio);
597 PIPE_LOCK(rpipe);
598 if (error)
599 break;
600
601 rpipe->pipe_buffer.out += size;
602 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
603 rpipe->pipe_buffer.out = 0;
604
605 rpipe->pipe_buffer.cnt -= size;
606
607 /*
608 * If there is no more to read in the pipe, reset
609 * its pointers to the beginning. This improves
610 * cache hit stats.
611 */
612 if (rpipe->pipe_buffer.cnt == 0) {
613 rpipe->pipe_buffer.in = 0;
614 rpipe->pipe_buffer.out = 0;
615 }
616 nread += size;
617 #ifndef PIPE_NODIRECT
618 /*
619 * Direct copy, bypassing a kernel buffer.
620 */
621 } else if ((size = rpipe->pipe_map.cnt) &&
622 (rpipe->pipe_state & PIPE_DIRECTW)) {
623 caddr_t va;
624 // LP64todo - fix this!
625 if (size > (u_int) uio_resid(uio))
626 size = (u_int) uio_resid(uio);
627
628 va = (caddr_t) rpipe->pipe_map.kva +
629 rpipe->pipe_map.pos;
630 PIPE_UNLOCK(rpipe);
631 error = uiomove(va, size, uio);
632 PIPE_LOCK(rpipe);
633 if (error)
634 break;
635 nread += size;
636 rpipe->pipe_map.pos += size;
637 rpipe->pipe_map.cnt -= size;
638 if (rpipe->pipe_map.cnt == 0) {
639 rpipe->pipe_state &= ~PIPE_DIRECTW;
640 wakeup(rpipe);
641 }
642 #endif
643 } else {
644 /*
645 * detect EOF condition
646 * read returns 0 on EOF, no need to set error
647 */
648 if (rpipe->pipe_state & PIPE_EOF)
649 break;
650
651 /*
652 * If the "write-side" has been blocked, wake it up now.
653 */
654 if (rpipe->pipe_state & PIPE_WANTW) {
655 rpipe->pipe_state &= ~PIPE_WANTW;
656 wakeup(rpipe);
657 }
658
659 /*
660 * Break if some data was read.
661 */
662 if (nread > 0)
663 break;
664
665 /*
666 * Unlock the pipe buffer for our remaining processing.
667 * We will either break out with an error or we will
668 * sleep and relock to loop.
669 */
670 pipeunlock(rpipe);
671
672 /*
673 * Handle non-blocking mode operation or
674 * wait for more data.
675 */
676 if (fp->f_flag & FNONBLOCK) {
677 error = EAGAIN;
678 } else {
679 rpipe->pipe_state |= PIPE_WANTR;
680
681 error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
682
683 if (error == 0)
684 error = pipelock(rpipe, 1);
685 }
686 if (error)
687 goto unlocked_error;
688 }
689 }
690 #ifdef MAC
691 locked_error:
692 #endif
693 pipeunlock(rpipe);
694
695 unlocked_error:
696 --rpipe->pipe_busy;
697
698 /*
699 * PIPE_WANT processing only makes sense if pipe_busy is 0.
700 */
701 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
702 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
703 wakeup(rpipe);
704 } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
705 /*
706 * Handle write blocking hysteresis.
707 */
708 if (rpipe->pipe_state & PIPE_WANTW) {
709 rpipe->pipe_state &= ~PIPE_WANTW;
710 wakeup(rpipe);
711 }
712 }
713
714 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
715 pipeselwakeup(rpipe, rpipe->pipe_peer);
716
717 PIPE_UNLOCK(rpipe);
718
719 return (error);
720 }
721
722
723
724 #ifndef PIPE_NODIRECT
725 /*
726 * Map the sending process's buffer into kernel space and wire it.
727 * This is similar to a physical write operation.
728 */
729 static int
730 pipe_build_write_buffer(wpipe, uio)
731 struct pipe *wpipe;
732 struct uio *uio;
733 {
734 pmap_t pmap;
735 u_int size;
736 int i, j;
737 vm_offset_t addr, endaddr;
738
739
740 size = (u_int) uio->uio_iov->iov_len;
741 if (size > wpipe->pipe_buffer.size)
742 size = wpipe->pipe_buffer.size;
743
744 pmap = vmspace_pmap(curproc->p_vmspace);
745 endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
746 addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
747 for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
748 /*
749 * vm_fault_quick() can sleep. Consequently,
750 * vm_page_lock_queue() and vm_page_unlock_queue()
751 * should not be performed outside of this loop.
752 */
753 race:
754 if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
755 vm_page_lock_queues();
756 for (j = 0; j < i; j++)
757 vm_page_unhold(wpipe->pipe_map.ms[j]);
758 vm_page_unlock_queues();
759 return (EFAULT);
760 }
761 wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
762 VM_PROT_READ);
763 if (wpipe->pipe_map.ms[i] == NULL)
764 goto race;
765 }
766
767 /*
768 * set up the control block
769 */
770 wpipe->pipe_map.npages = i;
771 wpipe->pipe_map.pos =
772 ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
773 wpipe->pipe_map.cnt = size;
774
775 /*
776 * and map the buffer
777 */
778 if (wpipe->pipe_map.kva == 0) {
779 /*
780 * We need to allocate space for an extra page because the
781 * address range might (will) span pages at times.
782 */
783 wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
784 wpipe->pipe_buffer.size + PAGE_SIZE);
785 atomic_add_int(&amountpipekvawired,
786 wpipe->pipe_buffer.size + PAGE_SIZE);
787 }
788 pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
789 wpipe->pipe_map.npages);
790
791 /*
792 * and update the uio data
793 */
794
795 uio->uio_iov->iov_len -= size;
796 uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
797 if (uio->uio_iov->iov_len == 0)
798 uio->uio_iov++;
799 uio_setresid(uio, (uio_resid(uio) - size));
800 uio->uio_offset += size;
801 return (0);
802 }
803
804 /*
805 * unmap and unwire the process buffer
806 */
807 static void
808 pipe_destroy_write_buffer(wpipe)
809 struct pipe *wpipe;
810 {
811 int i;
812
813 if (wpipe->pipe_map.kva) {
814 pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
815
816 if (amountpipekvawired > maxpipekvawired / 2) {
817 /* Conserve address space */
818 vm_offset_t kva = wpipe->pipe_map.kva;
819 wpipe->pipe_map.kva = 0;
820 kmem_free(kernel_map, kva,
821 wpipe->pipe_buffer.size + PAGE_SIZE);
822 atomic_subtract_int(&amountpipekvawired,
823 wpipe->pipe_buffer.size + PAGE_SIZE);
824 }
825 }
826 vm_page_lock_queues();
827 for (i = 0; i < wpipe->pipe_map.npages; i++) {
828 vm_page_unhold(wpipe->pipe_map.ms[i]);
829 }
830 vm_page_unlock_queues();
831 wpipe->pipe_map.npages = 0;
832 }
833
834 /*
835 * In the case of a signal, the writing process might go away. This
836 * code copies the data into the circular buffer so that the source
837 * pages can be freed without loss of data.
838 */
839 static void
840 pipe_clone_write_buffer(wpipe)
841 struct pipe *wpipe;
842 {
843 int size;
844 int pos;
845
846 size = wpipe->pipe_map.cnt;
847 pos = wpipe->pipe_map.pos;
848
849 wpipe->pipe_buffer.in = size;
850 wpipe->pipe_buffer.out = 0;
851 wpipe->pipe_buffer.cnt = size;
852 wpipe->pipe_state &= ~PIPE_DIRECTW;
853
854 PIPE_UNLOCK(wpipe);
855 bcopy((caddr_t) wpipe->pipe_map.kva + pos,
856 wpipe->pipe_buffer.buffer, size);
857 pipe_destroy_write_buffer(wpipe);
858 PIPE_LOCK(wpipe);
859 }
860
861 /*
862 * This implements the pipe buffer write mechanism. Note that only
863 * a direct write OR a normal pipe write can be pending at any given time.
864 * If there are any characters in the pipe buffer, the direct write will
865 * be deferred until the receiving process grabs all of the bytes from
866 * the pipe buffer. Then the direct mapping write is set-up.
867 */
868 static int
869 pipe_direct_write(wpipe, uio)
870 struct pipe *wpipe;
871 struct uio *uio;
872 {
873 int error;
874
875 retry:
876 while (wpipe->pipe_state & PIPE_DIRECTW) {
877 if (wpipe->pipe_state & PIPE_WANTR) {
878 wpipe->pipe_state &= ~PIPE_WANTR;
879 wakeup(wpipe);
880 }
881 wpipe->pipe_state |= PIPE_WANTW;
882 error = msleep(wpipe, PIPE_MTX(wpipe),
883 PRIBIO | PCATCH, "pipdww", 0);
884 if (error)
885 goto error1;
886 if (wpipe->pipe_state & PIPE_EOF) {
887 error = EPIPE;
888 goto error1;
889 }
890 }
891 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */
892 if (wpipe->pipe_buffer.cnt > 0) {
893 if (wpipe->pipe_state & PIPE_WANTR) {
894 wpipe->pipe_state &= ~PIPE_WANTR;
895 wakeup(wpipe);
896 }
897
898 wpipe->pipe_state |= PIPE_WANTW;
899 error = msleep(wpipe, PIPE_MTX(wpipe),
900 PRIBIO | PCATCH, "pipdwc", 0);
901 if (error)
902 goto error1;
903 if (wpipe->pipe_state & PIPE_EOF) {
904 error = EPIPE;
905 goto error1;
906 }
907 goto retry;
908 }
909
910 wpipe->pipe_state |= PIPE_DIRECTW;
911
912 pipelock(wpipe, 0);
913 PIPE_UNLOCK(wpipe);
914 error = pipe_build_write_buffer(wpipe, uio);
915 PIPE_LOCK(wpipe);
916 pipeunlock(wpipe);
917 if (error) {
918 wpipe->pipe_state &= ~PIPE_DIRECTW;
919 goto error1;
920 }
921
922 error = 0;
923 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
924 if (wpipe->pipe_state & PIPE_EOF) {
925 pipelock(wpipe, 0);
926 PIPE_UNLOCK(wpipe);
927 pipe_destroy_write_buffer(wpipe);
928 PIPE_LOCK(wpipe);
929 pipeselwakeup(wpipe, wpipe);
930 pipeunlock(wpipe);
931 error = EPIPE;
932 goto error1;
933 }
934 if (wpipe->pipe_state & PIPE_WANTR) {
935 wpipe->pipe_state &= ~PIPE_WANTR;
936 wakeup(wpipe);
937 }
938 pipeselwakeup(wpipe, wpipe);
939 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
940 "pipdwt", 0);
941 }
942
943 pipelock(wpipe,0);
944 if (wpipe->pipe_state & PIPE_DIRECTW) {
945 /*
946 * this bit of trickery substitutes a kernel buffer for
947 * the process that might be going away.
948 */
949 pipe_clone_write_buffer(wpipe);
950 } else {
951 PIPE_UNLOCK(wpipe);
952 pipe_destroy_write_buffer(wpipe);
953 PIPE_LOCK(wpipe);
954 }
955 pipeunlock(wpipe);
956 return (error);
957
958 error1:
959 wakeup(wpipe);
960 return (error);
961 }
962 #endif
963
964
965
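/*
 * Copy the contents of the uio into the pipe's circular buffer, allocating
 * the buffer on first use and growing it to BIG_PIPE_SIZE when advantageous;
 * writes of PIPE_BUF bytes or less are kept atomic, and EPIPE is returned
 * if the read side has gone away.
 */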
966 static int
967 pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p)
968 {
969 int error = 0;
970 int orig_resid;
971 int pipe_size;
972 struct pipe *wpipe, *rpipe;
973
974 rpipe = (struct pipe *)fp->f_data;
975
976 PIPE_LOCK(rpipe);
977 wpipe = rpipe->pipe_peer;
978
979 /*
980 * detect loss of pipe read side, issue SIGPIPE if lost.
981 */
982 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) {
983 PIPE_UNLOCK(rpipe);
984 return (EPIPE);
985 }
986 #ifdef MAC
987 error = mac_check_pipe_write(active_cred, wpipe);
988 if (error) {
989 PIPE_UNLOCK(rpipe);
990 return (error);
991 }
992 #endif
993 ++wpipe->pipe_busy;
994
995 pipe_size = 0;
996
997 if (wpipe->pipe_buffer.buffer == 0) {
998 /*
999 * need to allocate some storage... we delay the allocation
1000 * until the first write on fd[0] to avoid allocating storage for both
1001 * 'pipe ends'... most pipes are half-duplex with the writes targeting
1002 * fd[1], so allocating space for both ends is a waste...
1003 *
1004 * Reduce to 1/4th pipe size if we're over our global max.
1005 */
1006 if (amountpipekva > maxpipekva / 2)
1007 pipe_size = SMALL_PIPE_SIZE;
1008 else
1009 pipe_size = PIPE_SIZE;
1010 }
1011
1012 /*
1013 * If it is advantageous to resize the pipe buffer, do
1014 * so.
1015 */
1016 if ((uio_resid(uio) > PIPE_SIZE) &&
1017 (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
1018 (amountpipekva < maxpipekva / 2) &&
1019 (nbigpipe < LIMITBIGPIPES) &&
1020 #ifndef PIPE_NODIRECT
1021 (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
1022 #endif
1023 (wpipe->pipe_buffer.cnt == 0)) {
1024
1025 pipe_size = BIG_PIPE_SIZE;
1026
1027 }
1028 if (pipe_size) {
1029 /*
1030 * need to do initial allocation or resizing of pipe
1031 */
1032 if ((error = pipelock(wpipe, 1)) == 0) {
1033 PIPE_UNLOCK(wpipe);
1034 if (pipespace(wpipe, pipe_size) == 0)
1035 OSAddAtomic(1, (SInt32 *)&nbigpipe);
1036 PIPE_LOCK(wpipe);
1037 pipeunlock(wpipe);
1038
1039 if (wpipe->pipe_buffer.buffer == 0) {
1040 /*
1041 * initial allocation failed
1042 */
1043 error = ENOMEM;
1044 }
1045 }
1046 if (error) {
1047 /*
1048 * If an error occurred unbusy and return, waking up any pending
1049 * readers.
1050 */
1051 --wpipe->pipe_busy;
1052 if ((wpipe->pipe_busy == 0) &&
1053 (wpipe->pipe_state & PIPE_WANT)) {
1054 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1055 wakeup(wpipe);
1056 }
1057 PIPE_UNLOCK(rpipe);
1058 return(error);
1059 }
1060 }
1061 // LP64todo - fix this!
1062 orig_resid = uio_resid(uio);
1063
1064 while (uio_resid(uio)) {
1065 int space;
1066
1067 #ifndef PIPE_NODIRECT
1068 /*
1069 * If the transfer is large, we can gain performance if
1070 * we do process-to-process copies directly.
1071 * If the write is non-blocking, we don't use the
1072 * direct write mechanism.
1073 *
1074 * The direct write mechanism will detect the reader going
1075 * away on us.
1076 */
1077 if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
1078 (fp->f_flag & FNONBLOCK) == 0 &&
1079 amountpipekvawired + uio->uio_resid < maxpipekvawired) {
1080 error = pipe_direct_write(wpipe, uio);
1081 if (error)
1082 break;
1083 continue;
1084 }
1085
1086 /*
1087 * Pipe buffered writes cannot be coincidental with
1088 * direct writes. We wait until the currently executing
1089 * direct write is completed before we start filling the
1090 * pipe buffer. We break out if a signal occurs or the
1091 * reader goes away.
1092 */
1093 retrywrite:
1094 while (wpipe->pipe_state & PIPE_DIRECTW) {
1095 if (wpipe->pipe_state & PIPE_WANTR) {
1096 wpipe->pipe_state &= ~PIPE_WANTR;
1097 wakeup(wpipe);
1098 }
1099 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0);
1100
1101 if (wpipe->pipe_state & PIPE_EOF)
1102 break;
1103 if (error)
1104 break;
1105 }
1106 #else
1107 retrywrite:
1108 #endif
1109 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1110
1111 /*
1112 * Writes of size <= PIPE_BUF must be atomic.
1113 */
1114 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
1115 space = 0;
1116
1117 if (space > 0) {
1118
1119 if ((error = pipelock(wpipe,1)) == 0) {
1120 int size; /* Transfer size */
1121 int segsize; /* first segment to transfer */
1122
1123 if (wpipe->pipe_state & PIPE_EOF) {
1124 pipeunlock(wpipe);
1125 error = EPIPE;
1126 break;
1127 }
1128 #ifndef PIPE_NODIRECT
1129 /*
1130 * It is possible for a direct write to
1131 * slip in on us... handle it here...
1132 */
1133 if (wpipe->pipe_state & PIPE_DIRECTW) {
1134 pipeunlock(wpipe);
1135 goto retrywrite;
1136 }
1137 #endif
1138 /*
1139 * If a process blocked in pipelock, our
1140 * value for space might be bad... the mutex
1141 * is dropped while we're blocked
1142 */
1143 if (space > (int)(wpipe->pipe_buffer.size -
1144 wpipe->pipe_buffer.cnt)) {
1145 pipeunlock(wpipe);
1146 goto retrywrite;
1147 }
1148
1149 /*
1150 * Transfer size is minimum of uio transfer
1151 * and free space in pipe buffer.
1152 */
1153 // LP64todo - fix this!
1154 if (space > uio_resid(uio))
1155 size = uio_resid(uio);
1156 else
1157 size = space;
1158 /*
1159 * First segment to transfer is minimum of
1160 * transfer size and contiguous space in
1161 * pipe buffer. If first segment to transfer
1162 * is less than the transfer size, we've got
1163 * a wraparound in the buffer.
1164 */
1165 segsize = wpipe->pipe_buffer.size -
1166 wpipe->pipe_buffer.in;
1167 if (segsize > size)
1168 segsize = size;
1169
1170 /* Transfer first segment */
1171
1172 PIPE_UNLOCK(rpipe);
1173 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1174 segsize, uio);
1175 PIPE_LOCK(rpipe);
1176
1177 if (error == 0 && segsize < size) {
1178 /*
1179 * Transfer remaining part now, to
1180 * support atomic writes. Wraparound
1181 * happened.
1182 */
1183 if (wpipe->pipe_buffer.in + segsize !=
1184 wpipe->pipe_buffer.size)
1185 panic("Expected pipe buffer "
1186 "wraparound disappeared");
1187
1188 PIPE_UNLOCK(rpipe);
1189 error = uiomove(
1190 &wpipe->pipe_buffer.buffer[0],
1191 size - segsize, uio);
1192 PIPE_LOCK(rpipe);
1193 }
1194 if (error == 0) {
1195 wpipe->pipe_buffer.in += size;
1196 if (wpipe->pipe_buffer.in >=
1197 wpipe->pipe_buffer.size) {
1198 if (wpipe->pipe_buffer.in !=
1199 size - segsize +
1200 wpipe->pipe_buffer.size)
1201 panic("Expected "
1202 "wraparound bad");
1203 wpipe->pipe_buffer.in = size -
1204 segsize;
1205 }
1206
1207 wpipe->pipe_buffer.cnt += size;
1208 if (wpipe->pipe_buffer.cnt >
1209 wpipe->pipe_buffer.size)
1210 panic("Pipe buffer overflow");
1211
1212 }
1213 pipeunlock(wpipe);
1214 }
1215 if (error)
1216 break;
1217
1218 } else {
1219 /*
1220 * If the "read-side" has been blocked, wake it up now.
1221 */
1222 if (wpipe->pipe_state & PIPE_WANTR) {
1223 wpipe->pipe_state &= ~PIPE_WANTR;
1224 wakeup(wpipe);
1225 }
1226 /*
1227 * don't block on non-blocking I/O
1228 * we'll do the pipeselwakeup on the way out
1229 */
1230 if (fp->f_flag & FNONBLOCK) {
1231 error = EAGAIN;
1232 break;
1233 }
1234 /*
1235 * We have no more space and have something to offer,
1236 * wake up select/poll.
1237 */
1238 pipeselwakeup(wpipe, wpipe);
1239
1240 wpipe->pipe_state |= PIPE_WANTW;
1241
1242 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);
1243
1244 if (error != 0)
1245 break;
1246 /*
1247 * If read side wants to go away, we just issue a signal
1248 * to ourselves.
1249 */
1250 if (wpipe->pipe_state & PIPE_EOF) {
1251 error = EPIPE;
1252 break;
1253 }
1254 }
1255 }
1256 --wpipe->pipe_busy;
1257
1258 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1259 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1260 wakeup(wpipe);
1261 }
1262 if (wpipe->pipe_buffer.cnt > 0) {
1263 /*
1264 * If there are any characters in the buffer, we wake up
1265 * the reader if it was blocked waiting for data.
1266 */
1267 if (wpipe->pipe_state & PIPE_WANTR) {
1268 wpipe->pipe_state &= ~PIPE_WANTR;
1269 wakeup(wpipe);
1270 }
1271 /*
1272 * wake up thread blocked in select/poll or post the notification
1273 */
1274 pipeselwakeup(wpipe, wpipe);
1275 }
1276 PIPE_UNLOCK(rpipe);
1277
1278 return (error);
1279 }
1280
1281 /*
1282 * we implement a very minimal set of ioctls for compatibility with sockets.
1283 */
1284 /* ARGSUSED 3 */
1285 static int
1286 pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, __unused struct proc *p)
1287 {
1288 struct pipe *mpipe = (struct pipe *)fp->f_data;
1289 #ifdef MAC
1290 int error;
1291 #endif
1292
1293 PIPE_LOCK(mpipe);
1294
1295 #ifdef MAC
1296 error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1297 if (error) {
1298 PIPE_UNLOCK(mpipe);
1299
1300 return (error);
1301 }
1302 #endif
1303
1304 switch (cmd) {
1305
1306 case FIONBIO:
1307 PIPE_UNLOCK(mpipe);
1308 return (0);
1309
1310 case FIOASYNC:
1311 if (*(int *)data) {
1312 mpipe->pipe_state |= PIPE_ASYNC;
1313 } else {
1314 mpipe->pipe_state &= ~PIPE_ASYNC;
1315 }
1316 PIPE_UNLOCK(mpipe);
1317 return (0);
1318
1319 case FIONREAD:
1320 #ifndef PIPE_NODIRECT
1321 if (mpipe->pipe_state & PIPE_DIRECTW)
1322 *(int *)data = mpipe->pipe_map.cnt;
1323 else
1324 #endif
1325 *(int *)data = mpipe->pipe_buffer.cnt;
1326 PIPE_UNLOCK(mpipe);
1327 return (0);
1328
1329 case TIOCSPGRP:
1330 mpipe->pipe_pgid = *(int *)data;
1331
1332 PIPE_UNLOCK(mpipe);
1333 return (0);
1334
1335 case TIOCGPGRP:
1336 *(int *)data = mpipe->pipe_pgid;
1337
1338 PIPE_UNLOCK(mpipe);
1339 return (0);
1340
1341 }
1342 PIPE_UNLOCK(mpipe);
1343 return (ENOTTY);
1344 }
1345
1346
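/*
 * Select/poll support: readable when data is buffered or EOF has been
 * seen; writable when at least PIPE_BUF bytes of space remain, or when
 * the peer is gone (the write will then fail immediately); otherwise
 * record the selector for a later pipeselwakeup().
 */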
1347 static int
1348 pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1349 {
1350 struct pipe *rpipe = (struct pipe *)fp->f_data;
1351 struct pipe *wpipe;
1352 int retnum = 0;
1353
1354 if (rpipe == NULL || rpipe == (struct pipe *)-1)
1355 return (retnum);
1356
1357 PIPE_LOCK(rpipe);
1358
1359 wpipe = rpipe->pipe_peer;
1360
1361 switch (which) {
1362
1363 case FREAD:
1364 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1365 (rpipe->pipe_buffer.cnt > 0) ||
1366 (rpipe->pipe_state & PIPE_EOF)) {
1367
1368 retnum = 1;
1369 } else {
1370 rpipe->pipe_state |= PIPE_SEL;
1371 selrecord(p, &rpipe->pipe_sel, wql);
1372 }
1373 break;
1374
1375 case FWRITE:
1376 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
1377 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1378 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
1379
1380 retnum = 1;
1381 } else {
1382 wpipe->pipe_state |= PIPE_SEL;
1383 selrecord(p, &wpipe->pipe_sel, wql);
1384 }
1385 break;
1386 case 0:
1387 rpipe->pipe_state |= PIPE_SEL;
1388 selrecord(p, &rpipe->pipe_sel, wql);
1389 break;
1390 }
1391 PIPE_UNLOCK(rpipe);
1392
1393 return (retnum);
1394 }
1395
1396
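/*
 * Close one end of the pipe: detach the pipe from the fileglob under the
 * fd lock, then tear the endpoint down via pipeclose().
 */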
1397 /* ARGSUSED 1 */
1398 static int
1399 pipe_close(struct fileglob *fg, __unused struct proc *p)
1400 {
1401 struct pipe *cpipe;
1402
1403 proc_fdlock(p);
1404 cpipe = (struct pipe *)fg->fg_data;
1405 fg->fg_data = NULL;
1406 proc_fdunlock(p);
1407
1408 if (cpipe)
1409 pipeclose(cpipe);
1410
1411 return (0);
1412 }
1413
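/*
 * Release the kernel memory backing a pipe's buffer (and any direct-write
 * mapping) and update the global accounting counters.
 */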
1414 static void
1415 pipe_free_kmem(struct pipe *cpipe)
1416 {
1417
1418 if (cpipe->pipe_buffer.buffer != NULL) {
1419 if (cpipe->pipe_buffer.size > PIPE_SIZE)
1420 OSAddAtomic(-1, (SInt32 *)&nbigpipe);
1421 OSAddAtomic(-(cpipe->pipe_buffer.size), (SInt32 *)&amountpipekva);
1422 OSAddAtomic(-1, (SInt32 *)&amountpipes);
1423
1424 kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer,
1425 cpipe->pipe_buffer.size);
1426 cpipe->pipe_buffer.buffer = NULL;
1427 }
1428 #ifndef PIPE_NODIRECT
1429 if (cpipe->pipe_map.kva != 0) {
1430 atomic_subtract_int(&amountpipekvawired,
1431 cpipe->pipe_buffer.size + PAGE_SIZE);
1432 kmem_free(kernel_map,
1433 cpipe->pipe_map.kva,
1434 cpipe->pipe_buffer.size + PAGE_SIZE);
1435 cpipe->pipe_map.cnt = 0;
1436 cpipe->pipe_map.kva = 0;
1437 cpipe->pipe_map.pos = 0;
1438 cpipe->pipe_map.npages = 0;
1439 }
1440 #endif
1441 }
1442
1443 /*
1444 * shutdown the pipe
1445 */
1446 static void
1447 pipeclose(struct pipe *cpipe)
1448 {
1449 struct pipe *ppipe;
1450
1451 if (cpipe == NULL)
1452 return;
1453
1454 /* partially created pipes won't have a valid mutex. */
1455 if (PIPE_MTX(cpipe) != NULL)
1456 PIPE_LOCK(cpipe);
1457
1458 pipeselwakeup(cpipe, cpipe);
1459
1460 /*
1461 * If the other side is blocked, wake it up saying that
1462 * we want to close it down.
1463 */
1464 while (cpipe->pipe_busy) {
1465 cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
1466
1467 wakeup(cpipe);
1468
1469 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1470 }
1471
1472 #ifdef MAC
1473 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1474 mac_destroy_pipe(cpipe);
1475 #endif
1476
1477 /*
1478 * Disconnect from peer
1479 */
1480 if ((ppipe = cpipe->pipe_peer) != NULL) {
1481
1482 ppipe->pipe_state |= PIPE_EOF;
1483
1484 pipeselwakeup(ppipe, ppipe);
1485 wakeup(ppipe);
1486
1487 if (cpipe->pipe_state & PIPE_KNOTE)
1488 KNOTE(&ppipe->pipe_sel.si_note, 1);
1489
1490 postpipeevent(ppipe, EV_RCLOSED);
1491
1492 ppipe->pipe_peer = NULL;
1493 }
1494 evpipefree(cpipe);
1495
1496 /*
1497 * free resources
1498 */
1499 if (PIPE_MTX(cpipe) != NULL) {
1500 if (ppipe != NULL) {
1501 /*
1502 * since the mutex is shared and the peer is still
1503 * alive, we need to release the mutex, not free it
1504 */
1505 PIPE_UNLOCK(cpipe);
1506 } else {
1507 /*
1508 * peer is gone, so we're the sole party left with
1509 * interest in this mutex... we can just free it
1510 */
1511 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
1512 }
1513 }
1514 pipe_free_kmem(cpipe);
1515
1516 zfree(pipe_zone, cpipe);
1517 }
1518
1519
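/*
 * Attach a knote to a pipe.  EVFILT_READ knotes attach to this endpoint;
 * EVFILT_WRITE knotes attach to the peer endpoint, which is where the
 * corresponding wakeups are posted.
 */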
1520 /*ARGSUSED*/
1521 static int
1522 pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1523 {
1524 struct pipe *cpipe;
1525
1526 cpipe = (struct pipe *)kn->kn_fp->f_data;
1527
1528 PIPE_LOCK(cpipe);
1529
1530 switch (kn->kn_filter) {
1531 case EVFILT_READ:
1532 kn->kn_fop = &pipe_rfiltops;
1533 break;
1534 case EVFILT_WRITE:
1535 kn->kn_fop = &pipe_wfiltops;
1536
1537 if (cpipe->pipe_peer == NULL) {
1538 /*
1539 * other end of pipe has been closed
1540 */
1541 PIPE_UNLOCK(cpipe);
1542 return (EPIPE);
1543 }
1544 cpipe = cpipe->pipe_peer;
1545 break;
1546 default:
1547 PIPE_UNLOCK(cpipe);
1548 return (1);
1549 }
1550
1551 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
1552 cpipe->pipe_state |= PIPE_KNOTE;
1553
1554 PIPE_UNLOCK(cpipe);
1555 return (0);
1556 }
1557
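/*
 * Detach a knote from a pipe, following the peer pointer for EVFILT_WRITE
 * knotes since those were attached to the peer endpoint above.
 */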
1558 static void
1559 filt_pipedetach(struct knote *kn)
1560 {
1561 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1562
1563 PIPE_LOCK(cpipe);
1564
1565 if (kn->kn_filter == EVFILT_WRITE) {
1566 if (cpipe->pipe_peer == NULL) {
1567 PIPE_UNLOCK(cpipe);
1568 return;
1569 }
1570 cpipe = cpipe->pipe_peer;
1571 }
1572 if (cpipe->pipe_state & PIPE_KNOTE) {
1573 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
1574 cpipe->pipe_state &= ~PIPE_KNOTE;
1575 }
1576 PIPE_UNLOCK(cpipe);
1577 }
1578
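/*
 * Read filter: kn_data is the number of bytes available to read; EV_EOF
 * is set once either end of the pipe has seen EOF.
 */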
1579 /*ARGSUSED*/
1580 static int
1581 filt_piperead(struct knote *kn, long hint)
1582 {
1583 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1584 struct pipe *wpipe;
1585 int retval;
1586
1587 /*
1588 * if hint == 0, then we've been called from the kevent
1589 * world directly and do not currently hold the pipe mutex...
1590 * if hint == 1, we're being called back via the KNOTE post
1591 * we made in pipeselwakeup, and we already hold the mutex...
1592 */
1593 if (hint == 0)
1594 PIPE_LOCK(rpipe);
1595
1596 wpipe = rpipe->pipe_peer;
1597 kn->kn_data = rpipe->pipe_buffer.cnt;
1598
1599 #ifndef PIPE_NODIRECT
1600 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
1601 kn->kn_data = rpipe->pipe_map.cnt;
1602 #endif
1603 if ((rpipe->pipe_state & PIPE_EOF) ||
1604 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1605 kn->kn_flags |= EV_EOF;
1606 retval = 1;
1607 } else
1608 retval = (kn->kn_sfflags & NOTE_LOWAT) ?
1609 (kn->kn_data >= kn->kn_sdata) : (kn->kn_data > 0);
1610
1611 if (hint == 0)
1612 PIPE_UNLOCK(rpipe);
1613
1614 return (retval);
1615 }
1616
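/*
 * Write filter: kn_data is the space remaining in the peer's buffer; a
 * missing or EOF'd peer reports EV_EOF with no space.
 */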
1617 /*ARGSUSED*/
1618 static int
1619 filt_pipewrite(struct knote *kn, long hint)
1620 {
1621 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1622 struct pipe *wpipe;
1623
1624 /*
1625 * if hint == 0, then we've been called from the kevent
1626 * world directly and do not currently hold the pipe mutex...
1627 * if hint == 1, we're being called back via the KNOTE post
1628 * we made in pipeselwakeup, and we already hold the mutex...
1629 */
1630 if (hint == 0)
1631 PIPE_LOCK(rpipe);
1632
1633 wpipe = rpipe->pipe_peer;
1634
1635 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1636 kn->kn_data = 0;
1637 kn->kn_flags |= EV_EOF;
1638
1639 if (hint == 0)
1640 PIPE_UNLOCK(rpipe);
1641 return (1);
1642 }
1643 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1644
1645 #ifndef PIPE_NODIRECT
1646 if (wpipe->pipe_state & PIPE_DIRECTW)
1647 kn->kn_data = 0;
1648 #endif
1649 if (hint == 0)
1650 PIPE_UNLOCK(rpipe);
1651
1652 return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
1653 kn->kn_sdata : PIPE_BUF));
1654 }