[apple/xnu.git] / bsd / kern / sys_pipe.c (xnu-792.21.3)
1/*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19/*
20 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
21 *
22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
23 *
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the License
28 * may not be used to create, or enable the creation or redistribution of,
29 * unlawful or unlicensed copies of an Apple operating system, or to
30 * circumvent, violate, or enable the circumvention or violation of, any
31 * terms of an Apple operating system software license agreement.
32 *
33 * Please obtain a copy of the License at
34 * http://www.opensource.apple.com/apsl/ and read it before using this file.
35 *
36 * The Original Code and all software distributed under the License are
37 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
38 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
40 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41 * Please see the License for the specific language governing rights and
42 * limitations under the License.
43 *
44 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45 */
46
47/*
48 * This file contains a high-performance replacement for the socket-based
49 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
50 * all features of sockets, but does do everything that pipes normally
51 * do.
52 */
53
54/*
55 * This code has two modes of operation, a small write mode and a large
56 * write mode. The small write mode acts like conventional pipes with
57 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the
58 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT
59 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
60 * the receiving process can copy it directly from the pages in the sending
61 * process.
62 *
63 * If the sending process receives a signal, it is possible that it will
64 * go away, and certainly its address space can change, because control
65 * is returned back to the user-mode side. In that case, the pipe code
66 * arranges to copy the buffer supplied by the user process, to a pageable
67 * kernel buffer, and the receiving process will grab the data from the
68 * pageable kernel buffer. Since signals don't happen all that often,
69 * the copy operation is normally eliminated.
70 *
71 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
72 * happen for small transfers so that the system will not spend all of
73 * its time context switching.
74 *
75 * In order to limit the resource use of pipes, two sysctls exist:
76 *
77 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
78 * address space available to us in pipe_map. Whenever the amount in use
79 * exceeds half of this value, all new pipes will be created with size
80 * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited
81 * as well. This value is loader tunable only.
82 *
83 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
84 * be wired in order to facilitate direct copies using page flipping.
85 * Whenever this value is exceeded, pipes will fall back to using regular
86 * copies. This value is sysctl controllable at all times.
87 *
88 * These values are autotuned in subr_param.c.
89 *
90 * Memory usage may be monitored through the sysctls
91 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
92 *
93 */
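/*
 * Illustrative userspace sketch (not part of this kernel file): the behavior
 * implemented below, as seen from a process.  fd[0] is the read end, fd[1]
 * the write end; writes of PIPE_BUF bytes or less are atomic, and read()
 * returns 0 (EOF) once the write end has been closed.
 *
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	int fd[2];
 *	char buf[64];
 *	ssize_t n;
 *
 *	if (pipe(fd) == -1)
 *		err(1, "pipe");
 *	if (fork() == 0) {			// child: writer
 *		close(fd[0]);
 *		(void)write(fd[1], "hello", 5);	// <= PIPE_BUF, so atomic
 *		close(fd[1]);
 *		_exit(0);
 *	}
 *	close(fd[1]);				// parent: reader
 *	while ((n = read(fd[0], buf, sizeof(buf))) > 0)
 *		;				// consume data
 *	// n == 0 here: the writer closed its end (PIPE_EOF on this side)
 *	close(fd[0]);
 */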
94
95#include <sys/param.h>
96#include <sys/systm.h>
97#include <sys/filedesc.h>
98#include <sys/kernel.h>
99#include <sys/vnode.h>
100#include <sys/proc_internal.h>
101#include <sys/kauth.h>
102#include <sys/file_internal.h>
103#include <sys/stat.h>
104#include <sys/ioctl.h>
105#include <sys/fcntl.h>
106#include <sys/malloc.h>
107#include <sys/syslog.h>
108#include <sys/unistd.h>
109#include <sys/resourcevar.h>
110#include <sys/aio_kern.h>
111#include <sys/signalvar.h>
112#include <sys/pipe.h>
113#include <sys/sysproto.h>
114
115#include <bsm/audit_kernel.h>
116
117#include <sys/kdebug.h>
118
119#include <kern/zalloc.h>
120#include <vm/vm_kern.h>
121#include <libkern/OSAtomic.h>
122
123#define f_flag f_fglob->fg_flag
124#define f_type f_fglob->fg_type
125#define f_msgcount f_fglob->fg_msgcount
126#define f_cred f_fglob->fg_cred
127#define f_ops f_fglob->fg_ops
128#define f_offset f_fglob->fg_offset
129#define f_data f_fglob->fg_data
130/*
131 * Use this define if you want to disable *fancy* VM things. Expect an
132 * approx 30% decrease in transfer rate. This could be useful for
133 * NetBSD or OpenBSD.
134 *
135 * this needs to be ported to X and the performance measured
136 * before committing to supporting it
137 */
138#define PIPE_NODIRECT 1
139
140#ifndef PIPE_NODIRECT
141
142#include <vm/vm.h>
143#include <vm/vm_param.h>
144#include <vm/vm_object.h>
145#include <vm/vm_kern.h>
146#include <vm/vm_extern.h>
147#include <vm/pmap.h>
148#include <vm/vm_map.h>
149#include <vm/vm_page.h>
150#include <vm/uma.h>
151
152#endif
153
154
155/*
156 * interfaces to the outside world
157 */
158static int pipe_read(struct fileproc *fp, struct uio *uio,
159 kauth_cred_t cred, int flags, struct proc *p);
160
161static int pipe_write(struct fileproc *fp, struct uio *uio,
162 kauth_cred_t cred, int flags, struct proc *p);
163
164static int pipe_close(struct fileglob *fg, struct proc *p);
165
166static int pipe_select(struct fileproc *fp, int which, void * wql, struct proc *p);
167
168static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
169
170static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p);
171
172
173struct fileops pipeops =
174 { pipe_read,
175 pipe_write,
176 pipe_ioctl,
177 pipe_select,
178 pipe_close,
179 pipe_kqfilter,
180 0 };
181
182
183static void filt_pipedetach(struct knote *kn);
184static int filt_piperead(struct knote *kn, long hint);
185static int filt_pipewrite(struct knote *kn, long hint);
186
187static struct filterops pipe_rfiltops =
188 { 1, NULL, filt_pipedetach, filt_piperead };
189static struct filterops pipe_wfiltops =
190 { 1, NULL, filt_pipedetach, filt_pipewrite };
191
192/*
193 * Default pipe buffer size(s), this can be kind-of large now because pipe
194 * space is pageable. The pipe code will try to maintain locality of
195 * reference for performance reasons, so small amounts of outstanding I/O
196 * will not wipe the cache.
197 */
198#define MINPIPESIZE (PIPE_SIZE/3)
199
200/*
201 * Limit the number of "big" pipes
202 */
203#define LIMITBIGPIPES 32
204static int nbigpipe;
205
206static int amountpipes;
207static int amountpipekva;
208
209#ifndef PIPE_NODIRECT
210static int amountpipekvawired;
211#endif
212int maxpipekva = 1024 * 1024 * 16;
213
214#if PIPE_SYSCTLS
215SYSCTL_DECL(_kern_ipc);
216
217SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
218 &maxpipekva, 0, "Pipe KVA limit");
219SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
220 &maxpipekvawired, 0, "Pipe KVA wired limit");
221SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
222 &amountpipes, 0, "Current # of pipes");
223SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
224 &nbigpipe, 0, "Current # of big pipes");
225SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
226 &amountpipekva, 0, "Pipe KVA usage");
227SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
228 &amountpipekvawired, 0, "Pipe wired KVA usage");
229#endif
230
231void pipeinit(void *dummy __unused);
232static void pipeclose(struct pipe *cpipe);
233static void pipe_free_kmem(struct pipe *cpipe);
234static int pipe_create(struct pipe **cpipep);
235static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
236static __inline int pipelock(struct pipe *cpipe, int catch);
237static __inline void pipeunlock(struct pipe *cpipe);
238
239#ifndef PIPE_NODIRECT
240static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
241static void pipe_destroy_write_buffer(struct pipe *wpipe);
242static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
243static void pipe_clone_write_buffer(struct pipe *wpipe);
244#endif
245
246extern int postpipeevent(struct pipe *, int);
247extern void evpipefree(struct pipe *cpipe);
248
249
250static int pipespace(struct pipe *cpipe, int size);
251
252static lck_grp_t *pipe_mtx_grp;
253static lck_attr_t *pipe_mtx_attr;
254static lck_grp_attr_t *pipe_mtx_grp_attr;
255
256static zone_t pipe_zone;
257
258SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
259
260void
261pipeinit(void *dummy __unused)
262{
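	/*
	 * Carve out a zone for pipe structures: element size, maximum zone
	 * size in bytes (room for 8192 pipes here), allocation chunk size,
	 * and a name shown by the zone debugging tools.
	 */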
263 pipe_zone = (zone_t)zinit(sizeof(struct pipe), 8192 * sizeof(struct pipe), 4096, "pipe zone");
264
265 /*
266 * allocate lock group attribute and group for pipe mutexes
267 */
268 pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
269 //lck_grp_attr_setstat(pipe_mtx_grp_attr);
270 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
271
272 /*
273 * allocate the lock attribute for pipe mutexes
274 */
275 pipe_mtx_attr = lck_attr_alloc_init();
276 //lck_attr_setdebug(pipe_mtx_attr);
277}
278
279
280
281/*
282 * The pipe system call for the DTYPE_PIPE type of pipes
283 */
284
285/* ARGSUSED */
286int
287pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval)
288{
289 struct fileproc *rf, *wf;
290 struct pipe *rpipe, *wpipe;
291 lck_mtx_t *pmtx;
292 int fd, error;
293
294 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
295 return (ENOMEM);
296
297 rpipe = wpipe = NULL;
298 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
299 error = ENFILE;
300 goto freepipes;
301 }
302 /*
303 * allocate the space for the normal I/O direction up
304 * front... we'll delay the allocation for the other
305 * direction until a write actually occurs (most
306 * likely it won't)...
307 *
308 * Reduce to 1/4th pipe size if we're over our global max.
309 */
310 if (amountpipekva > maxpipekva / 2)
311 error = pipespace(rpipe, SMALL_PIPE_SIZE);
312 else
313 error = pipespace(rpipe, PIPE_SIZE);
314 if (error)
315 goto freepipes;
316
317#ifndef PIPE_NODIRECT
318 rpipe->pipe_state |= PIPE_DIRECTOK;
319 wpipe->pipe_state |= PIPE_DIRECTOK;
320#endif
321 TAILQ_INIT(&rpipe->pipe_evlist);
322 TAILQ_INIT(&wpipe->pipe_evlist);
323
324 error = falloc(p, &rf, &fd);
325 if (error) {
326 goto freepipes;
327 }
328 retval[0] = fd;
329
330 /*
331 * for now we'll create half-duplex
332 * pipes... this is what we've always
333 * supported..
334 */
335 rf->f_flag = FREAD;
336 rf->f_type = DTYPE_PIPE;
337 rf->f_data = (caddr_t)rpipe;
338 rf->f_ops = &pipeops;
339
340 error = falloc(p, &wf, &fd);
341 if (error) {
342 fp_free(p, retval[0], rf);
343 goto freepipes;
344 }
345 wf->f_flag = FWRITE;
346 wf->f_type = DTYPE_PIPE;
347 wf->f_data = (caddr_t)wpipe;
348 wf->f_ops = &pipeops;
349
350 retval[1] = fd;
351#ifdef MAC
352 /*
353 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
354 *
355 * struct pipe represents a pipe endpoint. The MAC label is shared
356 * between the connected endpoints. As a result mac_init_pipe() and
357 * mac_create_pipe() should only be called on one of the endpoints
358 * after they have been connected.
359 */
360 mac_init_pipe(rpipe);
361 mac_create_pipe(td->td_ucred, rpipe);
362#endif
363 proc_fdlock(p);
364 *fdflags(p, retval[0]) &= ~UF_RESERVED;
365 *fdflags(p, retval[1]) &= ~UF_RESERVED;
366 fp_drop(p, retval[0], rf, 1);
367 fp_drop(p, retval[1], wf, 1);
368 proc_fdunlock(p);
369
370 rpipe->pipe_peer = wpipe;
371 wpipe->pipe_peer = rpipe;
372
373 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
374
375 return (0);
376
377freepipes:
378 pipeclose(rpipe);
379 pipeclose(wpipe);
380 lck_mtx_free(pmtx, pipe_mtx_grp);
381
382 return (error);
383}
384
385
386int
387pipe_stat(struct pipe *cpipe, struct stat *ub)
388{
389#ifdef MAC
390 int error;
391#endif
392 struct timeval now;
393
394 if (cpipe == NULL)
395 return (EBADF);
396#ifdef MAC
397 PIPE_LOCK(cpipe);
398 error = mac_check_pipe_stat(active_cred, cpipe);
399 PIPE_UNLOCK(cpipe);
400 if (error)
401 return (error);
402#endif
403 if (cpipe->pipe_buffer.buffer == 0) {
404 /*
405 * must be stat'ing the write fd
406 */
407 cpipe = cpipe->pipe_peer;
408
409 if (cpipe == NULL)
410 return (EBADF);
411 }
412 bzero(ub, sizeof(*ub));
413 ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
414 ub->st_blksize = cpipe->pipe_buffer.size;
415 ub->st_size = cpipe->pipe_buffer.cnt;
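	/* round up: number of whole st_blksize blocks needed to hold st_size bytes */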
416 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
417 ub->st_nlink = 1;
418
419 ub->st_uid = kauth_getuid();
420 ub->st_gid = kauth_getgid();
421
422 microtime(&now);
423 ub->st_atimespec.tv_sec = now.tv_sec;
424 ub->st_atimespec.tv_nsec = now.tv_usec * 1000;
425
426 ub->st_mtimespec.tv_sec = now.tv_sec;
427 ub->st_mtimespec.tv_nsec = now.tv_usec * 1000;
428
429 ub->st_ctimespec.tv_sec = now.tv_sec;
430 ub->st_ctimespec.tv_nsec = now.tv_usec * 1000;
431
432 /*
433 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
434 * XXX (st_dev, st_ino) should be unique.
435 */
436 return (0);
437}
438
439
440/*
441 * Allocate kva for pipe circular buffer, the space is pageable
442 * This routine will 'realloc' the size of a pipe safely, if it fails
443 * it will retain the old buffer.
444 * If it fails it will return ENOMEM.
445 */
446static int
447pipespace(struct pipe *cpipe, int size)
448{
449 vm_offset_t buffer;
450
451 size = round_page(size);
452
453 if (kmem_alloc(kernel_map, &buffer, size) != KERN_SUCCESS)
454 return(ENOMEM);
455
456 /* free old resources if we're resizing */
457 pipe_free_kmem(cpipe);
458 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
459 cpipe->pipe_buffer.size = size;
460 cpipe->pipe_buffer.in = 0;
461 cpipe->pipe_buffer.out = 0;
462 cpipe->pipe_buffer.cnt = 0;
463
464 OSAddAtomic(1, (SInt32 *)&amountpipes);
465 OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva);
466
467 return (0);
468}
469
470/*
471 * initialize and allocate VM and memory for pipe
472 */
473static int
474pipe_create(struct pipe **cpipep)
475{
476 struct pipe *cpipe;
477
478 cpipe = (struct pipe *)zalloc(pipe_zone);
479
480 if ((*cpipep = cpipe) == NULL)
481 return (ENOMEM);
482
483 /*
484 * protect so pipespace or pipeclose don't follow a junk pointer
485 * if pipespace() fails.
486 */
487 bzero(cpipe, sizeof *cpipe);
488
489 return (0);
490}
491
492
493/*
494 * lock a pipe for I/O, blocking other access
495 */
496static __inline int
497pipelock(cpipe, catch)
498 struct pipe *cpipe;
499 int catch;
500{
501 int error;
502
503 while (cpipe->pipe_state & PIPE_LOCKFL) {
504 cpipe->pipe_state |= PIPE_LWANT;
505
506 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
507 "pipelk", 0);
508 if (error != 0)
509 return (error);
510 }
511 cpipe->pipe_state |= PIPE_LOCKFL;
512
513 return (0);
514}
515
516/*
517 * unlock a pipe I/O lock
518 */
519static __inline void
520pipeunlock(cpipe)
521 struct pipe *cpipe;
522{
523
524 cpipe->pipe_state &= ~PIPE_LOCKFL;
525
526 if (cpipe->pipe_state & PIPE_LWANT) {
527 cpipe->pipe_state &= ~PIPE_LWANT;
528 wakeup(cpipe);
529 }
530}
531
532static void
533pipeselwakeup(cpipe, spipe)
534 struct pipe *cpipe;
535 struct pipe *spipe;
536{
537
538 if (cpipe->pipe_state & PIPE_SEL) {
539 cpipe->pipe_state &= ~PIPE_SEL;
540 selwakeup(&cpipe->pipe_sel);
541 }
542 if (cpipe->pipe_state & PIPE_KNOTE)
543 KNOTE(&cpipe->pipe_sel.si_note, 1);
544
545 postpipeevent(cpipe, EV_RWBYTES);
546
547 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
548 struct proc *p;
549
550 if (spipe->pipe_pgid < 0)
551 gsignal(-spipe->pipe_pgid, SIGIO);
552 else if ((p = pfind(spipe->pipe_pgid)) != (struct proc *)0)
553 psignal(p, SIGIO);
554 }
555}
556
557/* ARGSUSED */
558static int
559pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p)
560{
561 struct pipe *rpipe = (struct pipe *)fp->f_data;
562 int error;
563 int nread = 0;
564 u_int size;
565
566 PIPE_LOCK(rpipe);
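	/* mark the pipe busy so pipeclose() will wait for us before tearing it down */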
567 ++rpipe->pipe_busy;
568
569 error = pipelock(rpipe, 1);
570 if (error)
571 goto unlocked_error;
572
573#ifdef MAC
574 error = mac_check_pipe_read(active_cred, rpipe);
575 if (error)
576 goto locked_error;
577#endif
578
579 while (uio_resid(uio)) {
580 /*
581 * normal pipe buffer receive
582 */
583 if (rpipe->pipe_buffer.cnt > 0) {
584 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
585 if (size > rpipe->pipe_buffer.cnt)
586 size = rpipe->pipe_buffer.cnt;
587 // LP64todo - fix this!
588 if (size > (u_int) uio_resid(uio))
589 size = (u_int) uio_resid(uio);
590
591 PIPE_UNLOCK(rpipe);
592 error = uiomove(
593 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
594 size, uio);
595 PIPE_LOCK(rpipe);
596 if (error)
597 break;
598
599 rpipe->pipe_buffer.out += size;
600 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
601 rpipe->pipe_buffer.out = 0;
602
603 rpipe->pipe_buffer.cnt -= size;
604
605 /*
606 * If there is no more to read in the pipe, reset
607 * its pointers to the beginning. This improves
608 * cache hit stats.
609 */
610 if (rpipe->pipe_buffer.cnt == 0) {
611 rpipe->pipe_buffer.in = 0;
612 rpipe->pipe_buffer.out = 0;
613 }
614 nread += size;
615#ifndef PIPE_NODIRECT
616 /*
617 * Direct copy, bypassing a kernel buffer.
618 */
619 } else if ((size = rpipe->pipe_map.cnt) &&
620 (rpipe->pipe_state & PIPE_DIRECTW)) {
621 caddr_t va;
622 // LP64todo - fix this!
623 if (size > (u_int) uio_resid(uio))
624 size = (u_int) uio_resid(uio);
625
626 va = (caddr_t) rpipe->pipe_map.kva +
627 rpipe->pipe_map.pos;
628 PIPE_UNLOCK(rpipe);
629 error = uiomove(va, size, uio);
630 PIPE_LOCK(rpipe);
631 if (error)
632 break;
633 nread += size;
634 rpipe->pipe_map.pos += size;
635 rpipe->pipe_map.cnt -= size;
636 if (rpipe->pipe_map.cnt == 0) {
637 rpipe->pipe_state &= ~PIPE_DIRECTW;
638 wakeup(rpipe);
639 }
640#endif
641 } else {
642 /*
643 * detect EOF condition
644 * read returns 0 on EOF, no need to set error
645 */
646 if (rpipe->pipe_state & PIPE_EOF)
647 break;
648
649 /*
650 * If the "write-side" has been blocked, wake it up now.
651 */
652 if (rpipe->pipe_state & PIPE_WANTW) {
653 rpipe->pipe_state &= ~PIPE_WANTW;
654 wakeup(rpipe);
655 }
656
657 /*
658 * Break if some data was read.
659 */
660 if (nread > 0)
661 break;
662
663 /*
664 * Unlock the pipe buffer for our remaining processing.
665 * We will either break out with an error or we will
666 * sleep and relock to loop.
667 */
668 pipeunlock(rpipe);
669
670 /*
671 * Handle non-blocking mode operation or
672 * wait for more data.
673 */
674 if (fp->f_flag & FNONBLOCK) {
675 error = EAGAIN;
676 } else {
677 rpipe->pipe_state |= PIPE_WANTR;
678
679 error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
680
681 if (error == 0)
682 error = pipelock(rpipe, 1);
683 }
684 if (error)
685 goto unlocked_error;
686 }
687 }
688#ifdef MAC
689locked_error:
690#endif
691 pipeunlock(rpipe);
692
693unlocked_error:
694 --rpipe->pipe_busy;
695
696 /*
697 * PIPE_WANT processing only makes sense if pipe_busy is 0.
698 */
699 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
700 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
701 wakeup(rpipe);
702 } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
703 /*
704 * Handle write blocking hysteresis.
705 */
706 if (rpipe->pipe_state & PIPE_WANTW) {
707 rpipe->pipe_state &= ~PIPE_WANTW;
708 wakeup(rpipe);
709 }
710 }
711
712 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
713 pipeselwakeup(rpipe, rpipe->pipe_peer);
714
715 PIPE_UNLOCK(rpipe);
716
717 return (error);
718}
719
720
721
722#ifndef PIPE_NODIRECT
723/*
724 * Map the sending process's buffer into kernel space and wire it.
725 * This is similar to a physical write operation.
726 */
727static int
728pipe_build_write_buffer(wpipe, uio)
729 struct pipe *wpipe;
730 struct uio *uio;
731{
732 pmap_t pmap;
733 u_int size;
734 int i, j;
735 vm_offset_t addr, endaddr;
736
737
738 size = (u_int) uio->uio_iov->iov_len;
739 if (size > wpipe->pipe_buffer.size)
740 size = wpipe->pipe_buffer.size;
741
742 pmap = vmspace_pmap(curproc->p_vmspace);
743 endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
744 addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
745 for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
746 /*
747 * vm_fault_quick() can sleep. Consequently,
748 * vm_page_lock_queue() and vm_page_unlock_queue()
749 * should not be performed outside of this loop.
750 */
751 race:
752 if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
753 vm_page_lock_queues();
754 for (j = 0; j < i; j++)
755 vm_page_unhold(wpipe->pipe_map.ms[j]);
756 vm_page_unlock_queues();
757 return (EFAULT);
758 }
759 wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
760 VM_PROT_READ);
761 if (wpipe->pipe_map.ms[i] == NULL)
762 goto race;
763 }
764
765/*
766 * set up the control block
767 */
768 wpipe->pipe_map.npages = i;
769 wpipe->pipe_map.pos =
770 ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
771 wpipe->pipe_map.cnt = size;
772
773/*
774 * and map the buffer
775 */
776 if (wpipe->pipe_map.kva == 0) {
777 /*
778 * We need to allocate space for an extra page because the
779 * address range might (will) span pages at times.
780 */
781 wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
782 wpipe->pipe_buffer.size + PAGE_SIZE);
783 atomic_add_int(&amountpipekvawired,
784 wpipe->pipe_buffer.size + PAGE_SIZE);
785 }
786 pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
787 wpipe->pipe_map.npages);
788
789/*
790 * and update the uio data
791 */
792
793 uio->uio_iov->iov_len -= size;
794 uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
795 if (uio->uio_iov->iov_len == 0)
796 uio->uio_iov++;
797 uio_setresid(uio, (uio_resid(uio) - size));
798 uio->uio_offset += size;
799 return (0);
800}
801
802/*
803 * unmap and unwire the process buffer
804 */
805static void
806pipe_destroy_write_buffer(wpipe)
807 struct pipe *wpipe;
808{
809 int i;
810
811 if (wpipe->pipe_map.kva) {
812 pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
813
814 if (amountpipekvawired > maxpipekvawired / 2) {
815 /* Conserve address space */
816 vm_offset_t kva = wpipe->pipe_map.kva;
817 wpipe->pipe_map.kva = 0;
818 kmem_free(kernel_map, kva,
819 wpipe->pipe_buffer.size + PAGE_SIZE);
820 atomic_subtract_int(&amountpipekvawired,
821 wpipe->pipe_buffer.size + PAGE_SIZE);
822 }
823 }
824 vm_page_lock_queues();
825 for (i = 0; i < wpipe->pipe_map.npages; i++) {
826 vm_page_unhold(wpipe->pipe_map.ms[i]);
827 }
828 vm_page_unlock_queues();
829 wpipe->pipe_map.npages = 0;
830}
831
832/*
833 * In the case of a signal, the writing process might go away. This
834 * code copies the data into the circular buffer so that the source
835 * pages can be freed without loss of data.
836 */
837static void
838pipe_clone_write_buffer(wpipe)
839 struct pipe *wpipe;
840{
841 int size;
842 int pos;
843
844 size = wpipe->pipe_map.cnt;
845 pos = wpipe->pipe_map.pos;
846
847 wpipe->pipe_buffer.in = size;
848 wpipe->pipe_buffer.out = 0;
849 wpipe->pipe_buffer.cnt = size;
850 wpipe->pipe_state &= ~PIPE_DIRECTW;
851
852 PIPE_UNLOCK(wpipe);
853 bcopy((caddr_t) wpipe->pipe_map.kva + pos,
854 wpipe->pipe_buffer.buffer, size);
855 pipe_destroy_write_buffer(wpipe);
856 PIPE_LOCK(wpipe);
857}
858
859/*
860 * This implements the pipe buffer write mechanism. Note that only
861 * a direct write OR a normal pipe write can be pending at any given time.
862 * If there are any characters in the pipe buffer, the direct write will
863 * be deferred until the receiving process grabs all of the bytes from
864 * the pipe buffer. Then the direct mapping write is set-up.
865 */
866static int
867pipe_direct_write(wpipe, uio)
868 struct pipe *wpipe;
869 struct uio *uio;
870{
871 int error;
872
873retry:
874 while (wpipe->pipe_state & PIPE_DIRECTW) {
875 if (wpipe->pipe_state & PIPE_WANTR) {
876 wpipe->pipe_state &= ~PIPE_WANTR;
877 wakeup(wpipe);
878 }
879 wpipe->pipe_state |= PIPE_WANTW;
880 error = msleep(wpipe, PIPE_MTX(wpipe),
881 PRIBIO | PCATCH, "pipdww", 0);
882 if (error)
883 goto error1;
884 if (wpipe->pipe_state & PIPE_EOF) {
885 error = EPIPE;
886 goto error1;
887 }
888 }
889 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */
890 if (wpipe->pipe_buffer.cnt > 0) {
891 if (wpipe->pipe_state & PIPE_WANTR) {
892 wpipe->pipe_state &= ~PIPE_WANTR;
893 wakeup(wpipe);
894 }
895
896 wpipe->pipe_state |= PIPE_WANTW;
897 error = msleep(wpipe, PIPE_MTX(wpipe),
898 PRIBIO | PCATCH, "pipdwc", 0);
899 if (error)
900 goto error1;
901 if (wpipe->pipe_state & PIPE_EOF) {
902 error = EPIPE;
903 goto error1;
904 }
905 goto retry;
906 }
907
908 wpipe->pipe_state |= PIPE_DIRECTW;
909
910 pipelock(wpipe, 0);
911 PIPE_UNLOCK(wpipe);
912 error = pipe_build_write_buffer(wpipe, uio);
913 PIPE_LOCK(wpipe);
914 pipeunlock(wpipe);
915 if (error) {
916 wpipe->pipe_state &= ~PIPE_DIRECTW;
917 goto error1;
918 }
919
920 error = 0;
921 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
922 if (wpipe->pipe_state & PIPE_EOF) {
923 pipelock(wpipe, 0);
924 PIPE_UNLOCK(wpipe);
925 pipe_destroy_write_buffer(wpipe);
926 PIPE_LOCK(wpipe);
927 pipeselwakeup(wpipe, wpipe);
928 pipeunlock(wpipe);
929 error = EPIPE;
930 goto error1;
931 }
932 if (wpipe->pipe_state & PIPE_WANTR) {
933 wpipe->pipe_state &= ~PIPE_WANTR;
934 wakeup(wpipe);
935 }
936 pipeselwakeup(wpipe, wpipe);
937 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
938 "pipdwt", 0);
939 }
940
941 pipelock(wpipe,0);
942 if (wpipe->pipe_state & PIPE_DIRECTW) {
943 /*
944 * this bit of trickery substitutes a kernel buffer for
945 * the process that might be going away.
946 */
947 pipe_clone_write_buffer(wpipe);
948 } else {
949 PIPE_UNLOCK(wpipe);
950 pipe_destroy_write_buffer(wpipe);
951 PIPE_LOCK(wpipe);
952 }
953 pipeunlock(wpipe);
954 return (error);
955
956error1:
957 wakeup(wpipe);
958 return (error);
959}
960#endif
961
962
963
964static int
965pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p)
966{
967 int error = 0;
968 int orig_resid;
969 int pipe_size;
970 struct pipe *wpipe, *rpipe;
971
972 rpipe = (struct pipe *)fp->f_data;
973
974 PIPE_LOCK(rpipe);
975 wpipe = rpipe->pipe_peer;
976
977 /*
978 * detect loss of pipe read side, issue SIGPIPE if lost.
979 */
980 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) {
981 PIPE_UNLOCK(rpipe);
982 return (EPIPE);
983 }
984#ifdef MAC
985 error = mac_check_pipe_write(active_cred, wpipe);
986 if (error) {
987 PIPE_UNLOCK(rpipe);
988 return (error);
989 }
990#endif
991 ++wpipe->pipe_busy;
992
993 pipe_size = 0;
994
995 if (wpipe->pipe_buffer.buffer == 0) {
996 /*
997 * need to allocate some storage... we delay the allocation
998 * until the first write on fd[0] to avoid allocating storage for both
999 * 'pipe ends'... most pipes are half-duplex with the writes targeting
1000 * fd[1], so allocating space for both ends is a waste...
1001 *
1002 * Reduce to 1/4th pipe size if we're over our global max.
1003 */
1004 if (amountpipekva > maxpipekva / 2)
1005 pipe_size = SMALL_PIPE_SIZE;
1006 else
1007 pipe_size = PIPE_SIZE;
1008 }
1009
1010 /*
1011 * If it is advantageous to resize the pipe buffer, do
1012 * so.
1013 */
1014 if ((uio_resid(uio) > PIPE_SIZE) &&
1015 (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
1016 (amountpipekva < maxpipekva / 2) &&
1017 (nbigpipe < LIMITBIGPIPES) &&
1018#ifndef PIPE_NODIRECT
1019 (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
1020#endif
1021 (wpipe->pipe_buffer.cnt == 0)) {
1022
1023 pipe_size = BIG_PIPE_SIZE;
1024
1025 }
1026 if (pipe_size) {
1027 /*
1028 * need to do initial allocation or resizing of pipe
1029 */
1030 if ((error = pipelock(wpipe, 1)) == 0) {
1031 PIPE_UNLOCK(wpipe);
1032 if (pipespace(wpipe, pipe_size) == 0)
1033 OSAddAtomic(1, (SInt32 *)&nbigpipe);
1034 PIPE_LOCK(wpipe);
1035 pipeunlock(wpipe);
1036
1037 if (wpipe->pipe_buffer.buffer == 0) {
1038 /*
1039 * initial allocation failed
1040 */
1041 error = ENOMEM;
1042 }
1043 }
1044 if (error) {
1045 /*
1046 * If an error occurred, unbusy and return, waking up any pending
1047 * readers.
1048 */
1049 --wpipe->pipe_busy;
1050 if ((wpipe->pipe_busy == 0) &&
1051 (wpipe->pipe_state & PIPE_WANT)) {
1052 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1053 wakeup(wpipe);
1054 }
1055 PIPE_UNLOCK(rpipe);
1056 return(error);
1057 }
1058 }
1059 // LP64todo - fix this!
1060 orig_resid = uio_resid(uio);
1061
1062 while (uio_resid(uio)) {
1063 int space;
1064
1065#ifndef PIPE_NODIRECT
1066 /*
1067 * If the transfer is large, we can gain performance if
1068 * we do process-to-process copies directly.
1069 * If the write is non-blocking, we don't use the
1070 * direct write mechanism.
1071 *
1072 * The direct write mechanism will detect the reader going
1073 * away on us.
1074 */
1075 if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
1076 (fp->f_flag & FNONBLOCK) == 0 &&
1077 amountpipekvawired + uio->uio_resid < maxpipekvawired) {
1078 error = pipe_direct_write(wpipe, uio);
1079 if (error)
1080 break;
1081 continue;
1082 }
1083
1084 /*
1085 * Pipe buffered writes cannot be coincidental with
1086 * direct writes. We wait until the currently executing
1087 * direct write is completed before we start filling the
1088 * pipe buffer. We break out if a signal occurs or the
1089 * reader goes away.
1090 */
1091 retrywrite:
1092 while (wpipe->pipe_state & PIPE_DIRECTW) {
1093 if (wpipe->pipe_state & PIPE_WANTR) {
1094 wpipe->pipe_state &= ~PIPE_WANTR;
1095 wakeup(wpipe);
1096 }
1097 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0);
1098
1099 if (wpipe->pipe_state & PIPE_EOF)
1100 break;
1101 if (error)
1102 break;
1103 }
1104#else
1105 retrywrite:
1106#endif
1107 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1108
1109 /*
1110 * Writes of size <= PIPE_BUF must be atomic.
1111 */
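	/*
	 * For example (hypothetical numbers): a 512-byte write (orig_resid
	 * <= PIPE_BUF) that finds only 200 free bytes treats space as 0,
	 * sleeps below until the reader drains the buffer, and is then
	 * written as a single atomic unit rather than being split.
	 */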
1112 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
1113 space = 0;
1114
1115 if (space > 0) {
1116
1117 if ((error = pipelock(wpipe,1)) == 0) {
1118 int size; /* Transfer size */
1119 int segsize; /* first segment to transfer */
1120
1121 if (wpipe->pipe_state & PIPE_EOF) {
1122 pipeunlock(wpipe);
1123 error = EPIPE;
1124 break;
1125 }
1126#ifndef PIPE_NODIRECT
1127 /*
1128 * It is possible for a direct write to
1129 * slip in on us... handle it here...
1130 */
1131 if (wpipe->pipe_state & PIPE_DIRECTW) {
1132 pipeunlock(wpipe);
1133 goto retrywrite;
1134 }
1135#endif
1136 /*
1137 * If a process blocked in pipelock, our
1138 * value for space might be bad... the mutex
1139 * is dropped while we're blocked
1140 */
1141 if (space > (int)(wpipe->pipe_buffer.size -
1142 wpipe->pipe_buffer.cnt)) {
1143 pipeunlock(wpipe);
1144 goto retrywrite;
1145 }
1146
1147 /*
1148 * Transfer size is minimum of uio transfer
1149 * and free space in pipe buffer.
1150 */
1151 // LP64todo - fix this!
1152 if (space > uio_resid(uio))
1153 size = uio_resid(uio);
1154 else
1155 size = space;
1156 /*
1157 * First segment to transfer is minimum of
1158 * transfer size and contiguous space in
1159 * pipe buffer. If first segment to transfer
1160 * is less than the transfer size, we've got
1161 * a wraparound in the buffer.
1162 */
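	/*
	 * Worked example (hypothetical numbers): buffer.size = 512,
	 * buffer.in = 480, size = 100.  segsize = 512 - 480 = 32, so the
	 * first uiomove() fills the last 32 bytes of the buffer; the
	 * second uiomove() below copies the remaining 68 bytes to offset 0,
	 * and buffer.in wraps around to 100 - 32 = 68.
	 */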
1163 segsize = wpipe->pipe_buffer.size -
1164 wpipe->pipe_buffer.in;
1165 if (segsize > size)
1166 segsize = size;
1167
1168 /* Transfer first segment */
1169
1170 PIPE_UNLOCK(rpipe);
1171 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1172 segsize, uio);
1173 PIPE_LOCK(rpipe);
1174
1175 if (error == 0 && segsize < size) {
1176 /*
1177 * Transfer remaining part now, to
1178 * support atomic writes. Wraparound
1179 * happened.
1180 */
1181 if (wpipe->pipe_buffer.in + segsize !=
1182 wpipe->pipe_buffer.size)
1183 panic("Expected pipe buffer "
1184 "wraparound disappeared");
1185
1186 PIPE_UNLOCK(rpipe);
1187 error = uiomove(
1188 &wpipe->pipe_buffer.buffer[0],
1189 size - segsize, uio);
1190 PIPE_LOCK(rpipe);
1191 }
1192 if (error == 0) {
1193 wpipe->pipe_buffer.in += size;
1194 if (wpipe->pipe_buffer.in >=
1195 wpipe->pipe_buffer.size) {
1196 if (wpipe->pipe_buffer.in !=
1197 size - segsize +
1198 wpipe->pipe_buffer.size)
1199 panic("Expected "
1200 "wraparound bad");
1201 wpipe->pipe_buffer.in = size -
1202 segsize;
1203 }
1204
1205 wpipe->pipe_buffer.cnt += size;
1206 if (wpipe->pipe_buffer.cnt >
1207 wpipe->pipe_buffer.size)
1208 panic("Pipe buffer overflow");
1209
1210 }
1211 pipeunlock(wpipe);
1212 }
1213 if (error)
1214 break;
1215
1216 } else {
1217 /*
1218 * If the "read-side" has been blocked, wake it up now.
1219 */
1220 if (wpipe->pipe_state & PIPE_WANTR) {
1221 wpipe->pipe_state &= ~PIPE_WANTR;
1222 wakeup(wpipe);
1223 }
1224 /*
1225 * don't block on non-blocking I/O
1226 * we'll do the pipeselwakeup on the way out
1227 */
1228 if (fp->f_flag & FNONBLOCK) {
1229 error = EAGAIN;
1230 break;
1231 }
1232 /*
1233 * We have no more space and have something to offer,
1234 * wake up select/poll.
1235 */
1236 pipeselwakeup(wpipe, wpipe);
1237
1238 wpipe->pipe_state |= PIPE_WANTW;
1239
1240 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);
1241
1242 if (error != 0)
1243 break;
1244 /*
1245 * If read side wants to go away, we just issue a signal
1246 * to ourselves.
1247 */
1248 if (wpipe->pipe_state & PIPE_EOF) {
1249 error = EPIPE;
1250 break;
1251 }
1252 }
1253 }
1254 --wpipe->pipe_busy;
1255
1256 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1257 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1258 wakeup(wpipe);
1259 }
1260 if (wpipe->pipe_buffer.cnt > 0) {
1261 /*
1262 * If there are any characters in the buffer, we wake up
1263 * the reader if it was blocked waiting for data.
1264 */
1265 if (wpipe->pipe_state & PIPE_WANTR) {
1266 wpipe->pipe_state &= ~PIPE_WANTR;
1267 wakeup(wpipe);
1268 }
1269 /*
1270 * wake up thread blocked in select/poll or post the notification
1271 */
1272 pipeselwakeup(wpipe, wpipe);
1273 }
1274 PIPE_UNLOCK(rpipe);
1275
1276 return (error);
1277}
1278
1279/*
1280 * we implement a very minimal set of ioctls for compatibility with sockets.
1281 */
1282/* ARGSUSED 3 */
1283static int
1284pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, __unused struct proc *p)
1285{
1286 struct pipe *mpipe = (struct pipe *)fp->f_data;
1287#ifdef MAC
1288 int error;
1289#endif
1290
1291 PIPE_LOCK(mpipe);
1292
1293#ifdef MAC
1294 error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
1295 if (error) {
1296 PIPE_UNLOCK(mpipe);
1297
1298 return (error);
1299 }
1300#endif
1301
1302 switch (cmd) {
1303
1304 case FIONBIO:
1305 PIPE_UNLOCK(mpipe);
1306 return (0);
1307
1308 case FIOASYNC:
1309 if (*(int *)data) {
1310 mpipe->pipe_state |= PIPE_ASYNC;
1311 } else {
1312 mpipe->pipe_state &= ~PIPE_ASYNC;
1313 }
1314 PIPE_UNLOCK(mpipe);
1315 return (0);
1316
1317 case FIONREAD:
1318#ifndef PIPE_NODIRECT
1319 if (mpipe->pipe_state & PIPE_DIRECTW)
1320 *(int *)data = mpipe->pipe_map.cnt;
1321 else
1322#endif
1323 *(int *)data = mpipe->pipe_buffer.cnt;
1324 PIPE_UNLOCK(mpipe);
1325 return (0);
1326
1327 case TIOCSPGRP:
1328 mpipe->pipe_pgid = *(int *)data;
1329
1330 PIPE_UNLOCK(mpipe);
1331 return (0);
1332
1333 case TIOCGPGRP:
1334 *(int *)data = mpipe->pipe_pgid;
1335
1336 PIPE_UNLOCK(mpipe);
1337 return (0);
1338
1339 }
1340 PIPE_UNLOCK(mpipe);
1341 return (ENOTTY);
1342}
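/*
 * Illustrative userspace sketch of the ioctls handled above (not part of
 * this kernel file; assumes the usual <sys/ioctl.h> definitions):
 *
 *	int nread, on = 1, pgid = getpid();
 *
 *	ioctl(fd[0], FIONREAD, &nread);		// bytes currently buffered
 *	ioctl(fd[1], FIOASYNC, &on);		// request SIGIO on pipe activity...
 *	ioctl(fd[1], TIOCSPGRP, &pgid);		// ...delivered to this pid
 */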
1343
1344
1345static int
1346pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p)
1347{
1348 struct pipe *rpipe = (struct pipe *)fp->f_data;
1349 struct pipe *wpipe;
1350 int retnum = 0;
1351
1352 if (rpipe == NULL || rpipe == (struct pipe *)-1)
1353 return (retnum);
1354
1355 PIPE_LOCK(rpipe);
1356
1357 wpipe = rpipe->pipe_peer;
1358
1359 switch (which) {
1360
1361 case FREAD:
1362 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1363 (rpipe->pipe_buffer.cnt > 0) ||
1364 (rpipe->pipe_state & PIPE_EOF)) {
1365
1366 retnum = 1;
1367 } else {
1368 rpipe->pipe_state |= PIPE_SEL;
1369 selrecord(p, &rpipe->pipe_sel, wql);
1370 }
1371 break;
1372
1373 case FWRITE:
1374 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
1375 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1376 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
1377
1378 retnum = 1;
1379 } else {
1380 wpipe->pipe_state |= PIPE_SEL;
1381 selrecord(p, &wpipe->pipe_sel, wql);
1382 }
1383 break;
1384 case 0:
1385 rpipe->pipe_state |= PIPE_SEL;
1386 selrecord(p, &rpipe->pipe_sel, wql);
1387 break;
1388 }
1389 PIPE_UNLOCK(rpipe);
1390
1391 return (retnum);
1392}
1393
1394
1395/* ARGSUSED 1 */
1396static int
1397pipe_close(struct fileglob *fg, __unused struct proc *p)
1398{
1399 struct pipe *cpipe;
1400
1401 proc_fdlock(p);
1402 cpipe = (struct pipe *)fg->fg_data;
1403 fg->fg_data = NULL;
1404 proc_fdunlock(p);
1405
1406 if (cpipe)
1407 pipeclose(cpipe);
1408
1409 return (0);
1410}
1411
1412static void
1413pipe_free_kmem(struct pipe *cpipe)
1414{
1415
1416 if (cpipe->pipe_buffer.buffer != NULL) {
1417 if (cpipe->pipe_buffer.size > PIPE_SIZE)
1418 OSAddAtomic(-1, (SInt32 *)&nbigpipe);
1419 OSAddAtomic(-(cpipe->pipe_buffer.size), (SInt32 *)&amountpipekva);
1420 OSAddAtomic(-1, (SInt32 *)&amountpipes);
1421
1422 kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer,
1423 cpipe->pipe_buffer.size);
1424 cpipe->pipe_buffer.buffer = NULL;
1425 }
1426#ifndef PIPE_NODIRECT
1427 if (cpipe->pipe_map.kva != 0) {
1428 atomic_subtract_int(&amountpipekvawired,
1429 cpipe->pipe_buffer.size + PAGE_SIZE);
1430 kmem_free(kernel_map,
1431 cpipe->pipe_map.kva,
1432 cpipe->pipe_buffer.size + PAGE_SIZE);
1433 cpipe->pipe_map.cnt = 0;
1434 cpipe->pipe_map.kva = 0;
1435 cpipe->pipe_map.pos = 0;
1436 cpipe->pipe_map.npages = 0;
1437 }
1438#endif
1439}
1440
1441/*
1442 * shutdown the pipe
1443 */
1444static void
1445pipeclose(struct pipe *cpipe)
1446{
1447 struct pipe *ppipe;
1448
1449 if (cpipe == NULL)
1450 return;
1451
1452 /* partially created pipes won't have a valid mutex. */
1453 if (PIPE_MTX(cpipe) != NULL)
1454 PIPE_LOCK(cpipe);
1455
1456 pipeselwakeup(cpipe, cpipe);
1457
1458 /*
1459 * If the other side is blocked, wake it up saying that
1460 * we want to close it down.
1461 */
1462 while (cpipe->pipe_busy) {
1463 cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
1464
1465 wakeup(cpipe);
1466
1467 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1468 }
1469
1470#ifdef MAC
1471 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1472 mac_destroy_pipe(cpipe);
1473#endif
1474
1475 /*
1476 * Disconnect from peer
1477 */
1478 if ((ppipe = cpipe->pipe_peer) != NULL) {
1479
1480 ppipe->pipe_state |= PIPE_EOF;
1481
1482 pipeselwakeup(ppipe, ppipe);
1483 wakeup(ppipe);
1484
1485 if (cpipe->pipe_state & PIPE_KNOTE)
1486 KNOTE(&ppipe->pipe_sel.si_note, 1);
1487
1488 postpipeevent(ppipe, EV_RCLOSED);
1489
1490 ppipe->pipe_peer = NULL;
1491 }
1492 evpipefree(cpipe);
1493
1494 /*
1495 * free resources
1496 */
1497 if (PIPE_MTX(cpipe) != NULL) {
1498 if (ppipe != NULL) {
1499 /*
1500 * since the mutex is shared and the peer is still
1501 * alive, we need to release the mutex, not free it
1502 */
1503 PIPE_UNLOCK(cpipe);
1504 } else {
1505 /*
1506 * peer is gone, so we're the sole party left with
1507 * interest in this mutex... we can just free it
1508 */
1509 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
1510 }
1511 }
1512 pipe_free_kmem(cpipe);
1513
1514 zfree(pipe_zone, cpipe);
1515}
1516
1517
1518/*ARGSUSED*/
1519static int
1520pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
1521{
1522 struct pipe *cpipe;
1523
1524 cpipe = (struct pipe *)kn->kn_fp->f_data;
1525
1526 PIPE_LOCK(cpipe);
1527
1528 switch (kn->kn_filter) {
1529 case EVFILT_READ:
1530 kn->kn_fop = &pipe_rfiltops;
1531 break;
1532 case EVFILT_WRITE:
1533 kn->kn_fop = &pipe_wfiltops;
1534
1535 if (cpipe->pipe_peer == NULL) {
1536 /*
1537 * other end of pipe has been closed
1538 */
1539 PIPE_UNLOCK(cpipe);
1540 return (EPIPE);
1541 }
1542 cpipe = cpipe->pipe_peer;
1543 break;
1544 default:
1545 PIPE_UNLOCK(cpipe);
1546 return (1);
1547 }
1548
1549 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
1550 cpipe->pipe_state |= PIPE_KNOTE;
1551
1552 PIPE_UNLOCK(cpipe);
1553 return (0);
1554}
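/*
 * Illustrative userspace sketch of the knote path below (not part of this
 * kernel file; assumes the standard <sys/event.h> kqueue API):
 *
 *	struct kevent kev, out;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	(void)kevent(kq, &kev, 1, NULL, 0, NULL);  // attach via pipe_kqfilter()
 *	(void)kevent(kq, NULL, 0, &out, 1, NULL);  // returns when filt_piperead()
 *						   // reports data or EV_EOF
 */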
1555
1556static void
1557filt_pipedetach(struct knote *kn)
1558{
1559 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1560
1561 PIPE_LOCK(cpipe);
1562
1563 if (kn->kn_filter == EVFILT_WRITE) {
1564 if (cpipe->pipe_peer == NULL) {
1565 PIPE_UNLOCK(cpipe);
1566 return;
1567 }
1568 cpipe = cpipe->pipe_peer;
1569 }
1570 if (cpipe->pipe_state & PIPE_KNOTE) {
1571 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
1572 cpipe->pipe_state &= ~PIPE_KNOTE;
1573 }
1574 PIPE_UNLOCK(cpipe);
1575}
1576
1577/*ARGSUSED*/
1578static int
1579filt_piperead(struct knote *kn, long hint)
1580{
1581 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1582 struct pipe *wpipe;
1583 int retval;
1584
1585 /*
1586 * if hint == 0, then we've been called from the kevent
1587 * world directly and do not currently hold the pipe mutex...
1588 * if hint == 1, we're being called back via the KNOTE post
1589 * we made in pipeselwakeup, and we already hold the mutex...
1590 */
1591 if (hint == 0)
1592 PIPE_LOCK(rpipe);
1593
1594 wpipe = rpipe->pipe_peer;
1595 kn->kn_data = rpipe->pipe_buffer.cnt;
1596
1597#ifndef PIPE_NODIRECT
1598 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
1599 kn->kn_data = rpipe->pipe_map.cnt;
1600#endif
1601 if ((rpipe->pipe_state & PIPE_EOF) ||
1602 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1603 kn->kn_flags |= EV_EOF;
1604 retval = 1;
1605 } else
1606 retval = (kn->kn_sfflags & NOTE_LOWAT) ?
1607 (kn->kn_data >= kn->kn_sdata) : (kn->kn_data > 0);
1608
1609 if (hint == 0)
1610 PIPE_UNLOCK(rpipe);
1611
1612 return (retval);
1613}
1614
1615/*ARGSUSED*/
1616static int
1617filt_pipewrite(struct knote *kn, long hint)
1618{
1619 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1620 struct pipe *wpipe;
1621
1622 /*
1623 * if hint == 0, then we've been called from the kevent
1624 * world directly and do not currently hold the pipe mutex...
1625 * if hint == 1, we're being called back via the KNOTE post
1626 * we made in pipeselwakeup, and we already hold the mutex...
1627 */
1628 if (hint == 0)
1629 PIPE_LOCK(rpipe);
1630
1631 wpipe = rpipe->pipe_peer;
1632
1633 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1634 kn->kn_data = 0;
1635 kn->kn_flags |= EV_EOF;
1636
1637 if (hint == 0)
1638 PIPE_UNLOCK(rpipe);
1639 return (1);
1640 }
1641 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1642
1643#ifndef PIPE_NODIRECT
1644 if (wpipe->pipe_state & PIPE_DIRECTW)
1645 kn->kn_data = 0;
1646#endif
1647 if (hint == 0)
1648 PIPE_UNLOCK(rpipe);
1649
1650 return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
1651 kn->kn_sdata : PIPE_BUF));
1652}