]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_generic.c
xnu-517.3.15.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
CommitLineData
1c79356b 1/*
9bccf70c 2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
43866e37 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
43866e37
A
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37
A
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
1c79356b
A
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26/*
27 * Copyright (c) 1982, 1986, 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
64 */
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/filedesc.h>
69#include <sys/ioctl.h>
70#include <sys/file.h>
71#include <sys/proc.h>
72#include <sys/socketvar.h>
73#include <sys/uio.h>
74#include <sys/kernel.h>
75#include <sys/stat.h>
76#include <sys/malloc.h>
77
1c79356b
A
78#include <sys/mount.h>
79#include <sys/protosw.h>
80#include <sys/ev.h>
81#include <sys/user.h>
82#include <sys/kdebug.h>
83#include <kern/assert.h>
84#include <kern/thread_act.h>
85
86#include <sys/mbuf.h>
87#include <sys/socket.h>
88#include <sys/socketvar.h>
89#include <sys/errno.h>
55e303ae 90#include <sys/syscall.h>
1c79356b
A
91
92#include <net/if.h>
93#include <net/route.h>
94
95#include <netinet/in.h>
96#include <netinet/in_systm.h>
97#include <netinet/ip.h>
98#include <netinet/in_pcb.h>
99#include <netinet/ip_var.h>
100#include <netinet/ip6.h>
101#include <netinet/tcp.h>
102#include <netinet/tcp_fsm.h>
103#include <netinet/tcp_seq.h>
104#include <netinet/tcp_timer.h>
105#include <netinet/tcp_var.h>
106#include <netinet/tcpip.h>
107#include <netinet/tcp_debug.h>
0b4e3aa0
A
108/* for wait queue based select */
109#include <kern/wait_queue.h>
9bccf70c
A
110#if KTRACE
111#include <sys/ktrace.h>
112#endif
55e303ae 113#include <sys/vnode.h>
9bccf70c 114
9bccf70c 115
55e303ae 116__private_extern__ struct file*
9bccf70c
A
117holdfp(fdp, fd, flag)
118 struct filedesc* fdp;
119 int fd, flag;
120{
121 struct file* fp;
122
123 if (((u_int)fd) >= fdp->fd_nfiles ||
124 (fp = fdp->fd_ofiles[fd]) == NULL ||
125 (fp->f_flag & flag) == 0) {
126 return (NULL);
127 }
d7e50217
A
128 if (fref(fp) == -1)
129 return (NULL);
9bccf70c
A
130 return (fp);
131}
1c79356b
A
132
133/*
134 * Read system call.
135 */
9bccf70c 136#ifndef _SYS_SYSPROTO_H_
1c79356b
A
137struct read_args {
138 int fd;
139 char *cbuf;
140 u_int nbyte;
141};
9bccf70c
A
142#endif
143int
1c79356b
A
144read(p, uap, retval)
145 struct proc *p;
146 register struct read_args *uap;
147 register_t *retval;
9bccf70c
A
148{
149 register struct file *fp;
150 int error;
151
152 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
153 return (EBADF);
154 error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte,
155 (off_t)-1, 0, retval);
156 frele(fp);
157 return(error);
158}
159
160/*
161 * Pread system call
162 */
163#ifndef _SYS_SYSPROTO_H_
164struct pread_args {
165 int fd;
166 void *buf;
167 size_t nbyte;
168#ifdef DOUBLE_ALIGN_PARAMS
169 int pad;
170#endif
171 off_t offset;
172};
173#endif
174int
175pread(p, uap, retval)
176 struct proc *p;
177 register struct pread_args *uap;
178 int *retval;
179{
180 register struct file *fp;
181 int error;
182
183 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
184 return (EBADF);
185 if (fp->f_type != DTYPE_VNODE) {
186 error = ESPIPE;
187 } else {
188 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
189 uap->offset, FOF_OFFSET, retval);
190 }
191 frele(fp);
55e303ae
A
192
193 if (!error)
194 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
195 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
196
9bccf70c
A
197 return(error);
198}
199
200/*
201 * Code common for read and pread
202 */
55e303ae 203__private_extern__ int
9bccf70c
A
204dofileread(p, fp, fd, buf, nbyte, offset, flags, retval)
205 struct proc *p;
206 struct file *fp;
207 int fd, flags;
208 void *buf;
209 size_t nbyte;
210 off_t offset;
211 int *retval;
1c79356b
A
212{
213 struct uio auio;
214 struct iovec aiov;
9bccf70c
A
215 long cnt, error = 0;
216#if KTRACE
217 struct iovec ktriov;
218 struct uio ktruio;
219 int didktr = 0;
220#endif
1c79356b 221
9bccf70c
A
222 aiov.iov_base = (caddr_t)buf;
223 aiov.iov_len = nbyte;
1c79356b
A
224 auio.uio_iov = &aiov;
225 auio.uio_iovcnt = 1;
9bccf70c
A
226 auio.uio_offset = offset;
227 if (nbyte > INT_MAX)
228 return (EINVAL);
229 auio.uio_resid = nbyte;
1c79356b 230 auio.uio_rw = UIO_READ;
9bccf70c
A
231 auio.uio_segflg = UIO_USERSPACE;
232 auio.uio_procp = p;
233#if KTRACE
234 /*
235 * if tracing, save a copy of iovec
236 */
237 if (KTRPOINT(p, KTR_GENIO)) {
238 ktriov = aiov;
239 ktruio = auio;
240 didktr = 1;
241 }
242#endif
243 cnt = nbyte;
244
245 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
246 if (auio.uio_resid != cnt && (error == ERESTART ||
247 error == EINTR || error == EWOULDBLOCK))
248 error = 0;
249 }
250 cnt -= auio.uio_resid;
251#if KTRACE
252 if (didktr && error == 0) {
253 ktruio.uio_iov = &ktriov;
254 ktruio.uio_resid = cnt;
255 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error,
256 KERNEL_FUNNEL);
257 }
258#endif
259 *retval = cnt;
260 return (error);
1c79356b
A
261}
262
9bccf70c
A
263/*
264 * Scatter read system call.
265 */
266#ifndef _SYS_SYSPROTO_H_
1c79356b
A
267struct readv_args {
268 int fd;
269 struct iovec *iovp;
270 u_int iovcnt;
271};
9bccf70c
A
272#endif
273int
1c79356b
A
274readv(p, uap, retval)
275 struct proc *p;
276 register struct readv_args *uap;
277 int *retval;
278{
279 struct uio auio;
280 register struct iovec *iov;
281 int error;
282 struct iovec aiov[UIO_SMALLIOV];
283
284 if (uap->iovcnt > UIO_SMALLIOV) {
285 if (uap->iovcnt > UIO_MAXIOV)
286 return (EINVAL);
287 if ((iov = (struct iovec *)
288 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
289 return (ENOMEM);
290 } else
291 iov = aiov;
292 auio.uio_iov = iov;
293 auio.uio_iovcnt = uap->iovcnt;
294 auio.uio_rw = UIO_READ;
295 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
296 uap->iovcnt * sizeof (struct iovec));
297 if (!error)
298 error = rwuio(p, uap->fd, &auio, UIO_READ, retval);
299 if (uap->iovcnt > UIO_SMALLIOV)
300 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
301 return (error);
302}
303
304/*
305 * Write system call
306 */
9bccf70c 307#ifndef _SYS_SYSPROTO_H_
1c79356b
A
308struct write_args {
309 int fd;
310 char *cbuf;
311 u_int nbyte;
312};
9bccf70c
A
313#endif
314int
1c79356b
A
315write(p, uap, retval)
316 struct proc *p;
317 register struct write_args *uap;
318 int *retval;
319{
9bccf70c
A
320 register struct file *fp;
321 int error;
322
323 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
324 return (EBADF);
325 error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte,
326 (off_t)-1, 0, retval);
327 frele(fp);
328 return(error);
329}
330
331/*
332 * Pwrite system call
333 */
334#ifndef _SYS_SYSPROTO_H_
335struct pwrite_args {
336 int fd;
337 const void *buf;
338 size_t nbyte;
339#ifdef DOUBLE_ALIGN_PARAMS
340 int pad;
341#endif
342 off_t offset;
343};
344#endif
345int
346pwrite(p, uap, retval)
347 struct proc *p;
348 register struct pwrite_args *uap;
349 int *retval;
350{
351 register struct file *fp;
352 int error;
353
354 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
355 return (EBADF);
356 if (fp->f_type != DTYPE_VNODE) {
357 error = ESPIPE;
358 } else {
359 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
360 uap->offset, FOF_OFFSET, retval);
361 }
362 frele(fp);
55e303ae
A
363
364 if (!error)
365 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
366 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
367
9bccf70c
A
368 return(error);
369}
370
55e303ae 371__private_extern__ int
9bccf70c
A
372dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval)
373 struct proc *p;
374 struct file *fp;
375 int fd, flags;
376 const void *buf;
377 size_t nbyte;
378 off_t offset;
379 int *retval;
380{
1c79356b
A
381 struct uio auio;
382 struct iovec aiov;
9bccf70c
A
383 long cnt, error = 0;
384#if KTRACE
385 struct iovec ktriov;
386 struct uio ktruio;
387 int didktr = 0;
388#endif
389
390 aiov.iov_base = (void *)(uintptr_t)buf;
391 aiov.iov_len = nbyte;
1c79356b 392 auio.uio_iov = &aiov;
9bccf70c
A
393 auio.uio_iovcnt = 1;
394 auio.uio_offset = offset;
395 if (nbyte > INT_MAX)
396 return (EINVAL);
397 auio.uio_resid = nbyte;
1c79356b 398 auio.uio_rw = UIO_WRITE;
9bccf70c
A
399 auio.uio_segflg = UIO_USERSPACE;
400 auio.uio_procp = p;
401#if KTRACE
402 /*
403 * if tracing, save a copy of iovec and uio
404 */
405 if (KTRPOINT(p, KTR_GENIO)) {
406 ktriov = aiov;
407 ktruio = auio;
408 didktr = 1;
409 }
410#endif
411 cnt = nbyte;
412 if (fp->f_type == DTYPE_VNODE)
413 bwillwrite();
414 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
415 if (auio.uio_resid != cnt && (error == ERESTART ||
416 error == EINTR || error == EWOULDBLOCK))
417 error = 0;
55e303ae
A
418 /* The socket layer handles SIGPIPE */
419 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
420 psignal(p, SIGPIPE);
9bccf70c
A
421 }
422 cnt -= auio.uio_resid;
423#if KTRACE
424 if (didktr && error == 0) {
425 ktruio.uio_iov = &ktriov;
426 ktruio.uio_resid = cnt;
427 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error,
428 KERNEL_FUNNEL);
429 }
430#endif
431 *retval = cnt;
432 return (error);
1c79356b 433}
9bccf70c
A
434
435/*
436 * Gather write system call
437 */
438#ifndef _SYS_SYSPROTO_H_
1c79356b
A
439struct writev_args {
440 int fd;
441 struct iovec *iovp;
442 u_int iovcnt;
443};
9bccf70c
A
444#endif
445int
1c79356b
A
446writev(p, uap, retval)
447 struct proc *p;
448 register struct writev_args *uap;
449 int *retval;
450{
451 struct uio auio;
452 register struct iovec *iov;
453 int error;
454 struct iovec aiov[UIO_SMALLIOV];
455
456 if (uap->iovcnt > UIO_SMALLIOV) {
457 if (uap->iovcnt > UIO_MAXIOV)
458 return (EINVAL);
459 if ((iov = (struct iovec *)
460 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
461 return (ENOMEM);
462 } else
463 iov = aiov;
464 auio.uio_iov = iov;
465 auio.uio_iovcnt = uap->iovcnt;
466 auio.uio_rw = UIO_WRITE;
467 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
468 uap->iovcnt * sizeof (struct iovec));
469 if (!error)
470 error = rwuio(p, uap->fd, &auio, UIO_WRITE, retval);
471 if (uap->iovcnt > UIO_SMALLIOV)
472 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
473 return (error);
474}
475
9bccf70c 476int
1c79356b
A
477rwuio(p, fdes, uio, rw, retval)
478 struct proc *p;
479 int fdes;
480 register struct uio *uio;
481 enum uio_rw rw;
482 int *retval;
483{
484 struct file *fp;
485 register struct iovec *iov;
486 int i, count, flag, error;
9bccf70c
A
487#if KTRACE
488 struct iovec *ktriov;
489 struct uio ktruio;
490 int didktr = 0;
491 u_int iovlen;
492#endif
1c79356b
A
493
494 if (error = fdgetf(p, fdes, &fp))
495 return (error);
496
497 if ((fp->f_flag&(rw==UIO_READ ? FREAD : FWRITE)) == 0) {
498 return(EBADF);
499 }
500 uio->uio_resid = 0;
501 uio->uio_segflg = UIO_USERSPACE;
502 uio->uio_procp = p;
503 iov = uio->uio_iov;
504 for (i = 0; i < uio->uio_iovcnt; i++) {
505 if (iov->iov_len < 0) {
506 return(EINVAL);
507 }
508 uio->uio_resid += iov->iov_len;
509 if (uio->uio_resid < 0) {
510 return(EINVAL);
511 }
512 iov++;
513 }
514 count = uio->uio_resid;
9bccf70c
A
515#if KTRACE
516 /*
517 * if tracing, save a copy of iovec
518 */
519 if (KTRPOINT(p, KTR_GENIO)) {
520 iovlen = uio->uio_iovcnt * sizeof (struct iovec);
521 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
522 bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen);
523 ktruio = *uio;
524 didktr = 1;
525 }
526#endif
527
1c79356b 528 if (rw == UIO_READ) {
9bccf70c
A
529 if (error = fo_read(fp, uio, fp->f_cred, 0, p))
530 if (uio->uio_resid != count && (error == ERESTART ||
531 error == EINTR || error == EWOULDBLOCK))
532 error = 0;
1c79356b 533 } else {
9bccf70c
A
534 if (fp->f_type == DTYPE_VNODE)
535 bwillwrite();
536 if (error = fo_write(fp, uio, fp->f_cred, 0, p)) {
1c79356b 537 if (uio->uio_resid != count && (error == ERESTART ||
9bccf70c 538 error == EINTR || error == EWOULDBLOCK))
1c79356b 539 error = 0;
9bccf70c
A
540 /* The socket layer handles SIGPIPE */
541 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
1c79356b
A
542 psignal(p, SIGPIPE);
543 }
544 }
9bccf70c 545
1c79356b 546 *retval = count - uio->uio_resid;
9bccf70c
A
547
548#if KTRACE
549 if (didktr) {
550 if (error == 0) {
551 ktruio.uio_iov = ktriov;
552 ktruio.uio_resid = *retval;
553 ktrgenio(p->p_tracep, fdes, rw, &ktruio, error,
554 KERNEL_FUNNEL);
555 }
556 FREE(ktriov, M_TEMP);
557 }
558#endif
559
1c79356b
A
560 return(error);
561}
562
563/*
564 * Ioctl system call
565 */
9bccf70c 566#ifndef _SYS_SYSPROTO_H_
1c79356b
A
567struct ioctl_args {
568 int fd;
569 u_long com;
570 caddr_t data;
571};
9bccf70c
A
572#endif
573int
1c79356b
A
574ioctl(p, uap, retval)
575 struct proc *p;
576 register struct ioctl_args *uap;
577 register_t *retval;
578{
579 struct file *fp;
580 register u_long com;
581 register int error;
582 register u_int size;
583 caddr_t data, memp;
584 int tmp;
585#define STK_PARAMS 128
586 char stkbuf[STK_PARAMS];
587
588 if (error = fdgetf(p, uap->fd, &fp))
589 return (error);
590
591 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
592 return (EBADF);
593
9bccf70c
A
594#if NETAT
595 /*
596 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
1c79356b
A
597 * while implementing an ATioctl system call
598 */
1c79356b
A
599 {
600 extern int appletalk_inited;
601
602 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
603#ifdef APPLETALK_DEBUG
604 kprintf("ioctl: special AppleTalk \n");
605#endif
9bccf70c 606 error = fo_ioctl(fp, uap->com, uap->data, p);
1c79356b
A
607 return(error);
608 }
609 }
610
611#endif /* NETAT */
612
613
614 switch (com = uap->com) {
615 case FIONCLEX:
616 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
617 return (0);
618 case FIOCLEX:
619 *fdflags(p, uap->fd) |= UF_EXCLOSE;
620 return (0);
621 }
622
623 /*
624 * Interpret high order word to find amount of data to be
625 * copied to/from the user's address space.
626 */
627 size = IOCPARM_LEN(com);
628 if (size > IOCPARM_MAX)
629 return (ENOTTY);
630 memp = NULL;
631 if (size > sizeof (stkbuf)) {
632 if ((memp = (caddr_t)kalloc(size)) == 0)
633 return(ENOMEM);
634 data = memp;
635 } else
636 data = stkbuf;
637 if (com&IOC_IN) {
638 if (size) {
639 error = copyin(uap->data, data, (u_int)size);
640 if (error) {
641 if (memp)
642 kfree(memp, size);
643 return (error);
644 }
645 } else
646 *(caddr_t *)data = uap->data;
647 } else if ((com&IOC_OUT) && size)
648 /*
649 * Zero the buffer so the user always
650 * gets back something deterministic.
651 */
652 bzero(data, size);
653 else if (com&IOC_VOID)
654 *(caddr_t *)data = uap->data;
655
656 switch (com) {
657
658 case FIONBIO:
659 if (tmp = *(int *)data)
660 fp->f_flag |= FNONBLOCK;
661 else
662 fp->f_flag &= ~FNONBLOCK;
9bccf70c 663 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
1c79356b
A
664 break;
665
666 case FIOASYNC:
667 if (tmp = *(int *)data)
668 fp->f_flag |= FASYNC;
669 else
670 fp->f_flag &= ~FASYNC;
9bccf70c 671 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
1c79356b
A
672 break;
673
674 case FIOSETOWN:
675 tmp = *(int *)data;
676 if (fp->f_type == DTYPE_SOCKET) {
677 ((struct socket *)fp->f_data)->so_pgid = tmp;
678 error = 0;
679 break;
680 }
681 if (tmp <= 0) {
682 tmp = -tmp;
683 } else {
684 struct proc *p1 = pfind(tmp);
685 if (p1 == 0) {
686 error = ESRCH;
687 break;
688 }
689 tmp = p1->p_pgrp->pg_id;
690 }
9bccf70c 691 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
1c79356b
A
692 break;
693
694 case FIOGETOWN:
695 if (fp->f_type == DTYPE_SOCKET) {
696 error = 0;
697 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
698 break;
699 }
9bccf70c 700 error = fo_ioctl(fp, TIOCGPGRP, data, p);
1c79356b
A
701 *(int *)data = -*(int *)data;
702 break;
703
704 default:
9bccf70c 705 error = fo_ioctl(fp, com, data, p);
1c79356b
A
706 /*
707 * Copy any data to user, size was
708 * already set and checked above.
709 */
710 if (error == 0 && (com&IOC_OUT) && size)
711 error = copyout(data, uap->data, (u_int)size);
712 break;
713 }
714 if (memp)
715 kfree(memp, size);
716 return (error);
717}
718
1c79356b 719int selwait, nselcoll;
0b4e3aa0
A
720#define SEL_FIRSTPASS 1
721#define SEL_SECONDPASS 2
9bccf70c
A
722extern int selcontinue(int error);
723extern int selprocess(int error, int sel_pass);
724static int selscan(struct proc *p, struct _select * sel,
725 int nfd, register_t *retval, int sel_pass);
726static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
727 int nfd, int * count, int * nfcount);
728extern uint64_t tvtoabstime(struct timeval *tvp);
1c79356b
A
729
730/*
731 * Select system call.
732 */
9bccf70c 733#ifndef _SYS_SYSPROTO_H_
1c79356b
A
734struct select_args {
735 int nd;
736 u_int32_t *in;
737 u_int32_t *ou;
738 u_int32_t *ex;
739 struct timeval *tv;
740};
9bccf70c
A
741#endif
742int
1c79356b
A
743select(p, uap, retval)
744 register struct proc *p;
745 register struct select_args *uap;
746 register_t *retval;
747{
9bccf70c 748 int error = 0;
0b4e3aa0 749 u_int ni, nw, size;
1c79356b
A
750 thread_act_t th_act;
751 struct uthread *uth;
752 struct _select *sel;
753 int needzerofill = 1;
0b4e3aa0
A
754 int kfcount =0;
755 int nfcount = 0;
756 int count = 0;
1c79356b
A
757
758 th_act = current_act();
759 uth = get_bsdthread_info(th_act);
760 sel = &uth->uu_state.ss_select;
761 retval = (int *)get_bsduthreadrval(th_act);
762 *retval = 0;
763
0b4e3aa0 764 if (uap->nd < 0) {
1c79356b 765 return (EINVAL);
0b4e3aa0 766 }
1c79356b
A
767
768 if (uap->nd > p->p_fd->fd_nfiles)
769 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
770
771 nw = howmany(uap->nd, NFDBITS);
772 ni = nw * sizeof(fd_mask);
773
774 /*
775 * if this is the first select by the thread
776 * allocate the space for bits.
777 */
778 if (sel->nbytes == 0) {
779 sel->nbytes = 3 * ni;
780 MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
781 MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
782 bzero((caddr_t)sel->ibits, sel->nbytes);
783 bzero((caddr_t)sel->obits, sel->nbytes);
784 needzerofill = 0;
785 }
786
787 /*
788 * if the previously allocated space for the bits
789 * is smaller than what is requested. Reallocate.
790 */
791 if (sel->nbytes < (3 * ni)) {
792 sel->nbytes = (3 * ni);
793 FREE(sel->ibits, M_TEMP);
794 FREE(sel->obits, M_TEMP);
795 MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
796 MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
797 bzero((caddr_t)sel->ibits, sel->nbytes);
798 bzero((caddr_t)sel->obits, sel->nbytes);
799 needzerofill = 0;
800 }
801
802 if (needzerofill) {
803 bzero((caddr_t)sel->ibits, sel->nbytes);
804 bzero((caddr_t)sel->obits, sel->nbytes);
805 }
806
807 /*
808 * get the bits from the user address space
809 */
810#define getbits(name, x) \
811 do { \
812 if (uap->name && (error = copyin((caddr_t)uap->name, \
813 (caddr_t)&sel->ibits[(x) * nw], ni))) \
814 goto continuation; \
815 } while (0)
816
817 getbits(in, 0);
818 getbits(ou, 1);
819 getbits(ex, 2);
820#undef getbits
821
822 if (uap->tv) {
9bccf70c
A
823 struct timeval atv;
824
825 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
1c79356b
A
826 if (error)
827 goto continuation;
9bccf70c 828 if (itimerfix(&atv)) {
1c79356b
A
829 error = EINVAL;
830 goto continuation;
831 }
0b4e3aa0 832
9bccf70c
A
833 clock_absolutetime_interval_to_deadline(
834 tvtoabstime(&atv), &sel->abstime);
835 }
836 else
837 sel->abstime = 0;
838
0b4e3aa0
A
839 sel->nfcount = 0;
840 if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) {
841 goto continuation;
842 }
843
844 sel->nfcount = nfcount;
845 sel->count = count;
846 size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK);
847 if (sel->allocsize) {
848 if (uth->uu_wqsub == 0)
849 panic("select: wql memory smashed");
850 /* needed for the select now */
851 if (size > sel->allocsize) {
852 kfree(uth->uu_wqsub, sel->allocsize);
853 sel->allocsize = size;
854 uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
855 if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
856 panic("failed to allocate memory for waitqueue\n");
857 sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
858 }
859 } else {
860 sel->count = count;
861 sel->allocsize = size;
862 uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
863 if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
864 panic("failed to allocate memory for waitqueue\n");
865 sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
866 }
867 bzero(uth->uu_wqsub, size);
868 wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
869
1c79356b 870continuation:
9bccf70c 871 return selprocess(error, SEL_FIRSTPASS);
0b4e3aa0
A
872}
873
874int
875selcontinue(int error)
876{
9bccf70c 877 return selprocess(error, SEL_SECONDPASS);
1c79356b
A
878}
879
880int
0b4e3aa0 881selprocess(error, sel_pass)
1c79356b 882{
9bccf70c 883 int ncoll;
1c79356b
A
884 u_int ni, nw;
885 thread_act_t th_act;
886 struct uthread *uth;
887 struct proc *p;
888 struct select_args *uap;
889 int *retval;
890 struct _select *sel;
0b4e3aa0 891 int unwind = 1;
9bccf70c 892 int prepost = 0;
0b4e3aa0
A
893 int somewakeup = 0;
894 int doretry = 0;
9bccf70c 895 wait_result_t wait_result;
1c79356b
A
896
897 p = current_proc();
898 th_act = current_act();
899 uap = (struct select_args *)get_bsduthreadarg(th_act);
900 retval = (int *)get_bsduthreadrval(th_act);
901 uth = get_bsdthread_info(th_act);
902 sel = &uth->uu_state.ss_select;
903
0b4e3aa0
A
904 /* if it is first pass wait queue is not setup yet */
905 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
906 unwind = 0;
907 if (sel->count == 0)
908 unwind = 0;
1c79356b 909retry:
0b4e3aa0 910 if (error != 0) {
1c79356b 911 goto done;
0b4e3aa0
A
912 }
913
1c79356b
A
914 ncoll = nselcoll;
915 p->p_flag |= P_SELECT;
0b4e3aa0
A
916 /* skip scans if the select is just for timeouts */
917 if (sel->count) {
918 if (sel_pass == SEL_FIRSTPASS)
919 wait_queue_sub_clearrefs(uth->uu_wqsub);
920
921 error = selscan(p, sel, uap->nd, retval, sel_pass);
922 if (error || *retval) {
923 goto done;
924 }
925 if (prepost) {
926 /* if the select of log, then we canwakeup and discover some one
927 * else already read the data; go toselct again if time permits
928 */
929 prepost = 0;
930 doretry = 1;
931 }
932 if (somewakeup) {
933 somewakeup = 0;
934 doretry = 1;
935 }
936 }
937
9bccf70c
A
938 if (uap->tv) {
939 uint64_t now;
940
941 clock_get_uptime(&now);
942 if (now >= sel->abstime)
943 goto done;
1c79356b 944 }
0b4e3aa0
A
945
946 if (doretry) {
947 /* cleanup obits and try again */
948 doretry = 0;
949 sel_pass = SEL_FIRSTPASS;
950 goto retry;
951 }
952
1c79356b
A
953 /*
954 * To effect a poll, the timeout argument should be
955 * non-nil, pointing to a zero-valued timeval structure.
956 */
9bccf70c 957 if (uap->tv && sel->abstime == 0) {
1c79356b
A
958 goto done;
959 }
0b4e3aa0
A
960
961 /* No spurious wakeups due to colls,no need to check for them */
962 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
963 sel_pass = SEL_FIRSTPASS;
1c79356b
A
964 goto retry;
965 }
0b4e3aa0 966
1c79356b
A
967 p->p_flag &= ~P_SELECT;
968
0b4e3aa0
A
969 /* if the select is just for timeout skip check */
970 if (sel->count &&(sel_pass == SEL_SECONDPASS))
971 panic("selprocess: 2nd pass assertwaiting");
972
973 /* Wait Queue Subordinate has waitqueue as first element */
9bccf70c
A
974 wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub,
975 &selwait, THREAD_ABORTSAFE);
976 if (wait_result != THREAD_AWAKENED) {
977 /* there are no preposted events */
978 error = tsleep1(NULL, PSOCK | PCATCH,
979 "select", sel->abstime, selcontinue);
0b4e3aa0
A
980 } else {
981 prepost = 1;
982 error = 0;
983 }
984
985 sel_pass = SEL_SECONDPASS;
986 if (error == 0) {
987 if (!prepost)
988 somewakeup =1;
1c79356b 989 goto retry;
0b4e3aa0 990 }
1c79356b 991done:
0b4e3aa0
A
992 if (unwind)
993 wait_subqueue_unlink_all(uth->uu_wqsub);
1c79356b
A
994 p->p_flag &= ~P_SELECT;
995 /* select is not restarted after signals... */
996 if (error == ERESTART)
997 error = EINTR;
998 if (error == EWOULDBLOCK)
999 error = 0;
1c79356b
A
1000 nw = howmany(uap->nd, NFDBITS);
1001 ni = nw * sizeof(fd_mask);
1002
1003#define putbits(name, x) \
1004 do { \
1005 if (uap->name && (error2 = copyout((caddr_t)&sel->obits[(x) * nw], \
1006 (caddr_t)uap->name, ni))) \
1007 error = error2; \
1008 } while (0)
1009
1010 if (error == 0) {
1011 int error2;
1012
1013 putbits(in, 0);
1014 putbits(ou, 1);
1015 putbits(ex, 2);
1016#undef putbits
1017 }
1c79356b 1018 return(error);
1c79356b
A
1019}
1020
1021static int
0b4e3aa0 1022selscan(p, sel, nfd, retval, sel_pass)
1c79356b 1023 struct proc *p;
0b4e3aa0 1024 struct _select *sel;
1c79356b
A
1025 int nfd;
1026 register_t *retval;
0b4e3aa0 1027 int sel_pass;
1c79356b
A
1028{
1029 register struct filedesc *fdp = p->p_fd;
1030 register int msk, i, j, fd;
1031 register u_int32_t bits;
1032 struct file *fp;
1033 int n = 0;
0b4e3aa0 1034 int nc = 0;
1c79356b
A
1035 static int flag[3] = { FREAD, FWRITE, 0 };
1036 u_int32_t *iptr, *optr;
1037 u_int nw;
0b4e3aa0
A
1038 u_int32_t *ibits, *obits;
1039 char * wql;
1040 int nfunnel = 0;
1041 int count, nfcount;
1042 char * wql_ptr;
55e303ae 1043 struct vnode *vp;
1c79356b
A
1044
1045 /*
1046 * Problems when reboot; due to MacOSX signal probs
1047 * in Beaker1C ; verify that the p->p_fd is valid
1048 */
1049 if (fdp == NULL) {
1050 *retval=0;
1051 return(EIO);
1052 }
1053
0b4e3aa0
A
1054 ibits = sel->ibits;
1055 obits = sel->obits;
1056 wql = sel->wql;
1057
1058 count = sel->count;
1059 nfcount = sel->nfcount;
1060
1061 if (nfcount > count)
1062 panic("selcount count<nfcount");
1063
1c79356b
A
1064 nw = howmany(nfd, NFDBITS);
1065
0b4e3aa0
A
1066 nc = 0;
1067 if ( nfcount < count) {
1068 /* some or all in kernel funnel */
1069 for (msk = 0; msk < 3; msk++) {
1070 iptr = (u_int32_t *)&ibits[msk * nw];
1071 optr = (u_int32_t *)&obits[msk * nw];
1072 for (i = 0; i < nfd; i += NFDBITS) {
1073 bits = iptr[i/NFDBITS];
1074 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1075 bits &= ~(1 << j);
1076 fp = fdp->fd_ofiles[fd];
1077 if (fp == NULL ||
1078 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1079 return(EBADF);
1080 }
1081 if (sel_pass == SEL_SECONDPASS)
1082 wql_ptr = (char *)0;
1083 else
1084 wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
55e303ae
A
1085 /*
1086 * Merlot: need to remove the bogus f_data check
1087 * from the following "if" statement. It's there
1088 * because of various problems stemming from
1089 * races due to the split-funnels and lack of real
1090 * referencing on sockets...
1091 */
1092 if (fp->f_ops && (fp->f_type != DTYPE_SOCKET)
1093 && (fp->f_data != (caddr_t)-1)
1094 && !(fp->f_type == DTYPE_VNODE
1095 && (vp = (struct vnode *)fp->f_data)
1096 && vp->v_type == VFIFO)
9bccf70c 1097 && fo_select(fp, flag[msk], wql_ptr, p)) {
0b4e3aa0
A
1098 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1099 n++;
1100 }
1101 nc++;
1c79356b
A
1102 }
1103 }
1104 }
1105 }
0b4e3aa0
A
1106
1107 if (nfcount) {
1108 /* socket file descriptors for scan */
1109 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1110
1111 nc = 0;
1112 for (msk = 0; msk < 3; msk++) {
1113 iptr = (u_int32_t *)&ibits[msk * nw];
1114 optr = (u_int32_t *)&obits[msk * nw];
1115 for (i = 0; i < nfd; i += NFDBITS) {
1116 bits = iptr[i/NFDBITS];
1117 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1118 bits &= ~(1 << j);
1119 fp = fdp->fd_ofiles[fd];
1120 if (fp == NULL ||
1121 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
9bccf70c 1122 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
0b4e3aa0
A
1123 return(EBADF);
1124 }
1125 if (sel_pass == SEL_SECONDPASS)
1126 wql_ptr = (char *)0;
1127 else
1128 wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
55e303ae
A
1129 if (fp->f_ops
1130 && (fp->f_type == DTYPE_SOCKET
1131 || (fp->f_type == DTYPE_VNODE
1132 && (vp = (struct vnode *)fp->f_data)
1133 && vp != (struct vnode *)-1
1134 && vp->v_type == VFIFO))
1135 && fo_select(fp, flag[msk], wql_ptr, p)) {
0b4e3aa0
A
1136 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1137 n++;
1138 }
1139 nc++;
1140 }
1141 }
1142 }
1143 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1144 }
1145
1c79356b
A
1146 *retval = n;
1147 return (0);
1148}
1149
1150/*ARGSUSED*/
9bccf70c 1151int
1c79356b
A
1152seltrue(dev, flag, p)
1153 dev_t dev;
1154 int flag;
1155 struct proc *p;
1156{
1157
1158 return (1);
1159}
1160
0b4e3aa0
A
/*
 * selcount: pre-scan pass for select().  Walks the three input fd_set
 * bitmaps (read / write / except planes of 'ibits') and reports how
 * many descriptor slots are selected (*count) and how many of those
 * refer to sockets or FIFO vnodes (*nfcount) so the caller can size
 * wait-queue-link storage and decide whether a network-funnel scan
 * pass is needed.
 *
 * Returns 0 on success, EIO when the proc has no filedesc table, or
 * EBADF when any selected slot is empty or marked UF_RESERVED (both
 * counts are zeroed on error).
 *
 * NOTE(review): 'obits' is accepted but never used by this routine.
 */
static int
selcount(p, ibits, obits, nfd, count, nfcount)
	struct proc *p;
	u_int32_t *ibits, *obits;
	int nfd;
	int *count;
	int *nfcount;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register u_int32_t bits;
	struct file *fp;
	int n = 0;		/* total descriptors selected */
	int nc = 0;		/* unused accumulator */
	int nfc = 0;		/* socket/FIFO descriptors selected */
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *fptr, *fbits;
	u_int nw;
	struct vnode *vp;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*count=0;
		*nfcount=0;
		return(EIO);
	}

	/* number of 32-bit words per fd_set plane */
	nw = howmany(nfd, NFDBITS);


	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			/* peel set bits lowest-first via ffs() */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
					(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						*count=0;
						*nfcount=0;
						return(EBADF);
				}
				/*
				 * Sockets and FIFO vnodes are counted
				 * separately: they get polled under the
				 * network funnel by the scan pass.
				 * NOTE(review): unlike the scan loop, this
				 * does not exclude f_data == (struct vnode *)-1
				 * — confirm the two passes cannot disagree.
				 */
				if (fp->f_type == DTYPE_SOCKET ||
					(fp->f_type == DTYPE_VNODE
					  && (vp = (struct vnode *)fp->f_data)
					  && vp->v_type == VFIFO))
					nfc++;
				n++;
			}
		}
	}
	*count = n;
	*nfcount = nfc;
	return (0);
}
1220
1c79356b
A
/*
 * Record a select request.
 *
 * Links the current thread's wait-queue sub-object onto the selinfo's
 * wait queue using the caller-provided pre-allocated link storage
 * 'p_wql', so a later selwakeup() on this selinfo wakes the thread.
 *
 * 'selector' is accepted for interface compatibility but not used;
 * the recorded thread is always the *current* thread.  A NULL 'p_wql'
 * marks the second pass of select(), on which nothing is recorded.
 */
void
selrecord(selector, sip, p_wql)
	struct proc *selector;
	struct selinfo *sip;
	void * p_wql;
{
	thread_act_t cur_act = current_act();
	struct uthread * ut = get_bsdthread_info(cur_act);

	/* need to look at collisions */

	/* second pass on a never-initialized selinfo: nothing to do */
	if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
		return;
	}

	/*do not record if this is second pass of select */
	if((p_wql == (void *)0)) {
		return;
	}

	/* lazily initialize the selinfo's wait queue on first use */
	if ((sip->si_flags & SI_INITED) == 0) {
		wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
		sip->si_flags |= SI_INITED;
		sip->si_flags &= ~SI_CLEAR;
	}

	/* a second recorder on the same selinfo is a collision */
	if (sip->si_flags & SI_RECORDED) {
		sip->si_flags |= SI_COLL;
	} else
		sip->si_flags &= ~SI_COLL;

	sip->si_flags |= SI_RECORDED;
	/* link this thread's wait-queue sub in, unless already a member */
	if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqsub))
		wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql);

	return;
}
1261
/*
 * selwakeup: wake every thread recorded on this selinfo via
 * selrecord().  Collisions are counted in the global 'nselcoll'.
 * No-op if the selinfo was never initialized.
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	/* wake all waiters queued on the selinfo's wait queue */
	if (sip->si_flags & SI_RECORDED) {
		wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
		sip->si_flags &= ~SI_RECORDED;
	}

}
1286
/*
 * selthreadclear: tear down a selinfo (e.g. when the selected object
 * is going away).  Wakes any recorded waiters, marks the selinfo
 * SI_CLEAR, and unlinks all wait-queue links without freeing them
 * (the link storage is owned by the selecting threads).
 */
void
selthreadclear(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		/* wake waiters before clearing so they can re-evaluate */
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	wait_queue_unlinkall_nofree(&sip->si_wait_queue);
}
1302
1303
1304extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp);
1305
1306/*
1307 * called upon socket close. deque and free all events for
1308 * the socket
1309 */
9bccf70c 1310void
1c79356b
A
1311evsofree(struct socket *sp)
1312{
1313 struct eventqelt *eqp, *next;
1314
1315 if (sp == NULL) return;
1316
1317 for (eqp = sp->so_evlist.tqh_first; eqp != NULL; eqp = next) {
1318 next = eqp->ee_slist.tqe_next;
1319 evprocdeque(eqp->ee_proc, eqp); // remove from proc q if there
1320 TAILQ_REMOVE(&sp->so_evlist, eqp, ee_slist); // remove from socket q
1321 FREE(eqp, M_TEMP);
1322 }
1323}
1324
1325
/*
 * KERNEL_DEBUG trace class and sub-codes for the watchevent/waitevent/
 * modwatch socket-event machinery below.
 */
#define DBG_EVENT 0x10		/* MISCDBG sub-class for event tracing */

#define DBG_POST 0x10		/* postevent() activity */
#define DBG_WATCH 0x11		/* watchevent() syscall */
#define DBG_WAIT 0x12		/* waitevent() syscall */
#define DBG_MOD 0x13		/* modwatch() syscall */
#define DBG_EWAKEUP 0x14	/* wakeup issued on event enqueue */
#define DBG_ENQUEUE 0x15	/* evprocenque() */
#define DBG_DEQUEUE 0x16	/* evprocdeque() */

#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1343
1344
/*
 * enque this event if it's not already queued. wakeup
 * the proc if we do queue this event to it.
 *
 * Queuing also disarms the event (ee_eventmask = 0) so it is not
 * re-posted until the owner re-arms it via modwatch().
 */
void
evprocenque(struct eventqelt *eqp)
{
	struct proc *p;

	assert(eqp);
	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, eqp, eqp->ee_flags, eqp->ee_eventmask,0,0);
	/* already pending on the proc: nothing to do */
	if (eqp->ee_flags & EV_QUEUED) {
		KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
		return;
	}
	eqp->ee_flags |= EV_QUEUED;
	eqp->ee_eventmask = 0;		// disarm
	p = eqp->ee_proc;
	TAILQ_INSERT_TAIL(&p->p_evlist, eqp, ee_plist);
	KERNEL_DEBUG(DBG_MISC_EWAKEUP,0,0,0,eqp,0);
	/* wake any thread blocked in waitevent() on this proc */
	wakeup(&p->p_evlist);
	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}
1368
/*
 * given either a sockbuf or a socket run down the
 * event list and queue ready events found.
 *
 * 'event' is a mask of EV_* conditions that just occurred; each
 * watcher whose armed ee_eventmask intersects a ready condition gets
 * its request bits updated and is queued to its proc via
 * evprocenque().
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int mask;			/* EV_* bits to post to this watcher */
	struct eventqelt *evq;
	register struct tcpcb *tp;

	if (sb) sp = sb->sb_so;
	if (!sp || sp->so_evlist.tqh_first == NULL) return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,0,0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark

		*/
		switch (event & EV_DMASK) {

		case EV_RWBYTES:
		case EV_OOB:
		case EV_RWBYTES|EV_OOB:
			if (event & EV_OOB) {
				if ((evq->ee_eventmask & EV_EX)) {
					if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) {
						mask |= EV_EX|EV_OOB;
					}
				}
			}
			if (event & EV_RWBYTES) {
				if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
					/*
					 * BUGFIX: '&&' binds tighter than '||', so
					 * the original condition gated only the
					 * ECONNREFUSED test on SOCK_STREAM and
					 * treated ECONNRESET as a reset on any
					 * socket type.  Parenthesized so both
					 * error codes require a stream socket —
					 * the sototcpcb() below is TCP-only.
					 */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
					if (sp->so_state & SS_CANTRCVMORE) {
						/* read half closed */
						mask |= EV_RE|EV_FIN;
						evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
						break;
					}
					mask |= EV_RE;
					evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
				}

				if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
					/* same precedence fix as the read side above */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
					mask |= EV_WR;
					evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
				}
			}
			break;

		case EV_RCONN:
			/* incoming connection on a listening socket */
			if ((evq->ee_eventmask & EV_RE)) {
				evq->ee_req.er_rcnt = sp->so_qlen + 1;	// incl this one
				mask |= EV_RE|EV_RCONN;
			}
			break;

		case EV_WCONN:
			/* non-blocking connect completed */
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			/* reset/timeout is reported on whichever sides are armed */
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			/*
			 * unknown event type: post nothing and stop the scan.
			 * NOTE(review): this return skips the DBG_FUNC_END
			 * trace below — preserved as-is.
			 */
			return;
		} /* switch */

		if (mask) {
			/* record the ready bits and queue to the owning proc */
			evq->ee_req.er_eventbits |= mask;
			KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask,0,0);
			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
}
1508
1509/*
1510 * remove and return the first event (eqp=NULL) or a specific
1511 * event, or return NULL if no events found
1512 */
1513struct eventqelt *
1514evprocdeque(struct proc *p, struct eventqelt *eqp)
1515{
1516
1517 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_START,p,eqp,0,0,0);
1518
1519 if (eqp && ((eqp->ee_flags & EV_QUEUED) == NULL)) {
1520 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1521 return(NULL);
1522 }
1523 if (p->p_evlist.tqh_first == NULL) {
1524 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1525 return(NULL);
1526 }
1527 if (eqp == NULL) { // remove first
1528 eqp = p->p_evlist.tqh_first;
1529 }
1530 TAILQ_REMOVE(&p->p_evlist, eqp, ee_plist);
1531 eqp->ee_flags &= ~EV_QUEUED;
1532 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,eqp,0,0,0,0);
1533 return(eqp);
1534}
1535
/* user argument layout for the watchevent() syscall */
struct evwatch_args {
	struct eventreq *u_req;		/* user pointer to the event request */
	int u_eventmask;		/* EV_* conditions to watch for */
};
1540
1541
1542/*
1543 * watchevent system call. user passes us an event to watch
1544 * for. we malloc an event object, initialize it, and queue
1545 * it to the open socket. when the event occurs, postevent()
1546 * will enque it back to our proc where we can retrieve it
1547 * via waitevent().
1548 *
1549 * should this prevent duplicate events on same socket?
1550 */
1551int
1552watchevent(p, uap, retval)
1553 struct proc *p;
1554 struct evwatch_args *uap;
1555 register_t *retval;
1556{
1557 struct eventqelt *eqp = (struct eventqelt *)0;
1558 struct eventqelt *np;
1559 struct eventreq *erp;
1560 struct file *fp;
1561 struct socket *sp;
1562 int error;
1563
1564 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
1565
1566 // get a qelt and fill with users req
1567 MALLOC(eqp, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
1568 if (!eqp) panic("can't MALLOC eqp");
1569 erp = &eqp->ee_req;
1570 // get users request pkt
1571 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1572 sizeof(struct eventreq))) {
1573 FREE(eqp, M_TEMP);
1574 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1575 return(error);
1576 }
1577 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,eqp,0,0);
1578 // validate, freeing qelt if errors
1579 error = 0;
1580 if (erp->er_type != EV_FD) {
1581 error = EINVAL;
1582 } else if (erp->er_handle < 0) {
1583 error = EBADF;
1584 } else if (erp->er_handle > p->p_fd->fd_nfiles) {
1585 error = EBADF;
1586 } else if ((fp = *fdfile(p, erp->er_handle)) == NULL) {
1587 error = EBADF;
1588 } else if (fp->f_type != DTYPE_SOCKET) {
1589 error = EINVAL;
1590 }
1591 if (error) {
1592 FREE(eqp,M_TEMP);
1593 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1594 return(error);
1595 }
1596
1597 erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
1598 eqp->ee_proc = p;
1599 eqp->ee_eventmask = uap->u_eventmask & EV_MASK;
1600 eqp->ee_flags = 0;
1601
1602 sp = (struct socket *)fp->f_data;
1603 assert(sp != NULL);
1604
1605 // only allow one watch per file per proc
1606 for (np = sp->so_evlist.tqh_first; np != NULL; np = np->ee_slist.tqe_next) {
1607 if (np->ee_proc == p) {
1608 FREE(eqp,M_TEMP);
1609 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
1610 return(EINVAL);
1611 }
1612 }
1613
1614 TAILQ_INSERT_TAIL(&sp->so_evlist, eqp, ee_slist);
1615 postevent(sp, 0, EV_RWBYTES); // catch existing events
1616 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
1617 return(0);
1618}
1619
/* user argument layout for the waitevent() syscall */
struct evwait_args {
	struct eventreq *u_req;		/* user buffer receiving the fired event */
	struct timeval *tv;		/* timeout; NULL = block, zero = poll */
};
1624
/*
 * waitevent system call.
 * grabs the next waiting event for this proc and returns
 * it. if no events, user can request to sleep with timeout
 * or poll mode (tv=NULL);
 *
 * Returns 0 with the event copied out, 0 with *retval = 1 when a
 * poll (zero timeout) or the timeout expires with nothing pending,
 * or an errno (EINTR on signal, copyin/copyout failures, EINVAL for
 * a malformed timeout).
 */
int
waitevent(p, uap, retval)
	struct proc *p;
	struct evwait_args *uap;
	register_t *retval;
{
	int error = 0;
	struct eventqelt *eqp;
	uint64_t abstime, interval;

	if (uap->tv) {
		struct timeval atv;

		error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
		if (error)
			return(error);
		if (itimerfix(&atv)) {
			error = EINVAL;
			return(error);
		}

		/* relative timeout in absolute-time units */
		interval = tvtoabstime(&atv);
	}
	else
		abstime = interval = 0;	/* no timeout: block until an event */

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);

retry:
	if ((eqp = evprocdeque(p,NULL)) != NULL) {
		/*
		 * NOTE(review): if this copyout fails the event has
		 * already been dequeued and is lost — confirm intended.
		 */
		error = copyout((caddr_t)&eqp->ee_req,
				(caddr_t)uap->u_req, sizeof(struct eventreq));
		KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
			     eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0);

		return (error);
	}
	else {
		if (uap->tv && interval == 0) {
			*retval = 1;	// poll failed
			KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);

			return (error);
		}

		/* convert the relative interval to a deadline now */
		if (interval != 0)
			clock_absolutetime_interval_to_deadline(interval, &abstime);

		KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0);
		error = tsleep1(&p->p_evlist, PSOCK | PCATCH,
				"waitevent", abstime, (int (*)(int))0);
		KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0);
		if (error == 0)
			goto retry;	/* woken: an event may be pending */
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK) {
			/* timed out with nothing pending */
			*retval = 1;
			error = 0;
		}
	}

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);

	return (error);
}
1697
/* user argument layout for the modwatch() syscall */
struct modwatch_args {
	struct eventreq *u_req;		/* identifies the watched descriptor */
	int u_eventmask;		/* new EV_* mask, or EV_RM to remove */
};
1702
1703/*
1704 * modwatch system call. user passes in event to modify.
1705 * if we find it we reset the event bits and que/deque event
1706 * it needed.
1707 */
1708int
1709modwatch(p, uap, retval)
1710 struct proc *p;
1711 struct modwatch_args *uap;
1712 register_t *retval;
1713{
1714 struct eventreq er;
1715 struct eventreq *erp = &er;
1716 struct eventqelt *evq;
1717 int error;
1718 struct file *fp;
1719 struct socket *sp;
1720 int flag;
1721
1722 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
1723
1724 // get users request pkt
1725 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1726 sizeof(struct eventreq))) return(error);
1727
1728 if (erp->er_type != EV_FD) return(EINVAL);
1729 if (erp->er_handle < 0) return(EBADF);
1730 if (erp->er_handle > p->p_fd->fd_nfiles) return(EBADF);
1731 if ((fp = *fdfile(p, erp->er_handle)) == NULL)
1732 return(EBADF);
1733 if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock
1734 sp = (struct socket *)fp->f_data;
1c79356b 1735
55e303ae
A
1736 /* soo_close sets f_data to 0 before switching funnel */
1737 if (sp == (struct socket *)0)
1738 return(EBADF);
1c79356b
A
1739
1740 // locate event if possible
1741 for (evq = sp->so_evlist.tqh_first;
1742 evq != NULL; evq = evq->ee_slist.tqe_next) {
1743 if (evq->ee_proc == p) break;
1744 }
1745
1746 if (evq == NULL) {
1747 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
1748 return(EINVAL);
1749 }
1750 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0);
1751
1752 if (uap->u_eventmask == EV_RM) {
1753 evprocdeque(p, evq);
1754 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist);
1755 FREE(evq, M_TEMP);
1756 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
1757 return(0);
1758 }
1759
1760 switch (uap->u_eventmask & EV_MASK) {
1761
1762 case 0:
1763 flag = 0;
1764 break;
1765
1766 case EV_RE:
1767 case EV_WR:
1768 case EV_RE|EV_WR:
1769 flag = EV_RWBYTES;
1770 break;
1771
1772 case EV_EX:
1773 flag = EV_OOB;
1774 break;
1775
1776 case EV_EX|EV_RE:
1777 case EV_EX|EV_WR:
1778 case EV_EX|EV_RE|EV_WR:
1779 flag = EV_OOB|EV_RWBYTES;
1780 break;
1781
1782 default:
1783 return(EINVAL);
1784 }
1785
1786 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
1787 evprocdeque(p, evq);
1788 evq->ee_req.er_eventbits = 0;
1789 postevent(sp, 0, flag);
1790 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,sp,flag,0);
1791 return(0);
1792}