]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/sys_generic.c
e02a30b866fee47ec0b76de697673b2da71dad8f
[apple/xnu.git] / bsd / kern / sys_generic.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1982, 1986, 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 * (c) UNIX System Laboratories, Inc.
27 * All or some portions of this file are derived from material licensed
28 * to the University of California by American Telephone and Telegraph
29 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
30 * the permission of UNIX System Laboratories, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
61 */
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/filedesc.h>
66 #include <sys/ioctl.h>
67 #include <sys/file.h>
68 #include <sys/proc.h>
69 #include <sys/socketvar.h>
70 #include <sys/uio.h>
71 #include <sys/kernel.h>
72 #include <sys/stat.h>
73 #include <sys/malloc.h>
74
75 #include <sys/mount.h>
76 #include <sys/protosw.h>
77 #include <sys/ev.h>
78 #include <sys/user.h>
79 #include <sys/kdebug.h>
80 #include <kern/assert.h>
81 #include <kern/thread_act.h>
82
83 #include <sys/mbuf.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/errno.h>
87
88 #include <net/if.h>
89 #include <net/route.h>
90
91 #include <netinet/in.h>
92 #include <netinet/in_systm.h>
93 #include <netinet/ip.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/ip_var.h>
96 #include <netinet/ip6.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_fsm.h>
99 #include <netinet/tcp_seq.h>
100 #include <netinet/tcp_timer.h>
101 #include <netinet/tcp_var.h>
102 #include <netinet/tcpip.h>
103 #include <netinet/tcp_debug.h>
104 /* for wait queue based select */
105 #include <kern/wait_queue.h>
106 #if KTRACE
107 #include <sys/ktrace.h>
108 #endif
109
110 static int dofileread __P((struct proc *, struct file *, int, void *,
111 size_t, off_t, int, int*));
112 static int dofilewrite __P((struct proc *, struct file *, int,
113 const void *, size_t, off_t, int, int*));
114
115 static struct file*
116 holdfp(fdp, fd, flag)
117 struct filedesc* fdp;
118 int fd, flag;
119 {
120 struct file* fp;
121
122 if (((u_int)fd) >= fdp->fd_nfiles ||
123 (fp = fdp->fd_ofiles[fd]) == NULL ||
124 (fp->f_flag & flag) == 0) {
125 return (NULL);
126 }
127 fref(fp);
128 return (fp);
129 }
130
/*
 * Read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct read_args {
	int fd;		/* descriptor to read from */
	char *cbuf;	/* user buffer to fill */
	u_int nbyte;	/* number of bytes requested */
};
#endif
141 int
142 read(p, uap, retval)
143 struct proc *p;
144 register struct read_args *uap;
145 register_t *retval;
146 {
147 register struct file *fp;
148 int error;
149
150 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
151 return (EBADF);
152 error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte,
153 (off_t)-1, 0, retval);
154 frele(fp);
155 return(error);
156 }
157
/*
 * Pread system call
 */
#ifndef _SYS_SYSPROTO_H_
struct pread_args {
	int fd;			/* descriptor to read from */
	void *buf;		/* user buffer to fill */
	size_t nbyte;		/* number of bytes requested */
#ifdef DOUBLE_ALIGN_PARAMS
	int pad;		/* pad so `offset` is 8-byte aligned on the user stack */
#endif
	off_t offset;		/* absolute file offset to read from */
};
#endif
172 int
173 pread(p, uap, retval)
174 struct proc *p;
175 register struct pread_args *uap;
176 int *retval;
177 {
178 register struct file *fp;
179 int error;
180
181 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
182 return (EBADF);
183 if (fp->f_type != DTYPE_VNODE) {
184 error = ESPIPE;
185 } else {
186 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
187 uap->offset, FOF_OFFSET, retval);
188 }
189 frele(fp);
190 return(error);
191 }
192
193 /*
194 * Code common for read and pread
195 */
196 int
197 dofileread(p, fp, fd, buf, nbyte, offset, flags, retval)
198 struct proc *p;
199 struct file *fp;
200 int fd, flags;
201 void *buf;
202 size_t nbyte;
203 off_t offset;
204 int *retval;
205 {
206 struct uio auio;
207 struct iovec aiov;
208 long cnt, error = 0;
209 #if KTRACE
210 struct iovec ktriov;
211 struct uio ktruio;
212 int didktr = 0;
213 #endif
214
215 aiov.iov_base = (caddr_t)buf;
216 aiov.iov_len = nbyte;
217 auio.uio_iov = &aiov;
218 auio.uio_iovcnt = 1;
219 auio.uio_offset = offset;
220 if (nbyte > INT_MAX)
221 return (EINVAL);
222 auio.uio_resid = nbyte;
223 auio.uio_rw = UIO_READ;
224 auio.uio_segflg = UIO_USERSPACE;
225 auio.uio_procp = p;
226 #if KTRACE
227 /*
228 * if tracing, save a copy of iovec
229 */
230 if (KTRPOINT(p, KTR_GENIO)) {
231 ktriov = aiov;
232 ktruio = auio;
233 didktr = 1;
234 }
235 #endif
236 cnt = nbyte;
237
238 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
239 if (auio.uio_resid != cnt && (error == ERESTART ||
240 error == EINTR || error == EWOULDBLOCK))
241 error = 0;
242 }
243 cnt -= auio.uio_resid;
244 #if KTRACE
245 if (didktr && error == 0) {
246 ktruio.uio_iov = &ktriov;
247 ktruio.uio_resid = cnt;
248 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error,
249 KERNEL_FUNNEL);
250 }
251 #endif
252 *retval = cnt;
253 return (error);
254 }
255
/*
 * Scatter read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct readv_args {
	int fd;			/* descriptor to read from */
	struct iovec *iovp;	/* user array of iovecs */
	u_int iovcnt;		/* number of iovecs at iovp */
};
#endif
266 int
267 readv(p, uap, retval)
268 struct proc *p;
269 register struct readv_args *uap;
270 int *retval;
271 {
272 struct uio auio;
273 register struct iovec *iov;
274 int error;
275 struct iovec aiov[UIO_SMALLIOV];
276
277 if (uap->iovcnt > UIO_SMALLIOV) {
278 if (uap->iovcnt > UIO_MAXIOV)
279 return (EINVAL);
280 if ((iov = (struct iovec *)
281 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
282 return (ENOMEM);
283 } else
284 iov = aiov;
285 auio.uio_iov = iov;
286 auio.uio_iovcnt = uap->iovcnt;
287 auio.uio_rw = UIO_READ;
288 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
289 uap->iovcnt * sizeof (struct iovec));
290 if (!error)
291 error = rwuio(p, uap->fd, &auio, UIO_READ, retval);
292 if (uap->iovcnt > UIO_SMALLIOV)
293 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
294 return (error);
295 }
296
/*
 * Write system call
 */
#ifndef _SYS_SYSPROTO_H_
struct write_args {
	int fd;		/* descriptor to write to */
	char *cbuf;	/* user buffer holding the data */
	u_int nbyte;	/* number of bytes to write */
};
#endif
307 int
308 write(p, uap, retval)
309 struct proc *p;
310 register struct write_args *uap;
311 int *retval;
312 {
313 register struct file *fp;
314 int error;
315
316 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
317 return (EBADF);
318 error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte,
319 (off_t)-1, 0, retval);
320 frele(fp);
321 return(error);
322 }
323
/*
 * Pwrite system call
 */
#ifndef _SYS_SYSPROTO_H_
struct pwrite_args {
	int fd;			/* descriptor to write to */
	const void *buf;	/* user buffer holding the data */
	size_t nbyte;		/* number of bytes to write */
#ifdef DOUBLE_ALIGN_PARAMS
	int pad;		/* pad so `offset` is 8-byte aligned on the user stack */
#endif
	off_t offset;		/* absolute file offset to write at */
};
#endif
338 int
339 pwrite(p, uap, retval)
340 struct proc *p;
341 register struct pwrite_args *uap;
342 int *retval;
343 {
344 register struct file *fp;
345 int error;
346
347 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
348 return (EBADF);
349 if (fp->f_type != DTYPE_VNODE) {
350 error = ESPIPE;
351 } else {
352 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
353 uap->offset, FOF_OFFSET, retval);
354 }
355 frele(fp);
356 return(error);
357 }
358
359 static int
360 dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval)
361 struct proc *p;
362 struct file *fp;
363 int fd, flags;
364 const void *buf;
365 size_t nbyte;
366 off_t offset;
367 int *retval;
368 {
369 struct uio auio;
370 struct iovec aiov;
371 long cnt, error = 0;
372 #if KTRACE
373 struct iovec ktriov;
374 struct uio ktruio;
375 int didktr = 0;
376 #endif
377
378 aiov.iov_base = (void *)(uintptr_t)buf;
379 aiov.iov_len = nbyte;
380 auio.uio_iov = &aiov;
381 auio.uio_iovcnt = 1;
382 auio.uio_offset = offset;
383 if (nbyte > INT_MAX)
384 return (EINVAL);
385 auio.uio_resid = nbyte;
386 auio.uio_rw = UIO_WRITE;
387 auio.uio_segflg = UIO_USERSPACE;
388 auio.uio_procp = p;
389 #if KTRACE
390 /*
391 * if tracing, save a copy of iovec and uio
392 */
393 if (KTRPOINT(p, KTR_GENIO)) {
394 ktriov = aiov;
395 ktruio = auio;
396 didktr = 1;
397 }
398 #endif
399 cnt = nbyte;
400 if (fp->f_type == DTYPE_VNODE)
401 bwillwrite();
402 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
403 if (auio.uio_resid != cnt && (error == ERESTART ||
404 error == EINTR || error == EWOULDBLOCK))
405 error = 0;
406 if (error == EPIPE)
407 psignal(p, SIGPIPE);
408 }
409 cnt -= auio.uio_resid;
410 #if KTRACE
411 if (didktr && error == 0) {
412 ktruio.uio_iov = &ktriov;
413 ktruio.uio_resid = cnt;
414 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error,
415 KERNEL_FUNNEL);
416 }
417 #endif
418 *retval = cnt;
419 return (error);
420 }
421
/*
 * Gather write system call
 */
#ifndef _SYS_SYSPROTO_H_
struct writev_args {
	int fd;			/* descriptor to write to */
	struct iovec *iovp;	/* user array of iovecs */
	u_int iovcnt;		/* number of iovecs at iovp */
};
#endif
432 int
433 writev(p, uap, retval)
434 struct proc *p;
435 register struct writev_args *uap;
436 int *retval;
437 {
438 struct uio auio;
439 register struct iovec *iov;
440 int error;
441 struct iovec aiov[UIO_SMALLIOV];
442
443 if (uap->iovcnt > UIO_SMALLIOV) {
444 if (uap->iovcnt > UIO_MAXIOV)
445 return (EINVAL);
446 if ((iov = (struct iovec *)
447 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
448 return (ENOMEM);
449 } else
450 iov = aiov;
451 auio.uio_iov = iov;
452 auio.uio_iovcnt = uap->iovcnt;
453 auio.uio_rw = UIO_WRITE;
454 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
455 uap->iovcnt * sizeof (struct iovec));
456 if (!error)
457 error = rwuio(p, uap->fd, &auio, UIO_WRITE, retval);
458 if (uap->iovcnt > UIO_SMALLIOV)
459 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
460 return (error);
461 }
462
/*
 * Common back end for readv()/writev(): validate the iovec array
 * already attached to `uio`, then transfer through the fileops of
 * descriptor `fdes`.  On success *retval is the byte count moved.
 */
int
rwuio(p, fdes, uio, rw, retval)
	struct proc *p;
	int fdes;
	register struct uio *uio;
	enum uio_rw rw;
	int *retval;
{
	struct file *fp;
	register struct iovec *iov;
	int i, count, flag, error;
#if KTRACE
	struct iovec *ktriov;
	struct uio ktruio;
	int didktr = 0;
	u_int iovlen;
#endif

	if (error = fdgetf(p, fdes, &fp))
		return (error);

	/* descriptor must be open for the requested direction */
	if ((fp->f_flag&(rw==UIO_READ ? FREAD : FWRITE)) == 0) {
		return(EBADF);
	}
	/* sum the iovec lengths, rejecting negative or overflowing totals */
	uio->uio_resid = 0;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
	iov = uio->uio_iov;
	for (i = 0; i < uio->uio_iovcnt; i++) {
		if (iov->iov_len < 0) {
			return(EINVAL);
		}
		uio->uio_resid += iov->iov_len;
		if (uio->uio_resid < 0) {
			return(EINVAL);
		}
		iov++;
	}
	count = uio->uio_resid;
#if KTRACE
	/*
	 * if tracing, save a copy of iovec (the transfer consumes *uio)
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		iovlen = uio->uio_iovcnt * sizeof (struct iovec);
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *uio;
		didktr = 1;
	}
#endif

	if (rw == UIO_READ) {
		/* a partial transfer before a signal/restart counts as success */
		if (error = fo_read(fp, uio, fp->f_cred, 0, p))
			if (uio->uio_resid != count && (error == ERESTART ||
			    error == EINTR || error == EWOULDBLOCK))
				error = 0;
	} else {
		if (fp->f_type == DTYPE_VNODE)
			bwillwrite();
		if (error = fo_write(fp, uio, fp->f_cred, 0, p)) {
			if (uio->uio_resid != count && (error == ERESTART ||
			    error == EINTR || error == EWOULDBLOCK))
				error = 0;
			/* The socket layer handles SIGPIPE */
			if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
				psignal(p, SIGPIPE);
		}
	}

	*retval = count - uio->uio_resid;

#if KTRACE
	if (didktr) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = *retval;
			ktrgenio(p->p_tracep, fdes, rw, &ktruio, error,
			    KERNEL_FUNNEL);
		}
		FREE(ktriov, M_TEMP);
	}
#endif

	return(error);
}
549
/*
 * Ioctl system call
 */
#ifndef _SYS_SYSPROTO_H_
struct ioctl_args {
	int fd;		/* descriptor to operate on */
	u_long com;	/* command; high word encodes direction and arg size */
	caddr_t data;	/* user address of the in/out parameter block */
};
#endif
/*
 * ioctl(2): stage the command argument into a kernel buffer (on-stack
 * up to STK_PARAMS bytes, kalloc'd otherwise) according to the
 * IOC_IN/IOC_OUT/IOC_VOID direction bits, dispatch the command — a few
 * are handled inline, the rest go to the file's fo_ioctl — and copy
 * any output back to the user.
 */
int
ioctl(p, uap, retval)
	struct proc *p;
	register struct ioctl_args *uap;
	register_t *retval;
{
	struct file *fp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS];

	if (error = fdgetf(p, uap->fd, &fp))
		return (error);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

#if NETAT
	/*
	 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
	 * while implementing an ATioctl system call
	 */
	{
		extern int appletalk_inited;

		if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
#ifdef APPLETALK_DEBUG
			kprintf("ioctl: special AppleTalk \n");
#endif
			/* pass AppleTalk commands straight through, unstaged */
			error = fo_ioctl(fp, uap->com, uap->data, p);
			return(error);
		}
	}

#endif /* NETAT */


	/* close-on-exec flags live in the fd table, not the file */
	switch (com = uap->com) {
	case FIONCLEX:
		*fdflags(p, uap->fd) &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		*fdflags(p, uap->fd) |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0)
			return(ENOMEM);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(uap->data, data, (u_int)size);
			if (error) {
				if (memp)
					kfree(memp, size);
				return (error);
			}
		} else
			/* zero-size IOC_IN: the "argument" is the pointer itself */
			*(caddr_t *)data = uap->data;
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = uap->data;

	switch (com) {

	case FIONBIO:
		if (tmp = *(int *)data)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if (tmp = *(int *)data)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* sockets store the owner directly */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		/* ttys take a process group: translate a pid to its pgid */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = fo_ioctl(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, uap->data, (u_int)size);
		break;
	}
	if (memp)
		kfree(memp, size);
	return (error);
}
705
/* Wait channel and collision counter shared by the select machinery. */
int selwait, nselcoll;
/* Which pass of selprocess() is running: initial call or continuation. */
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
	int nfd, register_t *retval, int sel_pass);
static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
	int nfd, int * count, int * nfcount);
extern uint64_t tvtoabstime(struct timeval *tvp);
716
/*
 * Select system call: copy in the descriptor bit sets and timeout,
 * size the per-thread wait-queue storage, then fall into
 * selprocess(), which does the scanning/blocking and the copyout.
 */
#ifndef _SYS_SYSPROTO_H_
struct select_args {
	int nd;			/* number of descriptors to examine */
	u_int32_t *in;		/* read set (may be NULL) */
	u_int32_t *ou;		/* write set (may be NULL) */
	u_int32_t *ex;		/* exception set (may be NULL) */
	struct timeval *tv;	/* relative timeout; NULL blocks forever */
};
#endif
int
select(p, uap, retval)
	register struct proc *p;
	register struct select_args *uap;
	register_t *retval;
{
	int error = 0;
	u_int ni, nw, size;
	thread_act_t th_act;
	struct uthread *uth;
	struct _select *sel;
	int needzerofill = 1;
	int kfcount =0;		/* NOTE(review): set but never used below */
	int nfcount = 0;
	int count = 0;

	th_act = current_act();
	uth = get_bsdthread_info(th_act);
	/* per-thread select state; buffers persist across select calls */
	sel = &uth->uu_state.ss_select;
	retval = (int *)get_bsduthreadrval(th_act);
	*retval = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);	/* bytes per bit set; 3 sets total */

	/*
	 * if this is the first select by the thread
	 * allocate the space for bits.
	 */
	if (sel->nbytes == 0) {
		sel->nbytes = 3 * ni;
		MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
		needzerofill = 0;
	}

	/*
	 * if the previously allocated space for the bits
	 * is smaller than what is requested. Reallocate.
	 */
	if (sel->nbytes < (3 * ni)) {
		sel->nbytes = (3 * ni);
		FREE(sel->ibits, M_TEMP);
		FREE(sel->obits, M_TEMP);
		MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
		needzerofill = 0;
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define getbits(name, x) \
	do { \
		if (uap->name && (error = copyin((caddr_t)uap->name, \
		    (caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (uap->tv) {
		struct timeval atv;

		error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
		if (error)
			goto continuation;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto continuation;
		}

		/* convert the relative timeout to an absolute deadline */
		clock_absolutetime_interval_to_deadline(
		    tvtoabstime(&atv), &sel->abstime);
	}
	else
		sel->abstime = 0;

	sel->nfcount = 0;
	/* count fds to poll (count) and how many are sockets (nfcount) */
	if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) {
		goto continuation;
	}

	sel->nfcount = nfcount;
	sel->count = count;
	/* one wait-queue sub plus one link per polled descriptor */
	size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK);
	if (sel->allocsize) {
		if (uth->uu_wqsub == 0)
			panic("select: wql memory smashed");
		/* needed for the select now */
		if (size > sel->allocsize) {
			kfree(uth->uu_wqsub, sel->allocsize);
			sel->allocsize = size;
			uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
			if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
				panic("failed to allocate memory for waitqueue\n");
			sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
		}
	} else {
		sel->count = count;
		sel->allocsize = size;
		uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
		if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
			panic("failed to allocate memory for waitqueue\n");
		sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
	}
	bzero(uth->uu_wqsub, size);
	wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:
	/* everything else (scan, block, copyout) happens in selprocess() */
	return selprocess(error, SEL_FIRSTPASS);
}
860
/*
 * Continuation routine handed to tsleep1(): re-enters selprocess()
 * for the second pass once the thread is unblocked.
 */
int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}
866
/*
 * Body of select(): scan the descriptors, block if nothing is ready,
 * and copy the output bit sets back out.  Entered from select() with
 * SEL_FIRSTPASS and again (via selcontinue, after the continuation-
 * style sleep) with SEL_SECONDPASS.
 *
 * K&R definition with no declarations: both `error` and `sel_pass`
 * default to int.
 */
int
selprocess(error, sel_pass)
{
	int ncoll;
	u_int ni, nw;
	thread_act_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_args *uap;
	int *retval;
	struct _select *sel;
	int unwind = 1;		/* unlink wait-queue links in `done` */
	int prepost = 0;	/* an event was preposted before we slept */
	int somewakeup = 0;	/* woke without a prepost; rescan */
	int doretry = 0;
	wait_result_t wait_result;

	p = current_proc();
	th_act = current_act();
	uap = (struct select_args *)get_bsduthreadarg(th_act);
	retval = (int *)get_bsduthreadrval(th_act);
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_state.ss_select;

	/* if it is first pass wait queue is not setup yet */
	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
		unwind = 0;
	if (sel->count == 0)
		unwind = 0;
retry:
	if (error != 0) {
		goto done;
	}

	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	/* skip scans if the select is just for timeouts */
	if (sel->count) {
		if (sel_pass == SEL_FIRSTPASS)
			wait_queue_sub_clearrefs(uth->uu_wqsub);

		error = selscan(p, sel, uap->nd, retval, sel_pass);
		if (error || *retval) {
			goto done;
		}
		if (prepost) {
			/*
			 * An event was preposted but the scan found nothing:
			 * someone else may already have consumed the data.
			 * Rescan while time permits.
			 */
			prepost = 0;
			doretry = 1;
		}
		if (somewakeup) {
			somewakeup = 0;
			doretry = 1;
		}
	}

	if (uap->tv) {
		uint64_t now;

		clock_get_uptime(&now);
		if (now >= sel->abstime)
			goto done;
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && sel->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to colls,no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	p->p_flag &= ~P_SELECT;

	/* if the select is just for timeout skip check */
	if (sel->count &&(sel_pass == SEL_SECONDPASS))
		panic("selprocess: 2nd pass assertwaiting");

	/* Wait Queue Subordinate has waitqueue as first element */
	wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub,
	    &selwait, THREAD_ABORTSAFE);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events */
		error = tsleep1(NULL, PSOCK | PCATCH,
		    "select", sel->abstime, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	sel_pass = SEL_SECONDPASS;
	if (error == 0) {
		if (!prepost)
			somewakeup =1;
		goto retry;
	}
done:
	if (unwind)
		wait_subqueue_unlink_all(uth->uu_wqsub);
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;	/* timeout expired: report zero ready fds */
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * copy the output bits back to the user sets
	 */
#define putbits(name, x) \
	do { \
		if (uap->name && (error2 = copyout((caddr_t)&sel->obits[(x) * nw], \
		    (caddr_t)uap->name, ni))) \
			error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	return(error);
}
1007
/*
 * Scan the input bit sets for ready descriptors, setting the matching
 * bits in the output sets.  Non-socket descriptors are polled first
 * under the kernel funnel, then sockets under the network funnel.
 * On the first pass each fo_select is handed a preallocated wait-queue
 * link so the descriptor can record this thread for wakeup; on the
 * second pass no link is passed (nothing is recorded).
 *
 * Returns 0 with *retval = number of ready descriptors, or EBADF.
 */
static int
selscan(p, sel, nfd, retval, sel_pass)
	struct proc *p;
	struct _select *sel;
	int nfd;
	register_t *retval;
	int sel_pass;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register u_int32_t bits;
	struct file *fp;
	int n = 0;		/* ready descriptors found */
	int nc = 0;		/* index of next wait-queue link */
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	char * wql;
	int nfunnel = 0;
	int count, nfcount;
	char * wql_ptr;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*retval=0;
		return(EIO);
	}

	ibits = sel->ibits;
	obits = sel->obits;
	wql = sel->wql;

	count = sel->count;
	nfcount = sel->nfcount;

	if (nfcount > count)
		panic("selcount count<nfcount");

	nw = howmany(nfd, NFDBITS);

	nc = 0;
	if ( nfcount < count) {
		/* some or all in kernel funnel */
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];
			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];
				/* visit each set bit in this word */
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];
					if (fp == NULL ||
					    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						return(EBADF);
					}
					/* no wait-queue link on the second pass */
					if (sel_pass == SEL_SECONDPASS)
						wql_ptr = (char *)0;
					else
						wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
					if (fp->f_ops && (fp->f_type != DTYPE_SOCKET)
					    && fo_select(fp, flag[msk], wql_ptr, p)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
	}

	if (nfcount) {
		/* socket file descriptors for scan */
		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);

		nc = 0;
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];
			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];
					if (fp == NULL ||
					    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						/* restore the funnel before erroring out */
						thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS)
						wql_ptr = (char *)0;
					else
						wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
					if (fp->f_ops && (fp->f_type == DTYPE_SOCKET) &&
					    fo_select(fp, flag[msk], wql_ptr, p)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
	}

	*retval = n;
	return (0);
}
1119
/*ARGSUSED*/
/*
 * Generic select routine for devices that are always ready.
 */
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{

	return (1);
}
1130
/*
 * Pre-scan the input bit sets for select(): count how many
 * descriptors will be polled (*count) and how many of those are
 * sockets (*nfcount).  Returns EBADF if any named descriptor is
 * closed or reserved.
 *
 * NOTE(review): the `obits` parameter and the locals fptr/fbits/nc
 * are unused here.
 */
static int
selcount(p, ibits, obits, nfd, count, nfcount)
	struct proc *p;
	u_int32_t *ibits, *obits;
	int nfd;
	int *count;
	int *nfcount;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register u_int32_t bits;
	struct file *fp;
	int n = 0;
	int nc = 0;
	int nfc = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *fptr, *fbits;
	u_int nw;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*count=0;
		*nfcount=0;
		return(EIO);
	}

	nw = howmany(nfd, NFDBITS);


	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			/* visit each set bit in this word */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
				    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
					*count=0;
					*nfcount=0;
					return(EBADF);
				}
				if (fp->f_type == DTYPE_SOCKET)
					nfc++;
				n++;
			}
		}
	}
	*count = n;
	*nfcount = nfc;
	return (0);
}
1186
1187 /*
1188 * Record a select request.
1189 */
1190 void
1191 selrecord(selector, sip, p_wql)
1192 struct proc *selector;
1193 struct selinfo *sip;
1194 void * p_wql;
1195 {
1196 thread_act_t cur_act = current_act();
1197 struct uthread * ut = get_bsdthread_info(cur_act);
1198
1199 /* need to look at collisions */
1200
1201 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1202 return;
1203 }
1204
1205 /*do not record if this is second pass of select */
1206 if((p_wql == (void *)0)) {
1207 return;
1208 }
1209
1210 if ((sip->si_flags & SI_INITED) == 0) {
1211 wait_queue_init(&sip->wait_queue, SYNC_POLICY_FIFO);
1212 sip->si_flags |= SI_INITED;
1213 sip->si_flags &= ~SI_CLEAR;
1214 }
1215
1216 if (sip->si_flags & SI_RECORDED) {
1217 sip->si_flags |= SI_COLL;
1218 } else
1219 sip->si_flags &= ~SI_COLL;
1220
1221 sip->si_flags |= SI_RECORDED;
1222 if (!wait_queue_member(&sip->wait_queue, ut->uu_wqsub))
1223 wait_queue_link_noalloc(&sip->wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql);
1224
1225 return;
1226 }
1227
/*
 * Wake all threads recorded on this selinfo's wait queue.  On a
 * collision (multiple recorders) just bump nselcoll; the broadcast
 * wakeup on the global selwait channel is deliberately not supported.
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	if (sip->si_flags & SI_RECORDED) {
		wait_queue_wakeup_all(&sip->wait_queue, &selwait, THREAD_AWAKENED);
		sip->si_flags &= ~SI_RECORDED;
	}

}
1252
/*
 * Called when the object behind a selinfo is going away: wake any
 * recorded waiters, unlink every thread still attached to the wait
 * queue, and mark the selinfo cleared.
 */
void
selthreadclear(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	wait_queue_unlinkall_nofree(&sip->wait_queue);
}
1268
1269
1270 extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp);
1271
/*
 * called upon socket close. deque and free all events for
 * the socket
 */
void
evsofree(struct socket *sp)
{
	struct eventqelt *eqp, *next;

	if (sp == NULL) return;

	for (eqp = sp->so_evlist.tqh_first; eqp != NULL; eqp = next) {
		next = eqp->ee_slist.tqe_next;	/* grab next before eqp is freed */
		evprocdeque(eqp->ee_proc, eqp); // remove from proc q if there
		TAILQ_REMOVE(&sp->so_evlist, eqp, ee_slist); // remove from socket q
		FREE(eqp, M_TEMP);
	}
}
1290
1291
1292 #define DBG_EVENT 0x10
1293
1294 #define DBG_POST 0x10
1295 #define DBG_WATCH 0x11
1296 #define DBG_WAIT 0x12
1297 #define DBG_MOD 0x13
1298 #define DBG_EWAKEUP 0x14
1299 #define DBG_ENQUEUE 0x15
1300 #define DBG_DEQUEUE 0x16
1301
1302 #define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
1303 #define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
1304 #define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
1305 #define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
1306 #define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
1307 #define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
1308 #define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1309
1310
1311 /*
1312 * enque this event if it's not already queued. wakeup
1313 the proc if we do queue this event to it.
1314 */
1315 void
1316 evprocenque(struct eventqelt *eqp)
1317 {
1318 struct proc *p;
1319
1320 assert(eqp);
1321 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, eqp, eqp->ee_flags, eqp->ee_eventmask,0,0);
1322 if (eqp->ee_flags & EV_QUEUED) {
1323 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1324 return;
1325 }
1326 eqp->ee_flags |= EV_QUEUED;
1327 eqp->ee_eventmask = 0; // disarm
1328 p = eqp->ee_proc;
1329 TAILQ_INSERT_TAIL(&p->p_evlist, eqp, ee_plist);
1330 KERNEL_DEBUG(DBG_MISC_EWAKEUP,0,0,0,eqp,0);
1331 wakeup(&p->p_evlist);
1332 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1333 }
1334
/*
 * Given either a sockbuf or a socket, run down the socket's registered
 * event list and, for each watcher whose armed mask intersects the
 * posted 'event', record the ready bits (plus byte counts) in the
 * watcher's request and enqueue it to the owning process.
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int mask;
	struct eventqelt *evq;
	register struct tcpcb *tp;

	if (sb) sp = sb->sb_so;
	if (!sp || sp->so_evlist.tqh_first == NULL) return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,0,0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

	        mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark

		*/
		switch (event & EV_DMASK) {

		case EV_RWBYTES:
		case EV_OOB:
		case EV_RWBYTES|EV_OOB:
			if (event & EV_OOB) {
				if ((evq->ee_eventmask & EV_EX)) {
					if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) {
						mask |= EV_EX|EV_OOB;
					}
				}
			}
			if (event & EV_RWBYTES) {
				if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
					/* BUG FIX: '&&' binds tighter than '||', so the
					 * original expression meant
					 *   (stream && refused) || reset
					 * and applied the ECONNRESET check to non-stream
					 * sockets too.  Group the error tests so both are
					 * qualified by SOCK_STREAM. */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
					if (sp->so_state & SS_CANTRCVMORE) {
						mask |= EV_RE|EV_FIN;
						evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
						break;
					}
					mask |= EV_RE;
					evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
				}

				if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
					/* same precedence fix as the read path above */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
					mask |= EV_WR;
					evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
				}
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				evq->ee_req.er_rcnt = sp->so_qlen + 1;	// incl this one
				mask |= EV_RE|EV_RCONN;
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			/* balance the DBG_FUNC_START trace on this exit path */
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
			return;
		} /* switch */

		if (mask) {
			evq->ee_req.er_eventbits |= mask;
			KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask,0,0);
			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
}
1474
1475 /*
1476 * remove and return the first event (eqp=NULL) or a specific
1477 * event, or return NULL if no events found
1478 */
1479 struct eventqelt *
1480 evprocdeque(struct proc *p, struct eventqelt *eqp)
1481 {
1482
1483 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_START,p,eqp,0,0,0);
1484
1485 if (eqp && ((eqp->ee_flags & EV_QUEUED) == NULL)) {
1486 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1487 return(NULL);
1488 }
1489 if (p->p_evlist.tqh_first == NULL) {
1490 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1491 return(NULL);
1492 }
1493 if (eqp == NULL) { // remove first
1494 eqp = p->p_evlist.tqh_first;
1495 }
1496 TAILQ_REMOVE(&p->p_evlist, eqp, ee_plist);
1497 eqp->ee_flags &= ~EV_QUEUED;
1498 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,eqp,0,0,0,0);
1499 return(eqp);
1500 }
1501
/* user argument block for watchevent(2) */
struct evwatch_args {
	struct eventreq *u_req;		/* user pointer to the event request */
	int u_eventmask;		/* EV_* bits the caller wants watched */
};
1506
1507
1508 /*
1509 * watchevent system call. user passes us an event to watch
1510 * for. we malloc an event object, initialize it, and queue
1511 * it to the open socket. when the event occurs, postevent()
1512 * will enque it back to our proc where we can retrieve it
1513 * via waitevent().
1514 *
1515 * should this prevent duplicate events on same socket?
1516 */
1517 int
1518 watchevent(p, uap, retval)
1519 struct proc *p;
1520 struct evwatch_args *uap;
1521 register_t *retval;
1522 {
1523 struct eventqelt *eqp = (struct eventqelt *)0;
1524 struct eventqelt *np;
1525 struct eventreq *erp;
1526 struct file *fp;
1527 struct socket *sp;
1528 int error;
1529
1530 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
1531
1532 // get a qelt and fill with users req
1533 MALLOC(eqp, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
1534 if (!eqp) panic("can't MALLOC eqp");
1535 erp = &eqp->ee_req;
1536 // get users request pkt
1537 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1538 sizeof(struct eventreq))) {
1539 FREE(eqp, M_TEMP);
1540 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1541 return(error);
1542 }
1543 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,eqp,0,0);
1544 // validate, freeing qelt if errors
1545 error = 0;
1546 if (erp->er_type != EV_FD) {
1547 error = EINVAL;
1548 } else if (erp->er_handle < 0) {
1549 error = EBADF;
1550 } else if (erp->er_handle > p->p_fd->fd_nfiles) {
1551 error = EBADF;
1552 } else if ((fp = *fdfile(p, erp->er_handle)) == NULL) {
1553 error = EBADF;
1554 } else if (fp->f_type != DTYPE_SOCKET) {
1555 error = EINVAL;
1556 }
1557 if (error) {
1558 FREE(eqp,M_TEMP);
1559 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1560 return(error);
1561 }
1562
1563 erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
1564 eqp->ee_proc = p;
1565 eqp->ee_eventmask = uap->u_eventmask & EV_MASK;
1566 eqp->ee_flags = 0;
1567
1568 sp = (struct socket *)fp->f_data;
1569 assert(sp != NULL);
1570
1571 // only allow one watch per file per proc
1572 for (np = sp->so_evlist.tqh_first; np != NULL; np = np->ee_slist.tqe_next) {
1573 if (np->ee_proc == p) {
1574 FREE(eqp,M_TEMP);
1575 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
1576 return(EINVAL);
1577 }
1578 }
1579
1580 TAILQ_INSERT_TAIL(&sp->so_evlist, eqp, ee_slist);
1581 postevent(sp, 0, EV_RWBYTES); // catch existing events
1582 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
1583 return(0);
1584 }
1585
/* user argument block for waitevent(2) */
struct evwait_args {
	struct eventreq *u_req;		/* user buffer to receive the ready event */
	struct timeval *tv;		/* timeout; NULL = wait forever, {0,0} = poll */
};
1590
1591 /*
1592 * waitevent system call.
1593 * grabs the next waiting event for this proc and returns
1594 * it. if no events, user can request to sleep with timeout
1595 * or poll mode (tv=NULL);
1596 */
1597 int
1598 waitevent(p, uap, retval)
1599 struct proc *p;
1600 struct evwait_args *uap;
1601 register_t *retval;
1602 {
1603 int error = 0;
1604 struct eventqelt *eqp;
1605 uint64_t abstime, interval;
1606
1607 if (uap->tv) {
1608 struct timeval atv;
1609
1610 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
1611 if (error)
1612 return(error);
1613 if (itimerfix(&atv)) {
1614 error = EINVAL;
1615 return(error);
1616 }
1617
1618 interval = tvtoabstime(&atv);
1619 }
1620 else
1621 abstime = interval = 0;
1622
1623 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
1624
1625 retry:
1626 if ((eqp = evprocdeque(p,NULL)) != NULL) {
1627 error = copyout((caddr_t)&eqp->ee_req,
1628 (caddr_t)uap->u_req, sizeof(struct eventreq));
1629 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
1630 eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0);
1631
1632 return (error);
1633 }
1634 else {
1635 if (uap->tv && interval == 0) {
1636 *retval = 1; // poll failed
1637 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
1638
1639 return (error);
1640 }
1641
1642 if (interval != 0)
1643 clock_absolutetime_interval_to_deadline(interval, &abstime)
1644
1645 KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0);
1646 error = tsleep1(&p->p_evlist, PSOCK | PCATCH,
1647 "waitevent", abstime, (int (*)(int))0);
1648 KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0);
1649 if (error == 0)
1650 goto retry;
1651 if (error == ERESTART)
1652 error = EINTR;
1653 if (error == EWOULDBLOCK) {
1654 *retval = 1;
1655 error = 0;
1656 }
1657 }
1658
1659 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
1660
1661 return (error);
1662 }
1663
/* user argument block for modwatch(2) */
struct modwatch_args {
	struct eventreq *u_req;		/* user pointer identifying the watched event */
	int u_eventmask;		/* new EV_* mask, or EV_RM to remove the watch */
};
1668
1669 /*
1670 * modwatch system call. user passes in event to modify.
1671 * if we find it we reset the event bits and que/deque event
1672 * it needed.
1673 */
1674 int
1675 modwatch(p, uap, retval)
1676 struct proc *p;
1677 struct modwatch_args *uap;
1678 register_t *retval;
1679 {
1680 struct eventreq er;
1681 struct eventreq *erp = &er;
1682 struct eventqelt *evq;
1683 int error;
1684 struct file *fp;
1685 struct socket *sp;
1686 int flag;
1687
1688 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
1689
1690 // get users request pkt
1691 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1692 sizeof(struct eventreq))) return(error);
1693
1694 if (erp->er_type != EV_FD) return(EINVAL);
1695 if (erp->er_handle < 0) return(EBADF);
1696 if (erp->er_handle > p->p_fd->fd_nfiles) return(EBADF);
1697 if ((fp = *fdfile(p, erp->er_handle)) == NULL)
1698 return(EBADF);
1699 if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock
1700 sp = (struct socket *)fp->f_data;
1701 assert(sp != NULL);
1702
1703
1704 // locate event if possible
1705 for (evq = sp->so_evlist.tqh_first;
1706 evq != NULL; evq = evq->ee_slist.tqe_next) {
1707 if (evq->ee_proc == p) break;
1708 }
1709
1710 if (evq == NULL) {
1711 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
1712 return(EINVAL);
1713 }
1714 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0);
1715
1716 if (uap->u_eventmask == EV_RM) {
1717 evprocdeque(p, evq);
1718 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist);
1719 FREE(evq, M_TEMP);
1720 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
1721 return(0);
1722 }
1723
1724 switch (uap->u_eventmask & EV_MASK) {
1725
1726 case 0:
1727 flag = 0;
1728 break;
1729
1730 case EV_RE:
1731 case EV_WR:
1732 case EV_RE|EV_WR:
1733 flag = EV_RWBYTES;
1734 break;
1735
1736 case EV_EX:
1737 flag = EV_OOB;
1738 break;
1739
1740 case EV_EX|EV_RE:
1741 case EV_EX|EV_WR:
1742 case EV_EX|EV_RE|EV_WR:
1743 flag = EV_OOB|EV_RWBYTES;
1744 break;
1745
1746 default:
1747 return(EINVAL);
1748 }
1749
1750 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
1751 evprocdeque(p, evq);
1752 evq->ee_req.er_eventbits = 0;
1753 postevent(sp, 0, flag);
1754 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,sp,flag,0);
1755 return(0);
1756 }