/*
 * bsd/kern/sys_generic.c — from apple/xnu.git (xnu-344.49)
 */
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1982, 1986, 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
64 */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/filedesc.h>
69 #include <sys/ioctl.h>
70 #include <sys/file.h>
71 #include <sys/proc.h>
72 #include <sys/socketvar.h>
73 #include <sys/uio.h>
74 #include <sys/kernel.h>
75 #include <sys/stat.h>
76 #include <sys/malloc.h>
77
78 #include <sys/mount.h>
79 #include <sys/protosw.h>
80 #include <sys/ev.h>
81 #include <sys/user.h>
82 #include <sys/kdebug.h>
83 #include <kern/assert.h>
84 #include <kern/thread_act.h>
85
86 #include <sys/mbuf.h>
87 #include <sys/socket.h>
88 #include <sys/socketvar.h>
89 #include <sys/errno.h>
90
91 #include <net/if.h>
92 #include <net/route.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/ip_var.h>
99 #include <netinet/ip6.h>
100 #include <netinet/tcp.h>
101 #include <netinet/tcp_fsm.h>
102 #include <netinet/tcp_seq.h>
103 #include <netinet/tcp_timer.h>
104 #include <netinet/tcp_var.h>
105 #include <netinet/tcpip.h>
106 #include <netinet/tcp_debug.h>
107 /* for wait queue based select */
108 #include <kern/wait_queue.h>
109 #if KTRACE
110 #include <sys/ktrace.h>
111 #endif
112
113 static int dofileread __P((struct proc *, struct file *, int, void *,
114 size_t, off_t, int, int*));
115 static int dofilewrite __P((struct proc *, struct file *, int,
116 const void *, size_t, off_t, int, int*));
117
118 static struct file*
119 holdfp(fdp, fd, flag)
120 struct filedesc* fdp;
121 int fd, flag;
122 {
123 struct file* fp;
124
125 if (((u_int)fd) >= fdp->fd_nfiles ||
126 (fp = fdp->fd_ofiles[fd]) == NULL ||
127 (fp->f_flag & flag) == 0) {
128 return (NULL);
129 }
130 if (fref(fp) == -1)
131 return (NULL);
132 return (fp);
133 }
134
135 /*
136 * Read system call.
137 */
138 #ifndef _SYS_SYSPROTO_H_
139 struct read_args {
140 int fd;
141 char *cbuf;
142 u_int nbyte;
143 };
144 #endif
145 int
146 read(p, uap, retval)
147 struct proc *p;
148 register struct read_args *uap;
149 register_t *retval;
150 {
151 register struct file *fp;
152 int error;
153
154 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
155 return (EBADF);
156 error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte,
157 (off_t)-1, 0, retval);
158 frele(fp);
159 return(error);
160 }
161
162 /*
163 * Pread system call
164 */
165 #ifndef _SYS_SYSPROTO_H_
166 struct pread_args {
167 int fd;
168 void *buf;
169 size_t nbyte;
170 #ifdef DOUBLE_ALIGN_PARAMS
171 int pad;
172 #endif
173 off_t offset;
174 };
175 #endif
176 int
177 pread(p, uap, retval)
178 struct proc *p;
179 register struct pread_args *uap;
180 int *retval;
181 {
182 register struct file *fp;
183 int error;
184
185 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
186 return (EBADF);
187 if (fp->f_type != DTYPE_VNODE) {
188 error = ESPIPE;
189 } else {
190 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
191 uap->offset, FOF_OFFSET, retval);
192 }
193 frele(fp);
194 return(error);
195 }
196
197 /*
198 * Code common for read and pread
199 */
200 int
201 dofileread(p, fp, fd, buf, nbyte, offset, flags, retval)
202 struct proc *p;
203 struct file *fp;
204 int fd, flags;
205 void *buf;
206 size_t nbyte;
207 off_t offset;
208 int *retval;
209 {
210 struct uio auio;
211 struct iovec aiov;
212 long cnt, error = 0;
213 #if KTRACE
214 struct iovec ktriov;
215 struct uio ktruio;
216 int didktr = 0;
217 #endif
218
219 aiov.iov_base = (caddr_t)buf;
220 aiov.iov_len = nbyte;
221 auio.uio_iov = &aiov;
222 auio.uio_iovcnt = 1;
223 auio.uio_offset = offset;
224 if (nbyte > INT_MAX)
225 return (EINVAL);
226 auio.uio_resid = nbyte;
227 auio.uio_rw = UIO_READ;
228 auio.uio_segflg = UIO_USERSPACE;
229 auio.uio_procp = p;
230 #if KTRACE
231 /*
232 * if tracing, save a copy of iovec
233 */
234 if (KTRPOINT(p, KTR_GENIO)) {
235 ktriov = aiov;
236 ktruio = auio;
237 didktr = 1;
238 }
239 #endif
240 cnt = nbyte;
241
242 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
243 if (auio.uio_resid != cnt && (error == ERESTART ||
244 error == EINTR || error == EWOULDBLOCK))
245 error = 0;
246 }
247 cnt -= auio.uio_resid;
248 #if KTRACE
249 if (didktr && error == 0) {
250 ktruio.uio_iov = &ktriov;
251 ktruio.uio_resid = cnt;
252 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error,
253 KERNEL_FUNNEL);
254 }
255 #endif
256 *retval = cnt;
257 return (error);
258 }
259
260 /*
261 * Scatter read system call.
262 */
263 #ifndef _SYS_SYSPROTO_H_
264 struct readv_args {
265 int fd;
266 struct iovec *iovp;
267 u_int iovcnt;
268 };
269 #endif
270 int
271 readv(p, uap, retval)
272 struct proc *p;
273 register struct readv_args *uap;
274 int *retval;
275 {
276 struct uio auio;
277 register struct iovec *iov;
278 int error;
279 struct iovec aiov[UIO_SMALLIOV];
280
281 if (uap->iovcnt > UIO_SMALLIOV) {
282 if (uap->iovcnt > UIO_MAXIOV)
283 return (EINVAL);
284 if ((iov = (struct iovec *)
285 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
286 return (ENOMEM);
287 } else
288 iov = aiov;
289 auio.uio_iov = iov;
290 auio.uio_iovcnt = uap->iovcnt;
291 auio.uio_rw = UIO_READ;
292 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
293 uap->iovcnt * sizeof (struct iovec));
294 if (!error)
295 error = rwuio(p, uap->fd, &auio, UIO_READ, retval);
296 if (uap->iovcnt > UIO_SMALLIOV)
297 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
298 return (error);
299 }
300
301 /*
302 * Write system call
303 */
304 #ifndef _SYS_SYSPROTO_H_
305 struct write_args {
306 int fd;
307 char *cbuf;
308 u_int nbyte;
309 };
310 #endif
311 int
312 write(p, uap, retval)
313 struct proc *p;
314 register struct write_args *uap;
315 int *retval;
316 {
317 register struct file *fp;
318 int error;
319
320 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
321 return (EBADF);
322 error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte,
323 (off_t)-1, 0, retval);
324 frele(fp);
325 return(error);
326 }
327
328 /*
329 * Pwrite system call
330 */
331 #ifndef _SYS_SYSPROTO_H_
332 struct pwrite_args {
333 int fd;
334 const void *buf;
335 size_t nbyte;
336 #ifdef DOUBLE_ALIGN_PARAMS
337 int pad;
338 #endif
339 off_t offset;
340 };
341 #endif
342 int
343 pwrite(p, uap, retval)
344 struct proc *p;
345 register struct pwrite_args *uap;
346 int *retval;
347 {
348 register struct file *fp;
349 int error;
350
351 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
352 return (EBADF);
353 if (fp->f_type != DTYPE_VNODE) {
354 error = ESPIPE;
355 } else {
356 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
357 uap->offset, FOF_OFFSET, retval);
358 }
359 frele(fp);
360 return(error);
361 }
362
/*
 * Code common for write(2) and pwrite(2).
 *
 * Builds a single-element uio over the user buffer and hands it to the
 * fileops write routine.  `flags' is 0 for write(2), or FOF_OFFSET
 * (with a valid `offset') for pwrite(2).  On success, *retval is the
 * number of bytes actually transferred; an interrupted partial
 * transfer (ERESTART/EINTR/EWOULDBLOCK after some progress) is
 * reported as success with a short count.
 *
 * NOTE(review): SIGPIPE is posted here on EPIPE unconditionally,
 * whereas rwuio() below exempts sockets ("the socket layer handles
 * SIGPIPE").  Presumably writes to sockets never reach this path via
 * the ESPIPE check in pwrite(), but write(2) can — confirm whether
 * the socket exemption is needed here too.
 */
static int
dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval)
	struct proc *p;
	struct file *fp;
	int fd, flags;
	const void *buf;
	size_t nbyte;
	off_t offset;
	int *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#if KTRACE
	struct iovec ktriov;
	struct uio ktruio;
	int didktr = 0;
#endif

	/* cast through uintptr_t drops the const qualifier for iov_base */
	aiov.iov_base = (void *)(uintptr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = offset;
	/* uio_resid is an int historically; reject oversize requests */
	if (nbyte > INT_MAX)
		return (EINVAL);
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
#if KTRACE
	/*
	 * if tracing, save a copy of iovec and uio
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = aiov;
		ktruio = auio;
		didktr = 1;
	}
#endif
	cnt = nbyte;
	/* give the buffer cache a chance to flush before dirtying more */
	if (fp->f_type == DTYPE_VNODE)
		bwillwrite();
	if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
		/* partial transfer interrupted: report the short count */
		if (auio.uio_resid != cnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually moved */
#if KTRACE
	if (didktr && error == 0) {
		ktruio.uio_iov = &ktriov;
		ktruio.uio_resid = cnt;
		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error,
		    KERNEL_FUNNEL);
	}
#endif
	*retval = cnt;
	return (error);
}
425
426 /*
427 * Gather write system call
428 */
429 #ifndef _SYS_SYSPROTO_H_
430 struct writev_args {
431 int fd;
432 struct iovec *iovp;
433 u_int iovcnt;
434 };
435 #endif
436 int
437 writev(p, uap, retval)
438 struct proc *p;
439 register struct writev_args *uap;
440 int *retval;
441 {
442 struct uio auio;
443 register struct iovec *iov;
444 int error;
445 struct iovec aiov[UIO_SMALLIOV];
446
447 if (uap->iovcnt > UIO_SMALLIOV) {
448 if (uap->iovcnt > UIO_MAXIOV)
449 return (EINVAL);
450 if ((iov = (struct iovec *)
451 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
452 return (ENOMEM);
453 } else
454 iov = aiov;
455 auio.uio_iov = iov;
456 auio.uio_iovcnt = uap->iovcnt;
457 auio.uio_rw = UIO_WRITE;
458 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
459 uap->iovcnt * sizeof (struct iovec));
460 if (!error)
461 error = rwuio(p, uap->fd, &auio, UIO_WRITE, retval);
462 if (uap->iovcnt > UIO_SMALLIOV)
463 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
464 return (error);
465 }
466
/*
 * Common worker for readv(2)/writev(2): validate the iovec lengths,
 * compute the total residual, then perform the transfer through the
 * fileops read/write routine.  *retval is set to the number of bytes
 * actually transferred; an interrupted partial transfer is reported
 * as success with a short count.
 *
 * Unlike read()/write(), the file is obtained with fdgetf() (no
 * fref/frele reference pair here).
 */
int
rwuio(p, fdes, uio, rw, retval)
	struct proc *p;
	int fdes;
	register struct uio *uio;
	enum uio_rw rw;
	int *retval;
{
	struct file *fp;
	register struct iovec *iov;
	int i, count, flag, error;	/* NOTE(review): `flag' is unused */
#if KTRACE
	struct iovec *ktriov;
	struct uio ktruio;
	int didktr = 0;
	u_int iovlen;
#endif

	if (error = fdgetf(p, fdes, &fp))
		return (error);

	/* descriptor must be open for the direction requested */
	if ((fp->f_flag&(rw==UIO_READ ? FREAD : FWRITE)) == 0) {
		return(EBADF);
	}
	uio->uio_resid = 0;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
	iov = uio->uio_iov;
	/*
	 * Sum iovec lengths into uio_resid, rejecting negative lengths
	 * and overflow of the (signed int) total.
	 * NOTE(review): if iov_len is unsigned on this platform the
	 * `iov_len < 0' test is dead — confirm against <sys/uio.h>.
	 */
	for (i = 0; i < uio->uio_iovcnt; i++) {
		if (iov->iov_len < 0) {
			return(EINVAL);
		}
		uio->uio_resid += iov->iov_len;
		if (uio->uio_resid < 0) {
			return(EINVAL);
		}
		iov++;
	}
	count = uio->uio_resid;
#if KTRACE
	/*
	 * if tracing, save a copy of iovec (the transfer consumes the
	 * original).  All returns below this point pass through the
	 * FREE() at the bottom, so the allocation cannot leak.
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		iovlen = uio->uio_iovcnt * sizeof (struct iovec);
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *uio;
		didktr = 1;
	}
#endif

	if (rw == UIO_READ) {
		/* interrupted partial read: report the short count */
		if (error = fo_read(fp, uio, fp->f_cred, 0, p))
			if (uio->uio_resid != count && (error == ERESTART ||
				error == EINTR || error == EWOULDBLOCK))
				error = 0;
	} else {
		/* give the buffer cache a chance to flush first */
		if (fp->f_type == DTYPE_VNODE)
			bwillwrite();
		if (error = fo_write(fp, uio, fp->f_cred, 0, p)) {
			if (uio->uio_resid != count && (error == ERESTART ||
				error == EINTR || error == EWOULDBLOCK))
				error = 0;
			/* The socket layer handles SIGPIPE */
			if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
				psignal(p, SIGPIPE);
		}
	}

	*retval = count - uio->uio_resid;

#if KTRACE
	if (didktr) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = *retval;
			ktrgenio(p->p_tracep, fdes, rw, &ktruio, error,
			    KERNEL_FUNNEL);
		}
		FREE(ktriov, M_TEMP);
	}
#endif

	return(error);
}
553
554 /*
555 * Ioctl system call
556 */
557 #ifndef _SYS_SYSPROTO_H_
558 struct ioctl_args {
559 int fd;
560 u_long com;
561 caddr_t data;
562 };
563 #endif
564 int
565 ioctl(p, uap, retval)
566 struct proc *p;
567 register struct ioctl_args *uap;
568 register_t *retval;
569 {
570 struct file *fp;
571 register u_long com;
572 register int error;
573 register u_int size;
574 caddr_t data, memp;
575 int tmp;
576 #define STK_PARAMS 128
577 char stkbuf[STK_PARAMS];
578
579 if (error = fdgetf(p, uap->fd, &fp))
580 return (error);
581
582 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
583 return (EBADF);
584
585 #if NETAT
586 /*
587 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
588 * while implementing an ATioctl system call
589 */
590 {
591 extern int appletalk_inited;
592
593 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
594 #ifdef APPLETALK_DEBUG
595 kprintf("ioctl: special AppleTalk \n");
596 #endif
597 error = fo_ioctl(fp, uap->com, uap->data, p);
598 return(error);
599 }
600 }
601
602 #endif /* NETAT */
603
604
605 switch (com = uap->com) {
606 case FIONCLEX:
607 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
608 return (0);
609 case FIOCLEX:
610 *fdflags(p, uap->fd) |= UF_EXCLOSE;
611 return (0);
612 }
613
614 /*
615 * Interpret high order word to find amount of data to be
616 * copied to/from the user's address space.
617 */
618 size = IOCPARM_LEN(com);
619 if (size > IOCPARM_MAX)
620 return (ENOTTY);
621 memp = NULL;
622 if (size > sizeof (stkbuf)) {
623 if ((memp = (caddr_t)kalloc(size)) == 0)
624 return(ENOMEM);
625 data = memp;
626 } else
627 data = stkbuf;
628 if (com&IOC_IN) {
629 if (size) {
630 error = copyin(uap->data, data, (u_int)size);
631 if (error) {
632 if (memp)
633 kfree(memp, size);
634 return (error);
635 }
636 } else
637 *(caddr_t *)data = uap->data;
638 } else if ((com&IOC_OUT) && size)
639 /*
640 * Zero the buffer so the user always
641 * gets back something deterministic.
642 */
643 bzero(data, size);
644 else if (com&IOC_VOID)
645 *(caddr_t *)data = uap->data;
646
647 switch (com) {
648
649 case FIONBIO:
650 if (tmp = *(int *)data)
651 fp->f_flag |= FNONBLOCK;
652 else
653 fp->f_flag &= ~FNONBLOCK;
654 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
655 break;
656
657 case FIOASYNC:
658 if (tmp = *(int *)data)
659 fp->f_flag |= FASYNC;
660 else
661 fp->f_flag &= ~FASYNC;
662 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
663 break;
664
665 case FIOSETOWN:
666 tmp = *(int *)data;
667 if (fp->f_type == DTYPE_SOCKET) {
668 ((struct socket *)fp->f_data)->so_pgid = tmp;
669 error = 0;
670 break;
671 }
672 if (tmp <= 0) {
673 tmp = -tmp;
674 } else {
675 struct proc *p1 = pfind(tmp);
676 if (p1 == 0) {
677 error = ESRCH;
678 break;
679 }
680 tmp = p1->p_pgrp->pg_id;
681 }
682 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
683 break;
684
685 case FIOGETOWN:
686 if (fp->f_type == DTYPE_SOCKET) {
687 error = 0;
688 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
689 break;
690 }
691 error = fo_ioctl(fp, TIOCGPGRP, data, p);
692 *(int *)data = -*(int *)data;
693 break;
694
695 default:
696 error = fo_ioctl(fp, com, data, p);
697 /*
698 * Copy any data to user, size was
699 * already set and checked above.
700 */
701 if (error == 0 && (com&IOC_OUT) && size)
702 error = copyout(data, uap->data, (u_int)size);
703 break;
704 }
705 if (memp)
706 kfree(memp, size);
707 return (error);
708 }
709
/*
 * selwait: global channel that all threads sleeping in select wait on.
 * nselcoll: count of select collisions (a wakeup raced with a record).
 */
int selwait, nselcoll;
/* select scan passes: first pass records wait-queue links, second does not */
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
			int nfd, register_t *retval, int sel_pass);
static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
			int nfd, int * count, int * nfcount);
extern uint64_t tvtoabstime(struct timeval *tvp);
720
721 /*
722 * Select system call.
723 */
724 #ifndef _SYS_SYSPROTO_H_
725 struct select_args {
726 int nd;
727 u_int32_t *in;
728 u_int32_t *ou;
729 u_int32_t *ex;
730 struct timeval *tv;
731 };
732 #endif
/*
 * select(2) system call, first half.
 *
 * Sets up the per-uthread select state (bit vectors, deadline, and the
 * wait-queue subordinate with one link per watched descriptor), then
 * tails into selprocess().  The state lives in the uthread because the
 * call may block and be resumed via the selcontinue() continuation,
 * at which point this stack frame no longer exists.
 */
int
select(p, uap, retval)
	register struct proc *p;
	register struct select_args *uap;
	register_t *retval;
{
	int error = 0;
	u_int ni, nw, size;
	thread_act_t th_act;
	struct uthread	*uth;
	struct _select *sel;
	int needzerofill = 1;
	int kfcount =0;		/* NOTE(review): set but never used */
	int nfcount = 0;
	int count = 0;

	th_act = current_act();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_state.ss_select;
	/* retval must come from the uthread so selprocess() can find it */
	retval = (int *)get_bsduthreadrval(th_act);
	*retval = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	/* NOTE(review): clamps by writing back into uap */
	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);	/* bytes per in/out/ex vector */

	/*
	 * if this is the first select by the thread
	 * allocate the space for bits.
	 */
	if (sel->nbytes == 0) {
		sel->nbytes = 3 * ni;
		MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
		needzerofill = 0;
	}

	/*
	 * if the previously allocated space for the bits
	 * is smaller than what is requested. Reallocate.
	 */
	if (sel->nbytes < (3 * ni)) {
		sel->nbytes = (3 * ni);
		FREE(sel->ibits, M_TEMP);
		FREE(sel->obits, M_TEMP);
		MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
		needzerofill = 0;
	}

	/* reusing a previous allocation: clear stale bits */
	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 * (on failure, falls through to selprocess with error set)
	 */
#define	getbits(name, x) \
	do { \
		if (uap->name && (error = copyin((caddr_t)uap->name, \
			(caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (uap->tv) {
		struct timeval atv;

		error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
		if (error)
			goto continuation;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto continuation;
		}

		/* convert the relative timeout into an absolute deadline */
		clock_absolutetime_interval_to_deadline(
						tvtoabstime(&atv), &sel->abstime);
	}
	else
		sel->abstime = 0;	/* 0 means no timeout supplied */

	sel->nfcount = 0;
	if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) {
		goto continuation;
	}

	sel->nfcount = nfcount;	/* descriptors that are sockets */
	sel->count = count;	/* all watched descriptors */
	/* one wait-queue link per descriptor, after the subordinate itself */
	size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK);
	if (sel->allocsize) {
		if (uth->uu_wqsub == 0)
			panic("select: wql memory smashed");
		/* needed for the select now */
		if (size > sel->allocsize) {
			kfree(uth->uu_wqsub, sel->allocsize);
			sel->allocsize = size;
			uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
			if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
				panic("failed to allocate memory for waitqueue\n");
			sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
		}
	} else {
		/* first select on this thread: allocate the wait queue area */
		sel->count = count;
		sel->allocsize = size;
		uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
		if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
			panic("failed to allocate memory for waitqueue\n");
		sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
	}
	bzero(uth->uu_wqsub, size);
	wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:
	return selprocess(error, SEL_FIRSTPASS);
}
864
/*
 * Continuation entry point for select: re-entered after a thread
 * blocked in tsleep1() is woken (the original select() stack frame is
 * gone by then).  Resumes selprocess() in the second pass.
 */
int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}
870
871 int
872 selprocess(error, sel_pass)
873 {
874 int ncoll;
875 u_int ni, nw;
876 thread_act_t th_act;
877 struct uthread *uth;
878 struct proc *p;
879 struct select_args *uap;
880 int *retval;
881 struct _select *sel;
882 int unwind = 1;
883 int prepost = 0;
884 int somewakeup = 0;
885 int doretry = 0;
886 wait_result_t wait_result;
887
888 p = current_proc();
889 th_act = current_act();
890 uap = (struct select_args *)get_bsduthreadarg(th_act);
891 retval = (int *)get_bsduthreadrval(th_act);
892 uth = get_bsdthread_info(th_act);
893 sel = &uth->uu_state.ss_select;
894
895 /* if it is first pass wait queue is not setup yet */
896 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
897 unwind = 0;
898 if (sel->count == 0)
899 unwind = 0;
900 retry:
901 if (error != 0) {
902 goto done;
903 }
904
905 ncoll = nselcoll;
906 p->p_flag |= P_SELECT;
907 /* skip scans if the select is just for timeouts */
908 if (sel->count) {
909 if (sel_pass == SEL_FIRSTPASS)
910 wait_queue_sub_clearrefs(uth->uu_wqsub);
911
912 error = selscan(p, sel, uap->nd, retval, sel_pass);
913 if (error || *retval) {
914 goto done;
915 }
916 if (prepost) {
917 /* if the select of log, then we canwakeup and discover some one
918 * else already read the data; go toselct again if time permits
919 */
920 prepost = 0;
921 doretry = 1;
922 }
923 if (somewakeup) {
924 somewakeup = 0;
925 doretry = 1;
926 }
927 }
928
929 if (uap->tv) {
930 uint64_t now;
931
932 clock_get_uptime(&now);
933 if (now >= sel->abstime)
934 goto done;
935 }
936
937 if (doretry) {
938 /* cleanup obits and try again */
939 doretry = 0;
940 sel_pass = SEL_FIRSTPASS;
941 goto retry;
942 }
943
944 /*
945 * To effect a poll, the timeout argument should be
946 * non-nil, pointing to a zero-valued timeval structure.
947 */
948 if (uap->tv && sel->abstime == 0) {
949 goto done;
950 }
951
952 /* No spurious wakeups due to colls,no need to check for them */
953 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
954 sel_pass = SEL_FIRSTPASS;
955 goto retry;
956 }
957
958 p->p_flag &= ~P_SELECT;
959
960 /* if the select is just for timeout skip check */
961 if (sel->count &&(sel_pass == SEL_SECONDPASS))
962 panic("selprocess: 2nd pass assertwaiting");
963
964 /* Wait Queue Subordinate has waitqueue as first element */
965 wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub,
966 &selwait, THREAD_ABORTSAFE);
967 if (wait_result != THREAD_AWAKENED) {
968 /* there are no preposted events */
969 error = tsleep1(NULL, PSOCK | PCATCH,
970 "select", sel->abstime, selcontinue);
971 } else {
972 prepost = 1;
973 error = 0;
974 }
975
976 sel_pass = SEL_SECONDPASS;
977 if (error == 0) {
978 if (!prepost)
979 somewakeup =1;
980 goto retry;
981 }
982 done:
983 if (unwind)
984 wait_subqueue_unlink_all(uth->uu_wqsub);
985 p->p_flag &= ~P_SELECT;
986 /* select is not restarted after signals... */
987 if (error == ERESTART)
988 error = EINTR;
989 if (error == EWOULDBLOCK)
990 error = 0;
991 nw = howmany(uap->nd, NFDBITS);
992 ni = nw * sizeof(fd_mask);
993
994 #define putbits(name, x) \
995 do { \
996 if (uap->name && (error2 = copyout((caddr_t)&sel->obits[(x) * nw], \
997 (caddr_t)uap->name, ni))) \
998 error = error2; \
999 } while (0)
1000
1001 if (error == 0) {
1002 int error2;
1003
1004 putbits(in, 0);
1005 putbits(ou, 1);
1006 putbits(ex, 2);
1007 #undef putbits
1008 }
1009 return(error);
1010 }
1011
/*
 * Scan the select bit vectors, polling each named descriptor via
 * fo_select().  Ready descriptors get their bit set in the output
 * vectors; *retval is the count of ready descriptors.
 *
 * The scan is split in two: non-socket descriptors are polled under
 * the kernel funnel, then socket descriptors under the network funnel.
 * On the first pass a per-descriptor wait-queue link (from sel->wql)
 * is handed to fo_select() so selrecord() can register the thread;
 * on the second pass a NULL link suppresses recording.
 */
static int
selscan(p, sel, nfd, retval, sel_pass)
	struct proc *p;
	struct _select *sel;
	int nfd;
	register_t *retval;
	int sel_pass;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register u_int32_t bits;
	struct file *fp;
	int n = 0;		/* number of ready descriptors */
	int nc = 0;		/* index of the current wait-queue link */
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	char * wql;
	int nfunnel = 0;	/* NOTE(review): unused */
	int count, nfcount;
	char * wql_ptr;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*retval=0;
		return(EIO);
	}

	ibits = sel->ibits;
	obits = sel->obits;
	wql = sel->wql;

	count = sel->count;
	nfcount = sel->nfcount;

	if (nfcount > count)
		panic("selcount count<nfcount");

	nw = howmany(nfd, NFDBITS);

	nc = 0;
	if ( nfcount < count) {
		/* some or all in kernel funnel (non-socket descriptors) */
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];
			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];
				/* walk the set bits in this word, lowest first */
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];
					if (fp == NULL ||
						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS)
						wql_ptr = (char *)0;
					else
						wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
					if (fp->f_ops && (fp->f_type != DTYPE_SOCKET)
						&& fo_select(fp, flag[msk], wql_ptr, p)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
	}

	if (nfcount) {
		/* socket file descriptors for scan */
		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);

		/* nc restarts: sockets were skipped above, so their links
		 * are reassigned from the start of the array */
		nc = 0;
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];
			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];
					if (fp == NULL ||
						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						/* must drop back to the kernel funnel before returning */
						thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS)
						wql_ptr = (char *)0;
					else
						wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
					if (fp->f_ops && (fp->f_type == DTYPE_SOCKET) &&
						fo_select(fp, flag[msk], wql_ptr, p)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
	}

	*retval = n;
	return (0);
}
1123
1124 /*ARGSUSED*/
1125 int
1126 seltrue(dev, flag, p)
1127 dev_t dev;
1128 int flag;
1129 struct proc *p;
1130 {
1131
1132 return (1);
1133 }
1134
1135 static int
1136 selcount(p, ibits, obits, nfd, count, nfcount)
1137 struct proc *p;
1138 u_int32_t *ibits, *obits;
1139 int nfd;
1140 int *count;
1141 int *nfcount;
1142 {
1143 register struct filedesc *fdp = p->p_fd;
1144 register int msk, i, j, fd;
1145 register u_int32_t bits;
1146 struct file *fp;
1147 int n = 0;
1148 int nc = 0;
1149 int nfc = 0;
1150 static int flag[3] = { FREAD, FWRITE, 0 };
1151 u_int32_t *iptr, *fptr, *fbits;
1152 u_int nw;
1153
1154 /*
1155 * Problems when reboot; due to MacOSX signal probs
1156 * in Beaker1C ; verify that the p->p_fd is valid
1157 */
1158 if (fdp == NULL) {
1159 *count=0;
1160 *nfcount=0;
1161 return(EIO);
1162 }
1163
1164 nw = howmany(nfd, NFDBITS);
1165
1166
1167 for (msk = 0; msk < 3; msk++) {
1168 iptr = (u_int32_t *)&ibits[msk * nw];
1169 for (i = 0; i < nfd; i += NFDBITS) {
1170 bits = iptr[i/NFDBITS];
1171 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1172 bits &= ~(1 << j);
1173 fp = fdp->fd_ofiles[fd];
1174 if (fp == NULL ||
1175 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1176 *count=0;
1177 *nfcount=0;
1178 return(EBADF);
1179 }
1180 if (fp->f_type == DTYPE_SOCKET)
1181 nfc++;
1182 n++;
1183 }
1184 }
1185 }
1186 *count = n;
1187 *nfcount = nfc;
1188 return (0);
1189 }
1190
1191 /*
1192 * Record a select request.
1193 */
1194 void
1195 selrecord(selector, sip, p_wql)
1196 struct proc *selector;
1197 struct selinfo *sip;
1198 void * p_wql;
1199 {
1200 thread_act_t cur_act = current_act();
1201 struct uthread * ut = get_bsdthread_info(cur_act);
1202
1203 /* need to look at collisions */
1204
1205 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1206 return;
1207 }
1208
1209 /*do not record if this is second pass of select */
1210 if((p_wql == (void *)0)) {
1211 return;
1212 }
1213
1214 if ((sip->si_flags & SI_INITED) == 0) {
1215 wait_queue_init(&sip->wait_queue, SYNC_POLICY_FIFO);
1216 sip->si_flags |= SI_INITED;
1217 sip->si_flags &= ~SI_CLEAR;
1218 }
1219
1220 if (sip->si_flags & SI_RECORDED) {
1221 sip->si_flags |= SI_COLL;
1222 } else
1223 sip->si_flags &= ~SI_COLL;
1224
1225 sip->si_flags |= SI_RECORDED;
1226 if (!wait_queue_member(&sip->wait_queue, ut->uu_wqsub))
1227 wait_queue_link_noalloc(&sip->wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql);
1228
1229 return;
1230 }
1231
1232 void
1233 selwakeup(sip)
1234 register struct selinfo *sip;
1235 {
1236
1237 if ((sip->si_flags & SI_INITED) == 0) {
1238 return;
1239 }
1240
1241 if (sip->si_flags & SI_COLL) {
1242 nselcoll++;
1243 sip->si_flags &= ~SI_COLL;
1244 #if 0
1245 /* will not support */
1246 //wakeup((caddr_t)&selwait);
1247 #endif
1248 }
1249
1250 if (sip->si_flags & SI_RECORDED) {
1251 wait_queue_wakeup_all(&sip->wait_queue, &selwait, THREAD_AWAKENED);
1252 sip->si_flags &= ~SI_RECORDED;
1253 }
1254
1255 }
1256
/*
 * Tear down all select state attached to this selinfo (e.g. when the
 * selectable object goes away): wake any recorded waiters, clear the
 * recorded/collision flags, mark the selinfo cleared, and unlink every
 * wait-queue link without freeing (the links belong to the selecting
 * threads' allocations).
 */
void
selthreadclear(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	wait_queue_unlinkall_nofree(&sip->wait_queue);
}
1272
1273
1274 extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp);
1275
1276 /*
1277 * called upon socket close. deque and free all events for
1278 * the socket
1279 */
1280 void
1281 evsofree(struct socket *sp)
1282 {
1283 struct eventqelt *eqp, *next;
1284
1285 if (sp == NULL) return;
1286
1287 for (eqp = sp->so_evlist.tqh_first; eqp != NULL; eqp = next) {
1288 next = eqp->ee_slist.tqe_next;
1289 evprocdeque(eqp->ee_proc, eqp); // remove from proc q if there
1290 TAILQ_REMOVE(&sp->so_evlist, eqp, ee_slist); // remove from socket q
1291 FREE(eqp, M_TEMP);
1292 }
1293 }
1294
1295
/* kdebug subclass for the watchevent/waitevent tracing below */
#define DBG_EVENT 0x10

/* per-operation kdebug codes, combined via MISCDBG_CODE() */
#define DBG_POST 0x10
#define DBG_WATCH 0x11
#define DBG_WAIT 0x12
#define DBG_MOD 0x13
#define DBG_EWAKEUP 0x14
#define DBG_ENQUEUE 0x15
#define DBG_DEQUEUE 0x16

#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1313
1314
/*
 * Enqueue this event if it is not already queued.  Wake up
 * the proc if we do queue this event to it.
 */
1319 void
1320 evprocenque(struct eventqelt *eqp)
1321 {
1322 struct proc *p;
1323
1324 assert(eqp);
1325 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, eqp, eqp->ee_flags, eqp->ee_eventmask,0,0);
1326 if (eqp->ee_flags & EV_QUEUED) {
1327 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1328 return;
1329 }
1330 eqp->ee_flags |= EV_QUEUED;
1331 eqp->ee_eventmask = 0; // disarm
1332 p = eqp->ee_proc;
1333 TAILQ_INSERT_TAIL(&p->p_evlist, eqp, ee_plist);
1334 KERNEL_DEBUG(DBG_MISC_EWAKEUP,0,0,0,eqp,0);
1335 wakeup(&p->p_evlist);
1336 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1337 }
1338
1339 /*
1340 * given either a sockbuf or a socket run down the
1341 * event list and queue ready events found
1342 */
/*
 * Given either a sockbuf or a socket, run down the socket's watch
 * event list and enqueue any events that are now ready.
 *
 * sp    - socket to scan (may be NULL if sb is supplied)
 * sb    - sockbuf whose owning socket should be scanned instead
 * event - EV_* bits describing what just happened
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int mask;
	struct eventqelt *evq;
	register struct tcpcb *tp;

	if (sb)
		sp = sb->sb_so;
	if (!sp || sp->so_evlist.tqh_first == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,0,0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark

		*/
		switch (event & EV_DMASK) {

		case EV_RWBYTES:
		case EV_OOB:
		case EV_RWBYTES|EV_OOB:
			if (event & EV_OOB) {
				if ((evq->ee_eventmask & EV_EX)) {
					if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) {
						mask |= EV_EX|EV_OOB;
					}
				}
			}
			if (event & EV_RWBYTES) {
				if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
					/*
					 * Parenthesized so the error test only applies
					 * to stream sockets: the original expression
					 * 'a && b || c' bound as '(a && b) || c',
					 * triggering the RESET path for non-stream
					 * sockets with so_error == ECONNRESET.
					 */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
					if (sp->so_state & SS_CANTRCVMORE) {
						/* read half closed */
						mask |= EV_RE|EV_FIN;
						evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
						break;
					}
					mask |= EV_RE;
					evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
				}

				if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
					/* same precedence fix as the read side above */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
					mask |= EV_WR;
					evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
				}
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				evq->ee_req.er_rcnt = sp->so_qlen + 1; /* incl this one */
				mask |= EV_RE|EV_RCONN;
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			/* unknown event class: bail without the END trace */
			return;
		} /* switch */

		if (mask) {
			evq->ee_req.er_eventbits |= mask;
			KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask,0,0);
			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
}
1478
1479 /*
1480 * remove and return the first event (eqp=NULL) or a specific
1481 * event, or return NULL if no events found
1482 */
1483 struct eventqelt *
1484 evprocdeque(struct proc *p, struct eventqelt *eqp)
1485 {
1486
1487 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_START,p,eqp,0,0,0);
1488
1489 if (eqp && ((eqp->ee_flags & EV_QUEUED) == NULL)) {
1490 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1491 return(NULL);
1492 }
1493 if (p->p_evlist.tqh_first == NULL) {
1494 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1495 return(NULL);
1496 }
1497 if (eqp == NULL) { // remove first
1498 eqp = p->p_evlist.tqh_first;
1499 }
1500 TAILQ_REMOVE(&p->p_evlist, eqp, ee_plist);
1501 eqp->ee_flags &= ~EV_QUEUED;
1502 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,eqp,0,0,0,0);
1503 return(eqp);
1504 }
1505
/* user argument structure for the watchevent() system call */
struct evwatch_args {
	struct eventreq *u_req;		/* user pointer to the event request */
	int u_eventmask;		/* EV_* mask of events to watch for */
};
1510
1511
/*
 * watchevent system call.  user passes us an event to watch
 * for.  we malloc an event object, initialize it, and queue
 * it to the open socket.  when the event occurs, postevent()
 * will enqueue it back to our proc where we can retrieve it
 * via waitevent().
 *
 * should this prevent duplicate events on same socket?
 */
1521 int
1522 watchevent(p, uap, retval)
1523 struct proc *p;
1524 struct evwatch_args *uap;
1525 register_t *retval;
1526 {
1527 struct eventqelt *eqp = (struct eventqelt *)0;
1528 struct eventqelt *np;
1529 struct eventreq *erp;
1530 struct file *fp;
1531 struct socket *sp;
1532 int error;
1533
1534 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
1535
1536 // get a qelt and fill with users req
1537 MALLOC(eqp, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
1538 if (!eqp) panic("can't MALLOC eqp");
1539 erp = &eqp->ee_req;
1540 // get users request pkt
1541 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1542 sizeof(struct eventreq))) {
1543 FREE(eqp, M_TEMP);
1544 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1545 return(error);
1546 }
1547 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,eqp,0,0);
1548 // validate, freeing qelt if errors
1549 error = 0;
1550 if (erp->er_type != EV_FD) {
1551 error = EINVAL;
1552 } else if (erp->er_handle < 0) {
1553 error = EBADF;
1554 } else if (erp->er_handle > p->p_fd->fd_nfiles) {
1555 error = EBADF;
1556 } else if ((fp = *fdfile(p, erp->er_handle)) == NULL) {
1557 error = EBADF;
1558 } else if (fp->f_type != DTYPE_SOCKET) {
1559 error = EINVAL;
1560 }
1561 if (error) {
1562 FREE(eqp,M_TEMP);
1563 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1564 return(error);
1565 }
1566
1567 erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
1568 eqp->ee_proc = p;
1569 eqp->ee_eventmask = uap->u_eventmask & EV_MASK;
1570 eqp->ee_flags = 0;
1571
1572 sp = (struct socket *)fp->f_data;
1573 assert(sp != NULL);
1574
1575 // only allow one watch per file per proc
1576 for (np = sp->so_evlist.tqh_first; np != NULL; np = np->ee_slist.tqe_next) {
1577 if (np->ee_proc == p) {
1578 FREE(eqp,M_TEMP);
1579 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
1580 return(EINVAL);
1581 }
1582 }
1583
1584 TAILQ_INSERT_TAIL(&sp->so_evlist, eqp, ee_slist);
1585 postevent(sp, 0, EV_RWBYTES); // catch existing events
1586 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
1587 return(0);
1588 }
1589
/* user argument structure for the waitevent() system call */
struct evwait_args {
	struct eventreq *u_req;		/* user buffer to receive the event */
	struct timeval *tv;		/* timeout; NULL blocks, zero polls */
};
1594
1595 /*
1596 * waitevent system call.
1597 * grabs the next waiting event for this proc and returns
1598 * it. if no events, user can request to sleep with timeout
1599 * or poll mode (tv=NULL);
1600 */
1601 int
1602 waitevent(p, uap, retval)
1603 struct proc *p;
1604 struct evwait_args *uap;
1605 register_t *retval;
1606 {
1607 int error = 0;
1608 struct eventqelt *eqp;
1609 uint64_t abstime, interval;
1610
1611 if (uap->tv) {
1612 struct timeval atv;
1613
1614 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
1615 if (error)
1616 return(error);
1617 if (itimerfix(&atv)) {
1618 error = EINVAL;
1619 return(error);
1620 }
1621
1622 interval = tvtoabstime(&atv);
1623 }
1624 else
1625 abstime = interval = 0;
1626
1627 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
1628
1629 retry:
1630 if ((eqp = evprocdeque(p,NULL)) != NULL) {
1631 error = copyout((caddr_t)&eqp->ee_req,
1632 (caddr_t)uap->u_req, sizeof(struct eventreq));
1633 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
1634 eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0);
1635
1636 return (error);
1637 }
1638 else {
1639 if (uap->tv && interval == 0) {
1640 *retval = 1; // poll failed
1641 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
1642
1643 return (error);
1644 }
1645
1646 if (interval != 0)
1647 clock_absolutetime_interval_to_deadline(interval, &abstime)
1648
1649 KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0);
1650 error = tsleep1(&p->p_evlist, PSOCK | PCATCH,
1651 "waitevent", abstime, (int (*)(int))0);
1652 KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0);
1653 if (error == 0)
1654 goto retry;
1655 if (error == ERESTART)
1656 error = EINTR;
1657 if (error == EWOULDBLOCK) {
1658 *retval = 1;
1659 error = 0;
1660 }
1661 }
1662
1663 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
1664
1665 return (error);
1666 }
1667
/* user argument structure for the modwatch() system call */
struct modwatch_args {
	struct eventreq *u_req;		/* user pointer identifying the watch */
	int u_eventmask;		/* new EV_* mask, or EV_RM to remove */
};
1672
/*
 * modwatch system call.  user passes in the event to modify.
 * if we find it we reset the event bits and queue/dequeue
 * the event as needed.
 */
1678 int
1679 modwatch(p, uap, retval)
1680 struct proc *p;
1681 struct modwatch_args *uap;
1682 register_t *retval;
1683 {
1684 struct eventreq er;
1685 struct eventreq *erp = &er;
1686 struct eventqelt *evq;
1687 int error;
1688 struct file *fp;
1689 struct socket *sp;
1690 int flag;
1691
1692 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
1693
1694 // get users request pkt
1695 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1696 sizeof(struct eventreq))) return(error);
1697
1698 if (erp->er_type != EV_FD) return(EINVAL);
1699 if (erp->er_handle < 0) return(EBADF);
1700 if (erp->er_handle > p->p_fd->fd_nfiles) return(EBADF);
1701 if ((fp = *fdfile(p, erp->er_handle)) == NULL)
1702 return(EBADF);
1703 if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock
1704 sp = (struct socket *)fp->f_data;
1705 assert(sp != NULL);
1706
1707
1708 // locate event if possible
1709 for (evq = sp->so_evlist.tqh_first;
1710 evq != NULL; evq = evq->ee_slist.tqe_next) {
1711 if (evq->ee_proc == p) break;
1712 }
1713
1714 if (evq == NULL) {
1715 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
1716 return(EINVAL);
1717 }
1718 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0);
1719
1720 if (uap->u_eventmask == EV_RM) {
1721 evprocdeque(p, evq);
1722 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist);
1723 FREE(evq, M_TEMP);
1724 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
1725 return(0);
1726 }
1727
1728 switch (uap->u_eventmask & EV_MASK) {
1729
1730 case 0:
1731 flag = 0;
1732 break;
1733
1734 case EV_RE:
1735 case EV_WR:
1736 case EV_RE|EV_WR:
1737 flag = EV_RWBYTES;
1738 break;
1739
1740 case EV_EX:
1741 flag = EV_OOB;
1742 break;
1743
1744 case EV_EX|EV_RE:
1745 case EV_EX|EV_WR:
1746 case EV_EX|EV_RE|EV_WR:
1747 flag = EV_OOB|EV_RWBYTES;
1748 break;
1749
1750 default:
1751 return(EINVAL);
1752 }
1753
1754 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
1755 evprocdeque(p, evq);
1756 evq->ee_req.er_eventbits = 0;
1757 postevent(sp, 0, flag);
1758 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,sp,flag,0);
1759 return(0);
1760 }