/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>

#include <sys/mount.h>
#include <sys/protosw.h>
#include <sys/ev.h>
#include <sys/user.h>
#include <sys/kdebug.h>
#include <kern/assert.h>
#include <kern/thread_act.h>

#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_debug.h>
/* for wait queue based select */
#include <kern/wait_queue.h>
#if KTRACE
#include <sys/ktrace.h>
#endif

static int dofileread __P((struct proc *, struct file *, int, void *,
			size_t, off_t, int, int*));
static int dofilewrite __P((struct proc *, struct file *, int,
			const void *, size_t, off_t, int, int*));

static struct file*
holdfp(fdp, fd, flag)
	struct filedesc* fdp;
	int fd, flag;
{
	struct file* fp;

	if (((u_int)fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fp->f_flag & flag) == 0) {
		return (NULL);
	}
	if (fref(fp) == -1)
		return (NULL);
	return (fp);
}
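
/*
 * Usage sketch (illustrative, not from the original source): callers
 * pair holdfp() with frele() so the struct file cannot be torn down
 * while I/O is in progress, e.g.
 *
 *	if ((fp = holdfp(p->p_fd, fd, FREAD)) == NULL)
 *		return (EBADF);
 *	error = dofileread(p, fp, fd, buf, nbyte, (off_t)-1, 0, retval);
 *	frele(fp);
 *
 * read(), pread(), write() and pwrite() below all follow this pattern.
 */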

/*
 * Read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct read_args {
	int fd;
	char *cbuf;
	u_int nbyte;
};
#endif
int
read(p, uap, retval)
	struct proc *p;
	register struct read_args *uap;
	register_t *retval;
{
	register struct file *fp;
	int error;

	if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
		return (EBADF);
	error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);
	frele(fp);
	return(error);
}

/*
 * Pread system call
 */
#ifndef _SYS_SYSPROTO_H_
struct pread_args {
	int fd;
	void *buf;
	size_t nbyte;
#ifdef DOUBLE_ALIGN_PARAMS
	int pad;
#endif
	off_t offset;
};
#endif
int
pread(p, uap, retval)
	struct proc *p;
	register struct pread_args *uap;
	int *retval;
{
	register struct file *fp;
	int error;

	if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
	} else {
		error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);
	}
	frele(fp);
	return(error);
}

/*
 * Code common for read and pread
 */
int
dofileread(p, fp, fd, buf, nbyte, offset, flags, retval)
	struct proc *p;
	struct file *fp;
	int fd, flags;
	void *buf;
	size_t nbyte;
	off_t offset;
	int *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#if KTRACE
	struct iovec ktriov;
	struct uio ktruio;
	int didktr = 0;
#endif

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = offset;
	if (nbyte > INT_MAX)
		return (EINVAL);
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
#if KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = aiov;
		ktruio = auio;
		didktr = 1;
	}
#endif
	cnt = nbyte;

	if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio.uio_resid;
#if KTRACE
	if (didktr && error == 0) {
		ktruio.uio_iov = &ktriov;
		ktruio.uio_resid = cnt;
		ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error,
		    KERNEL_FUNNEL);
	}
#endif
	*retval = cnt;
	return (error);
}
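
/*
 * Note (illustrative, not from the original source): if fo_read() is
 * interrupted (EINTR/ERESTART) or would block after some bytes have
 * already been transferred, the error is dropped above and the partial
 * byte count is returned instead; e.g. a read() of 8192 bytes that is
 * signalled after 4096 bytes simply returns 4096 to the caller.
 */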

/*
 * Scatter read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct readv_args {
	int fd;
	struct iovec *iovp;
	u_int iovcnt;
};
#endif
int
readv(p, uap, retval)
	struct proc *p;
	register struct readv_args *uap;
	int *retval;
{
	struct uio auio;
	register struct iovec *iov;
	int error;
	struct iovec aiov[UIO_SMALLIOV];

	if (uap->iovcnt > UIO_SMALLIOV) {
		if (uap->iovcnt > UIO_MAXIOV)
			return (EINVAL);
		if ((iov = (struct iovec *)
		    kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
			return (ENOMEM);
	} else
		iov = aiov;
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_rw = UIO_READ;
	error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
	    uap->iovcnt * sizeof (struct iovec));
	if (!error)
		error = rwuio(p, uap->fd, &auio, UIO_READ, retval);
	if (uap->iovcnt > UIO_SMALLIOV)
		kfree(iov, sizeof(struct iovec)*uap->iovcnt);
	return (error);
}

/*
 * Write system call
 */
#ifndef _SYS_SYSPROTO_H_
struct write_args {
	int fd;
	char *cbuf;
	u_int nbyte;
};
#endif
int
write(p, uap, retval)
	struct proc *p;
	register struct write_args *uap;
	int *retval;
{
	register struct file *fp;
	int error;

	if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
		return (EBADF);
	error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte,
		(off_t)-1, 0, retval);
	frele(fp);
	return(error);
}

/*
 * Pwrite system call
 */
#ifndef _SYS_SYSPROTO_H_
struct pwrite_args {
	int fd;
	const void *buf;
	size_t nbyte;
#ifdef DOUBLE_ALIGN_PARAMS
	int pad;
#endif
	off_t offset;
};
#endif
int
pwrite(p, uap, retval)
	struct proc *p;
	register struct pwrite_args *uap;
	int *retval;
{
	register struct file *fp;
	int error;

	if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
	} else {
		error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);
	}
	frele(fp);
	return(error);
}

static int
dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval)
	struct proc *p;
	struct file *fp;
	int fd, flags;
	const void *buf;
	size_t nbyte;
	off_t offset;
	int *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#if KTRACE
	struct iovec ktriov;
	struct uio ktruio;
	int didktr = 0;
#endif

	aiov.iov_base = (void *)(uintptr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = offset;
	if (nbyte > INT_MAX)
		return (EINVAL);
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
#if KTRACE
	/*
	 * if tracing, save a copy of iovec and uio
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = aiov;
		ktruio = auio;
		didktr = 1;
	}
#endif
	cnt = nbyte;
	if (fp->f_type == DTYPE_VNODE)
		bwillwrite();
	if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#if KTRACE
	if (didktr && error == 0) {
		ktruio.uio_iov = &ktriov;
		ktruio.uio_resid = cnt;
		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error,
		    KERNEL_FUNNEL);
	}
#endif
	*retval = cnt;
	return (error);
}

/*
 * Gather write system call
 */
#ifndef _SYS_SYSPROTO_H_
struct writev_args {
	int fd;
	struct iovec *iovp;
	u_int iovcnt;
};
#endif
int
writev(p, uap, retval)
	struct proc *p;
	register struct writev_args *uap;
	int *retval;
{
	struct uio auio;
	register struct iovec *iov;
	int error;
	struct iovec aiov[UIO_SMALLIOV];

	if (uap->iovcnt > UIO_SMALLIOV) {
		if (uap->iovcnt > UIO_MAXIOV)
			return (EINVAL);
		if ((iov = (struct iovec *)
		    kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
			return (ENOMEM);
	} else
		iov = aiov;
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_rw = UIO_WRITE;
	error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
	    uap->iovcnt * sizeof (struct iovec));
	if (!error)
		error = rwuio(p, uap->fd, &auio, UIO_WRITE, retval);
	if (uap->iovcnt > UIO_SMALLIOV)
		kfree(iov, sizeof(struct iovec)*uap->iovcnt);
	return (error);
}

int
rwuio(p, fdes, uio, rw, retval)
	struct proc *p;
	int fdes;
	register struct uio *uio;
	enum uio_rw rw;
	int *retval;
{
	struct file *fp;
	register struct iovec *iov;
	int i, count, flag, error;
#if KTRACE
	struct iovec *ktriov;
	struct uio ktruio;
	int didktr = 0;
	u_int iovlen;
#endif

	if (error = fdgetf(p, fdes, &fp))
		return (error);

	if ((fp->f_flag&(rw==UIO_READ ? FREAD : FWRITE)) == 0) {
		return(EBADF);
	}
	uio->uio_resid = 0;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
	iov = uio->uio_iov;
	for (i = 0; i < uio->uio_iovcnt; i++) {
		if (iov->iov_len < 0) {
			return(EINVAL);
		}
		uio->uio_resid += iov->iov_len;
		if (uio->uio_resid < 0) {
			return(EINVAL);
		}
		iov++;
	}
	count = uio->uio_resid;
#if KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		iovlen = uio->uio_iovcnt * sizeof (struct iovec);
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *uio;
		didktr = 1;
	}
#endif

	if (rw == UIO_READ) {
		if (error = fo_read(fp, uio, fp->f_cred, 0, p))
			if (uio->uio_resid != count && (error == ERESTART ||
			    error == EINTR || error == EWOULDBLOCK))
				error = 0;
	} else {
		if (fp->f_type == DTYPE_VNODE)
			bwillwrite();
		if (error = fo_write(fp, uio, fp->f_cred, 0, p)) {
			if (uio->uio_resid != count && (error == ERESTART ||
			    error == EINTR || error == EWOULDBLOCK))
				error = 0;
			/* The socket layer handles SIGPIPE */
			if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
				psignal(p, SIGPIPE);
		}
	}

	*retval = count - uio->uio_resid;

#if KTRACE
	if (didktr) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = *retval;
			ktrgenio(p->p_tracep, fdes, rw, &ktruio, error,
			    KERNEL_FUNNEL);
		}
		FREE(ktriov, M_TEMP);
	}
#endif

	return(error);
}
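
/*
 * Note (illustrative, not from the original source; assumes the signed
 * 32-bit uio_resid used here): the validation loop above rejects both
 * negative iov_len values and a total that wraps negative.  For
 * example, two iovecs of 0x7fffffff bytes each push uio_resid below
 * zero, so rwuio() returns EINVAL instead of attempting an oversized
 * transfer.
 */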

/*
 * Ioctl system call
 */
#ifndef _SYS_SYSPROTO_H_
struct ioctl_args {
	int fd;
	u_long com;
	caddr_t data;
};
#endif
int
ioctl(p, uap, retval)
	struct proc *p;
	register struct ioctl_args *uap;
	register_t *retval;
{
	struct file *fp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	if (error = fdgetf(p, uap->fd, &fp))
		return (error);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

#if NETAT
	/*
	 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
	 * while implementing an ATioctl system call
	 */
	{
		extern int appletalk_inited;

		if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
#ifdef APPLETALK_DEBUG
			kprintf("ioctl: special AppleTalk \n");
#endif
			error = fo_ioctl(fp, uap->com, uap->data, p);
			return(error);
		}
	}

#endif /* NETAT */


	switch (com = uap->com) {
	case FIONCLEX:
		*fdflags(p, uap->fd) &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		*fdflags(p, uap->fd) |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0)
			return(ENOMEM);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(uap->data, data, (u_int)size);
			if (error) {
				if (memp)
					kfree(memp, size);
				return (error);
			}
		} else
			*(caddr_t *)data = uap->data;
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = uap->data;

	switch (com) {

	case FIONBIO:
		if (tmp = *(int *)data)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if (tmp = *(int *)data)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = fo_ioctl(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, uap->data, (u_int)size);
		break;
	}
	if (memp)
		kfree(memp, size);
	return (error);
}
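
/*
 * Worked example (illustrative, not from the original source), using
 * the usual <sys/ioccom.h> encoding in which the top bits of the
 * command word carry the direction (IOC_IN/IOC_OUT/IOC_VOID) and
 * IOCPARM_LEN() extracts the argument size from the high-order word:
 *
 *	FIONBIO is defined as _IOW('f', 126, int), so for it
 *	IOCPARM_LEN(com) == sizeof(int) and (com & IOC_IN) is set;
 *	ioctl() therefore copies a 4-byte int in from uap->data into
 *	stkbuf before dispatching, and copies nothing back out.
 *	An _IOR()-style command would instead take the IOC_OUT path:
 *	the buffer is zeroed first and copied out after fo_ioctl().
 */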

int selwait, nselcoll;
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
			int nfd, register_t *retval, int sel_pass);
static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
			int nfd, int * count, int * nfcount);
extern uint64_t tvtoabstime(struct timeval *tvp);

/*
 * Select system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct select_args {
	int nd;
	u_int32_t *in;
	u_int32_t *ou;
	u_int32_t *ex;
	struct timeval *tv;
};
#endif
int
select(p, uap, retval)
	register struct proc *p;
	register struct select_args *uap;
	register_t *retval;
{
	int error = 0;
	u_int ni, nw, size;
	thread_act_t th_act;
	struct uthread *uth;
	struct _select *sel;
	int needzerofill = 1;
	int kfcount = 0;
	int nfcount = 0;
	int count = 0;

	th_act = current_act();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_state.ss_select;
	retval = (int *)get_bsduthreadrval(th_act);
	*retval = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if this is the first select by the thread
	 * allocate the space for bits.
	 */
	if (sel->nbytes == 0) {
		sel->nbytes = 3 * ni;
		MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
		needzerofill = 0;
	}

	/*
	 * if the previously allocated space for the bits is
	 * smaller than what is requested, reallocate it.
	 */
	if (sel->nbytes < (3 * ni)) {
		sel->nbytes = (3 * ni);
		FREE(sel->ibits, M_TEMP);
		FREE(sel->obits, M_TEMP);
		MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
		needzerofill = 0;
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define	getbits(name, x) \
	do { \
		if (uap->name && (error = copyin((caddr_t)uap->name, \
			(caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (uap->tv) {
		struct timeval atv;

		error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
		if (error)
			goto continuation;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto continuation;
		}

		clock_absolutetime_interval_to_deadline(
					tvtoabstime(&atv), &sel->abstime);
	}
	else
		sel->abstime = 0;

	sel->nfcount = 0;
	if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) {
		goto continuation;
	}

	sel->nfcount = nfcount;
	sel->count = count;
	size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK);
	if (sel->allocsize) {
		if (uth->uu_wqsub == 0)
			panic("select: wql memory smashed");
		/* needed for the select now */
		if (size > sel->allocsize) {
			kfree(uth->uu_wqsub, sel->allocsize);
			sel->allocsize = size;
			uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
			if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
				panic("failed to allocate memory for waitqueue\n");
			sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
		}
	} else {
		sel->count = count;
		sel->allocsize = size;
		uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
		if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
			panic("failed to allocate memory for waitqueue\n");
		sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
	}
	bzero(uth->uu_wqsub, size);
	wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:
	return selprocess(error, SEL_FIRSTPASS);
}
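
/*
 * Sizing example (illustrative, not from the original source),
 * assuming NFDBITS is 32 and fd_mask is a 32-bit word, matching the
 * u_int32_t bit arrays used above: for nd = 70 descriptors,
 * nw = howmany(70, 32) = 3 words and ni = 12 bytes per descriptor set,
 * so ibits/obits are each 3 * ni = 36 bytes laid out as
 * [read | write | except], which is exactly the &sel->ibits[(x) * nw]
 * indexing used by getbits() and putbits().
 */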

int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}

int
selprocess(error, sel_pass)
{
	int ncoll;
	u_int ni, nw;
	thread_act_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_args *uap;
	int *retval;
	struct _select *sel;
	int unwind = 1;
	int prepost = 0;
	int somewakeup = 0;
	int doretry = 0;
	wait_result_t wait_result;

	p = current_proc();
	th_act = current_act();
	uap = (struct select_args *)get_bsduthreadarg(th_act);
	retval = (int *)get_bsduthreadrval(th_act);
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_state.ss_select;

	/* if it is first pass wait queue is not setup yet */
	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
		unwind = 0;
	if (sel->count == 0)
		unwind = 0;
retry:
	if (error != 0) {
		goto done;
	}

	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	/* skip scans if the select is just for timeouts */
	if (sel->count) {
		if (sel_pass == SEL_FIRSTPASS)
			wait_queue_sub_clearrefs(uth->uu_wqsub);

		error = selscan(p, sel, uap->nd, retval, sel_pass);
		if (error || *retval) {
			goto done;
		}
		if (prepost) {
			/* if the select was preposted, we may wake up and find
			 * that someone else already consumed the data; go to
			 * select again if time permits
			 */
			prepost = 0;
			doretry = 1;
		}
		if (somewakeup) {
			somewakeup = 0;
			doretry = 1;
		}
	}

	if (uap->tv) {
		uint64_t now;

		clock_get_uptime(&now);
		if (now >= sel->abstime)
			goto done;
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && sel->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to collisions, no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	p->p_flag &= ~P_SELECT;

	/* if the select is just for timeout skip check */
	if (sel->count && (sel_pass == SEL_SECONDPASS))
		panic("selprocess: 2nd pass assertwaiting");

	/* Wait Queue Subordinate has waitqueue as first element */
	wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub,
					     &selwait, THREAD_ABORTSAFE);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events */
		error = tsleep1(NULL, PSOCK | PCATCH,
				"select", sel->abstime, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	sel_pass = SEL_SECONDPASS;
	if (error == 0) {
		if (!prepost)
			somewakeup = 1;
		goto retry;
	}
done:
	if (unwind)
		wait_subqueue_unlink_all(uth->uu_wqsub);
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

#define	putbits(name, x) \
	do { \
		if (uap->name && (error2 = copyout((caddr_t)&sel->obits[(x) * nw], \
			(caddr_t)uap->name, ni))) \
			error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	return(error);
}

static int
selscan(p, sel, nfd, retval, sel_pass)
	struct proc *p;
	struct _select *sel;
	int nfd;
	register_t *retval;
	int sel_pass;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register u_int32_t bits;
	struct file *fp;
	int n = 0;
	int nc = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	char * wql;
	int nfunnel = 0;
	int count, nfcount;
	char * wql_ptr;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*retval = 0;
		return(EIO);
	}

	ibits = sel->ibits;
	obits = sel->obits;
	wql = sel->wql;

	count = sel->count;
	nfcount = sel->nfcount;

	if (nfcount > count)
		panic("selcount count<nfcount");

	nw = howmany(nfd, NFDBITS);

	nc = 0;
	if (nfcount < count) {
		/* some or all in kernel funnel */
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];
			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];
					if (fp == NULL ||
					    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS)
						wql_ptr = (char *)0;
					else
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
					if (fp->f_ops && (fp->f_type != DTYPE_SOCKET)
					    && fo_select(fp, flag[msk], wql_ptr, p)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
	}

	if (nfcount) {
		/* socket file descriptors for scan */
		thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);

		nc = 0;
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];
			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];
					if (fp == NULL ||
					    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS)
						wql_ptr = (char *)0;
					else
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
					if (fp->f_ops && (fp->f_type == DTYPE_SOCKET) &&
					    fo_select(fp, flag[msk], wql_ptr, p)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
	}

	*retval = n;
	return (0);
}
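
/*
 * Note (illustrative, not from the original source): selscan() walks
 * the descriptor bit arrays twice on purpose.  Non-socket descriptors
 * are polled while holding the kernel funnel, then the scan switches
 * to the network funnel for the nfcount socket descriptors counted
 * earlier by selcount(), so each fo_select() runs under the funnel its
 * subsystem expects.
 */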

/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{

	return (1);
}

static int
selcount(p, ibits, obits, nfd, count, nfcount)
	struct proc *p;
	u_int32_t *ibits, *obits;
	int nfd;
	int *count;
	int *nfcount;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register u_int32_t bits;
	struct file *fp;
	int n = 0;
	int nc = 0;
	int nfc = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *fptr, *fbits;
	u_int nw;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*count = 0;
		*nfcount = 0;
		return(EIO);
	}

	nw = howmany(nfd, NFDBITS);


	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
				    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
					*count = 0;
					*nfcount = 0;
					return(EBADF);
				}
				if (fp->f_type == DTYPE_SOCKET)
					nfc++;
				n++;
			}
		}
	}
	*count = n;
	*nfcount = nfc;
	return (0);
}

/*
 * Record a select request.
 */
void
selrecord(selector, sip, p_wql)
	struct proc *selector;
	struct selinfo *sip;
	void * p_wql;
{
	thread_act_t cur_act = current_act();
	struct uthread * ut = get_bsdthread_info(cur_act);

	/* need to look at collisions */

	if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
		return;
	}

	/* do not record if this is second pass of select */
	if ((p_wql == (void *)0)) {
		return;
	}

	if ((sip->si_flags & SI_INITED) == 0) {
		wait_queue_init(&sip->wait_queue, SYNC_POLICY_FIFO);
		sip->si_flags |= SI_INITED;
		sip->si_flags &= ~SI_CLEAR;
	}

	if (sip->si_flags & SI_RECORDED) {
		sip->si_flags |= SI_COLL;
	} else
		sip->si_flags &= ~SI_COLL;

	sip->si_flags |= SI_RECORDED;
	if (!wait_queue_member(&sip->wait_queue, ut->uu_wqsub))
		wait_queue_link_noalloc(&sip->wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql);

	return;
}

void
selwakeup(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	if (sip->si_flags & SI_RECORDED) {
		wait_queue_wakeup_all(&sip->wait_queue, &selwait, THREAD_AWAKENED);
		sip->si_flags &= ~SI_RECORDED;
	}

}

void
selthreadclear(sip)
	register struct selinfo *sip;
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	wait_queue_unlinkall_nofree(&sip->wait_queue);
}
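
/*
 * Usage sketch (illustrative, not from the original source): a driver
 * typically keeps a struct selinfo in its softc, calls selrecord()
 * with the wait-queue-link pointer passed to its select routine when
 * the device is not yet ready, and calls selwakeup() from its I/O
 * completion path, e.g.
 *
 *	int
 *	foo_select(dev_t dev, int which, void *wql, struct proc *p)
 *	{
 *		struct foo_softc *sc = ...;	// hypothetical driver state
 *
 *		if (which == FREAD && sc->sc_ready)
 *			return (1);		// data available now
 *		selrecord(p, &sc->sc_rsel, wql);
 *		return (0);
 *	}
 *
 *	// later, when data arrives:
 *	//	sc->sc_ready = 1;
 *	//	selwakeup(&sc->sc_rsel);
 *
 * foo_select, foo_softc, sc_rsel and sc_ready are hypothetical names
 * used only for this sketch.
 */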


extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp);

/*
 * called upon socket close. dequeue and free all events for
 * the socket
 */
void
evsofree(struct socket *sp)
{
	struct eventqelt *eqp, *next;

	if (sp == NULL) return;

	for (eqp = sp->so_evlist.tqh_first; eqp != NULL; eqp = next) {
		next = eqp->ee_slist.tqe_next;
		evprocdeque(eqp->ee_proc, eqp); // remove from proc q if there
		TAILQ_REMOVE(&sp->so_evlist, eqp, ee_slist); // remove from socket q
		FREE(eqp, M_TEMP);
	}
}


#define DBG_EVENT	0x10

#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST		MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH		MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT		MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD		MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP	MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


/*
 * enqueue this event if it's not already queued. wakeup
 * the proc if we do queue this event to it.
 */
void
evprocenque(struct eventqelt *eqp)
{
	struct proc *p;

	assert(eqp);
	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, eqp, eqp->ee_flags, eqp->ee_eventmask,0,0);
	if (eqp->ee_flags & EV_QUEUED) {
		KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
		return;
	}
	eqp->ee_flags |= EV_QUEUED;
	eqp->ee_eventmask = 0;		// disarm
	p = eqp->ee_proc;
	TAILQ_INSERT_TAIL(&p->p_evlist, eqp, ee_plist);
	KERNEL_DEBUG(DBG_MISC_EWAKEUP,0,0,0,eqp,0);
	wakeup(&p->p_evlist);
	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}

/*
 * given either a sockbuf or a socket run down the
 * event list and queue ready events found
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int mask;
	struct eventqelt *evq;
	register struct tcpcb *tp;

	if (sb) sp = sb->sb_so;
	if (!sp || sp->so_evlist.tqh_first == NULL) return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,0,0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark
		*/
		switch (event & EV_DMASK) {

		case EV_RWBYTES:
		case EV_OOB:
		case EV_RWBYTES|EV_OOB:
			if (event & EV_OOB) {
				if ((evq->ee_eventmask & EV_EX)) {
					if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) {
						mask |= EV_EX|EV_OOB;
					}
				}
			}
			if (event & EV_RWBYTES) {
				if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
					if ((sp->so_type == SOCK_STREAM) && (sp->so_error == ECONNREFUSED) ||
					    (sp->so_error == ECONNRESET)) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
					if (sp->so_state & SS_CANTRCVMORE) {
						mask |= EV_RE|EV_FIN;
						evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
						break;
					}
					mask |= EV_RE;
					evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
				}

				if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
					if ((sp->so_type == SOCK_STREAM) && (sp->so_error == ECONNREFUSED) ||
					    (sp->so_error == ECONNRESET)) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
					mask |= EV_WR;
					evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
				}
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				evq->ee_req.er_rcnt = sp->so_qlen + 1;	// incl this one
				mask |= EV_RE|EV_RCONN;
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			return;
		} /* switch */

		if (mask) {
			evq->ee_req.er_eventbits |= mask;
			KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask,0,0);
			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
}

/*
 * remove and return the first event (eqp=NULL) or a specific
 * event, or return NULL if no events found
 */
struct eventqelt *
evprocdeque(struct proc *p, struct eventqelt *eqp)
{

	KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_START,p,eqp,0,0,0);

	if (eqp && ((eqp->ee_flags & EV_QUEUED) == NULL)) {
		KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
		return(NULL);
	}
	if (p->p_evlist.tqh_first == NULL) {
		KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
		return(NULL);
	}
	if (eqp == NULL) {	// remove first
		eqp = p->p_evlist.tqh_first;
	}
	TAILQ_REMOVE(&p->p_evlist, eqp, ee_plist);
	eqp->ee_flags &= ~EV_QUEUED;
	KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,eqp,0,0,0,0);
	return(eqp);
}

struct evwatch_args {
	struct eventreq *u_req;
	int u_eventmask;
};


/*
 * watchevent system call. user passes us an event to watch
 * for. we malloc an event object, initialize it, and queue
 * it to the open socket. when the event occurs, postevent()
 * will enque it back to our proc where we can retrieve it
 * via waitevent().
 *
 * should this prevent duplicate events on same socket?
 */
int
watchevent(p, uap, retval)
	struct proc *p;
	struct evwatch_args *uap;
	register_t *retval;
{
	struct eventqelt *eqp = (struct eventqelt *)0;
	struct eventqelt *np;
	struct eventreq *erp;
	struct file *fp;
	struct socket *sp;
	int error;

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);

	// get a qelt and fill with users req
	MALLOC(eqp, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
	if (!eqp) panic("can't MALLOC eqp");
	erp = &eqp->ee_req;
	// get users request pkt
	if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
			   sizeof(struct eventreq))) {
		FREE(eqp, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}
	KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,eqp,0,0);
	// validate, freeing qelt if errors
	error = 0;
	if (erp->er_type != EV_FD) {
		error = EINVAL;
	} else if (erp->er_handle < 0) {
		error = EBADF;
	} else if (erp->er_handle > p->p_fd->fd_nfiles) {
		error = EBADF;
	} else if ((fp = *fdfile(p, erp->er_handle)) == NULL) {
		error = EBADF;
	} else if (fp->f_type != DTYPE_SOCKET) {
		error = EINVAL;
	}
	if (error) {
		FREE(eqp, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}

	erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
	eqp->ee_proc = p;
	eqp->ee_eventmask = uap->u_eventmask & EV_MASK;
	eqp->ee_flags = 0;

	sp = (struct socket *)fp->f_data;
	assert(sp != NULL);

	// only allow one watch per file per proc
	for (np = sp->so_evlist.tqh_first; np != NULL; np = np->ee_slist.tqe_next) {
		if (np->ee_proc == p) {
			FREE(eqp, M_TEMP);
			KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
			return(EINVAL);
		}
	}

	TAILQ_INSERT_TAIL(&sp->so_evlist, eqp, ee_slist);
	postevent(sp, 0, EV_RWBYTES); // catch existing events
	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
	return(0);
}

struct evwait_args {
	struct eventreq *u_req;
	struct timeval *tv;
};

/*
 * waitevent system call.
 * grabs the next waiting event for this proc and returns
 * it. if no events, the caller can poll (zero-valued tv), sleep
 * with a timeout, or block indefinitely (tv=NULL).
 */
int
waitevent(p, uap, retval)
	struct proc *p;
	struct evwait_args *uap;
	register_t *retval;
{
	int error = 0;
	struct eventqelt *eqp;
	uint64_t abstime, interval;

	if (uap->tv) {
		struct timeval atv;

		error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
		if (error)
			return(error);
		if (itimerfix(&atv)) {
			error = EINVAL;
			return(error);
		}

		interval = tvtoabstime(&atv);
	}
	else
		abstime = interval = 0;

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);

retry:
	if ((eqp = evprocdeque(p,NULL)) != NULL) {
		error = copyout((caddr_t)&eqp->ee_req,
				(caddr_t)uap->u_req, sizeof(struct eventreq));
		KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
			     eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0);

		return (error);
	}
	else {
		if (uap->tv && interval == 0) {
			*retval = 1;	// poll failed
			KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);

			return (error);
		}

		if (interval != 0)
			clock_absolutetime_interval_to_deadline(interval, &abstime);

		KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0);
		error = tsleep1(&p->p_evlist, PSOCK | PCATCH,
				"waitevent", abstime, (int (*)(int))0);
		KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0);
		if (error == 0)
			goto retry;
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK) {
			*retval = 1;
			error = 0;
		}
	}

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);

	return (error);
}

struct modwatch_args {
	struct eventreq *u_req;
	int u_eventmask;
};

/*
 * modwatch system call. user passes in event to modify.
 * if we find it we reset the event bits and queue/dequeue
 * the event as needed.
 */
int
modwatch(p, uap, retval)
	struct proc *p;
	struct modwatch_args *uap;
	register_t *retval;
{
	struct eventreq er;
	struct eventreq *erp = &er;
	struct eventqelt *evq;
	int error;
	struct file *fp;
	struct socket *sp;
	int flag;

	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);

	// get users request pkt
	if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
			   sizeof(struct eventreq))) return(error);

	if (erp->er_type != EV_FD) return(EINVAL);
	if (erp->er_handle < 0) return(EBADF);
	if (erp->er_handle > p->p_fd->fd_nfiles) return(EBADF);
	if ((fp = *fdfile(p, erp->er_handle)) == NULL)
		return(EBADF);
	if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock
	sp = (struct socket *)fp->f_data;
	assert(sp != NULL);


	// locate event if possible
	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {
		if (evq->ee_proc == p) break;
	}

	if (evq == NULL) {
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
		return(EINVAL);
	}
	KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0);

	if (uap->u_eventmask == EV_RM) {
		evprocdeque(p, evq);
		TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist);
		FREE(evq, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
		return(0);
	}

	switch (uap->u_eventmask & EV_MASK) {

	case 0:
		flag = 0;
		break;

	case EV_RE:
	case EV_WR:
	case EV_RE|EV_WR:
		flag = EV_RWBYTES;
		break;

	case EV_EX:
		flag = EV_OOB;
		break;

	case EV_EX|EV_RE:
	case EV_EX|EV_WR:
	case EV_EX|EV_RE|EV_WR:
		flag = EV_OOB|EV_RWBYTES;
		break;

	default:
		return(EINVAL);
	}

	evq->ee_eventmask = uap->u_eventmask & EV_MASK;
	evprocdeque(p, evq);
	evq->ee_req.er_eventbits = 0;
	postevent(sp, 0, flag);
	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,sp,flag,0);
	return(0);
}
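
/*
 * Usage sketch (illustrative, not from the original source; these
 * calls are normally reached through syscall stubs rather than libc
 * wrappers): a process watches a socket, waits for posted events, and
 * finally removes the watch.
 *
 *	struct eventreq req;
 *	struct timeval tv = { 1, 0 };		// 1 second timeout
 *
 *	bzero(&req, sizeof(req));
 *	req.er_type = EV_FD;
 *	req.er_handle = sockfd;			// sockfd is hypothetical
 *	watchevent(&req, EV_RE | EV_WR);	// watch for read/write
 *	if (waitevent(&req, &tv) == 0)
 *		;	// req.er_eventbits now holds EV_RE/EV_WR/EV_FIN/...
 *	modwatch(&req, EV_RM);			// drop the watch
 */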