]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_generic.c
xnu-517.12.7.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
CommitLineData
1c79356b 1/*
e5568f75 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1982, 1986, 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 * (c) UNIX System Laboratories, Inc.
27 * All or some portions of this file are derived from material licensed
28 * to the University of California by American Telephone and Telegraph
29 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
30 * the permission of UNIX System Laboratories, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
61 */
62
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/filedesc.h>
66#include <sys/ioctl.h>
67#include <sys/file.h>
68#include <sys/proc.h>
69#include <sys/socketvar.h>
70#include <sys/uio.h>
71#include <sys/kernel.h>
72#include <sys/stat.h>
73#include <sys/malloc.h>
74
1c79356b
A
75#include <sys/mount.h>
76#include <sys/protosw.h>
77#include <sys/ev.h>
78#include <sys/user.h>
79#include <sys/kdebug.h>
80#include <kern/assert.h>
81#include <kern/thread_act.h>
82
83#include <sys/mbuf.h>
84#include <sys/socket.h>
85#include <sys/socketvar.h>
86#include <sys/errno.h>
55e303ae 87#include <sys/syscall.h>
1c79356b 88
e5568f75
A
89#include <bsm/audit_kernel.h>
90
1c79356b
A
91#include <net/if.h>
92#include <net/route.h>
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#include <netinet/ip.h>
97#include <netinet/in_pcb.h>
98#include <netinet/ip_var.h>
99#include <netinet/ip6.h>
100#include <netinet/tcp.h>
101#include <netinet/tcp_fsm.h>
102#include <netinet/tcp_seq.h>
103#include <netinet/tcp_timer.h>
104#include <netinet/tcp_var.h>
105#include <netinet/tcpip.h>
106#include <netinet/tcp_debug.h>
0b4e3aa0
A
107/* for wait queue based select */
108#include <kern/wait_queue.h>
9bccf70c
A
109#if KTRACE
110#include <sys/ktrace.h>
111#endif
55e303ae 112#include <sys/vnode.h>
9bccf70c 113
9bccf70c 114
55e303ae 115__private_extern__ struct file*
9bccf70c
A
116holdfp(fdp, fd, flag)
117 struct filedesc* fdp;
118 int fd, flag;
119{
120 struct file* fp;
121
122 if (((u_int)fd) >= fdp->fd_nfiles ||
123 (fp = fdp->fd_ofiles[fd]) == NULL ||
124 (fp->f_flag & flag) == 0) {
125 return (NULL);
126 }
d7e50217
A
127 if (fref(fp) == -1)
128 return (NULL);
9bccf70c
A
129 return (fp);
130}
1c79356b
A
131
132/*
133 * Read system call.
134 */
9bccf70c 135#ifndef _SYS_SYSPROTO_H_
1c79356b
A
136struct read_args {
137 int fd;
138 char *cbuf;
139 u_int nbyte;
140};
9bccf70c
A
141#endif
142int
1c79356b
A
143read(p, uap, retval)
144 struct proc *p;
145 register struct read_args *uap;
146 register_t *retval;
9bccf70c
A
147{
148 register struct file *fp;
149 int error;
150
151 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
152 return (EBADF);
153 error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte,
154 (off_t)-1, 0, retval);
155 frele(fp);
156 return(error);
157}
158
159/*
160 * Pread system call
161 */
162#ifndef _SYS_SYSPROTO_H_
163struct pread_args {
164 int fd;
165 void *buf;
166 size_t nbyte;
167#ifdef DOUBLE_ALIGN_PARAMS
168 int pad;
169#endif
170 off_t offset;
171};
172#endif
173int
174pread(p, uap, retval)
175 struct proc *p;
176 register struct pread_args *uap;
177 int *retval;
178{
179 register struct file *fp;
180 int error;
181
182 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
183 return (EBADF);
184 if (fp->f_type != DTYPE_VNODE) {
185 error = ESPIPE;
186 } else {
187 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
188 uap->offset, FOF_OFFSET, retval);
189 }
190 frele(fp);
55e303ae
A
191
192 if (!error)
193 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
194 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
195
9bccf70c
A
196 return(error);
197}
198
199/*
200 * Code common for read and pread
201 */
55e303ae 202__private_extern__ int
9bccf70c
A
203dofileread(p, fp, fd, buf, nbyte, offset, flags, retval)
204 struct proc *p;
205 struct file *fp;
206 int fd, flags;
207 void *buf;
208 size_t nbyte;
209 off_t offset;
210 int *retval;
1c79356b
A
211{
212 struct uio auio;
213 struct iovec aiov;
9bccf70c
A
214 long cnt, error = 0;
215#if KTRACE
216 struct iovec ktriov;
217 struct uio ktruio;
218 int didktr = 0;
219#endif
1c79356b 220
9bccf70c
A
221 aiov.iov_base = (caddr_t)buf;
222 aiov.iov_len = nbyte;
1c79356b
A
223 auio.uio_iov = &aiov;
224 auio.uio_iovcnt = 1;
9bccf70c
A
225 auio.uio_offset = offset;
226 if (nbyte > INT_MAX)
227 return (EINVAL);
228 auio.uio_resid = nbyte;
1c79356b 229 auio.uio_rw = UIO_READ;
9bccf70c
A
230 auio.uio_segflg = UIO_USERSPACE;
231 auio.uio_procp = p;
232#if KTRACE
233 /*
234 * if tracing, save a copy of iovec
235 */
236 if (KTRPOINT(p, KTR_GENIO)) {
237 ktriov = aiov;
238 ktruio = auio;
239 didktr = 1;
240 }
241#endif
242 cnt = nbyte;
243
244 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
245 if (auio.uio_resid != cnt && (error == ERESTART ||
246 error == EINTR || error == EWOULDBLOCK))
247 error = 0;
248 }
249 cnt -= auio.uio_resid;
250#if KTRACE
251 if (didktr && error == 0) {
252 ktruio.uio_iov = &ktriov;
253 ktruio.uio_resid = cnt;
254 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error,
255 KERNEL_FUNNEL);
256 }
257#endif
258 *retval = cnt;
259 return (error);
1c79356b
A
260}
261
9bccf70c
A
262/*
263 * Scatter read system call.
264 */
265#ifndef _SYS_SYSPROTO_H_
1c79356b
A
266struct readv_args {
267 int fd;
268 struct iovec *iovp;
269 u_int iovcnt;
270};
9bccf70c
A
271#endif
272int
1c79356b
A
273readv(p, uap, retval)
274 struct proc *p;
275 register struct readv_args *uap;
276 int *retval;
277{
278 struct uio auio;
279 register struct iovec *iov;
280 int error;
281 struct iovec aiov[UIO_SMALLIOV];
282
283 if (uap->iovcnt > UIO_SMALLIOV) {
284 if (uap->iovcnt > UIO_MAXIOV)
285 return (EINVAL);
286 if ((iov = (struct iovec *)
287 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
288 return (ENOMEM);
289 } else
290 iov = aiov;
291 auio.uio_iov = iov;
292 auio.uio_iovcnt = uap->iovcnt;
293 auio.uio_rw = UIO_READ;
294 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
295 uap->iovcnt * sizeof (struct iovec));
296 if (!error)
297 error = rwuio(p, uap->fd, &auio, UIO_READ, retval);
298 if (uap->iovcnt > UIO_SMALLIOV)
299 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
300 return (error);
301}
302
303/*
304 * Write system call
305 */
9bccf70c 306#ifndef _SYS_SYSPROTO_H_
1c79356b
A
307struct write_args {
308 int fd;
309 char *cbuf;
310 u_int nbyte;
311};
9bccf70c
A
312#endif
313int
1c79356b
A
314write(p, uap, retval)
315 struct proc *p;
316 register struct write_args *uap;
317 int *retval;
318{
9bccf70c
A
319 register struct file *fp;
320 int error;
321
322 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
323 return (EBADF);
324 error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte,
325 (off_t)-1, 0, retval);
326 frele(fp);
327 return(error);
328}
329
330/*
331 * Pwrite system call
332 */
333#ifndef _SYS_SYSPROTO_H_
334struct pwrite_args {
335 int fd;
336 const void *buf;
337 size_t nbyte;
338#ifdef DOUBLE_ALIGN_PARAMS
339 int pad;
340#endif
341 off_t offset;
342};
343#endif
344int
345pwrite(p, uap, retval)
346 struct proc *p;
347 register struct pwrite_args *uap;
348 int *retval;
349{
350 register struct file *fp;
351 int error;
352
353 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
354 return (EBADF);
355 if (fp->f_type != DTYPE_VNODE) {
356 error = ESPIPE;
357 } else {
358 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
359 uap->offset, FOF_OFFSET, retval);
360 }
361 frele(fp);
55e303ae
A
362
363 if (!error)
364 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
365 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
366
9bccf70c
A
367 return(error);
368}
369
55e303ae 370__private_extern__ int
9bccf70c
A
371dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval)
372 struct proc *p;
373 struct file *fp;
374 int fd, flags;
375 const void *buf;
376 size_t nbyte;
377 off_t offset;
378 int *retval;
379{
1c79356b
A
380 struct uio auio;
381 struct iovec aiov;
9bccf70c
A
382 long cnt, error = 0;
383#if KTRACE
384 struct iovec ktriov;
385 struct uio ktruio;
386 int didktr = 0;
387#endif
388
389 aiov.iov_base = (void *)(uintptr_t)buf;
390 aiov.iov_len = nbyte;
1c79356b 391 auio.uio_iov = &aiov;
9bccf70c
A
392 auio.uio_iovcnt = 1;
393 auio.uio_offset = offset;
394 if (nbyte > INT_MAX)
395 return (EINVAL);
396 auio.uio_resid = nbyte;
1c79356b 397 auio.uio_rw = UIO_WRITE;
9bccf70c
A
398 auio.uio_segflg = UIO_USERSPACE;
399 auio.uio_procp = p;
400#if KTRACE
401 /*
402 * if tracing, save a copy of iovec and uio
403 */
404 if (KTRPOINT(p, KTR_GENIO)) {
405 ktriov = aiov;
406 ktruio = auio;
407 didktr = 1;
408 }
409#endif
410 cnt = nbyte;
411 if (fp->f_type == DTYPE_VNODE)
412 bwillwrite();
413 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
414 if (auio.uio_resid != cnt && (error == ERESTART ||
415 error == EINTR || error == EWOULDBLOCK))
416 error = 0;
55e303ae
A
417 /* The socket layer handles SIGPIPE */
418 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
419 psignal(p, SIGPIPE);
9bccf70c
A
420 }
421 cnt -= auio.uio_resid;
422#if KTRACE
423 if (didktr && error == 0) {
424 ktruio.uio_iov = &ktriov;
425 ktruio.uio_resid = cnt;
426 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error,
427 KERNEL_FUNNEL);
428 }
429#endif
430 *retval = cnt;
431 return (error);
1c79356b 432}
9bccf70c
A
433
434/*
435 * Gather write system call
436 */
437#ifndef _SYS_SYSPROTO_H_
1c79356b
A
438struct writev_args {
439 int fd;
440 struct iovec *iovp;
441 u_int iovcnt;
442};
9bccf70c
A
443#endif
444int
1c79356b
A
445writev(p, uap, retval)
446 struct proc *p;
447 register struct writev_args *uap;
448 int *retval;
449{
450 struct uio auio;
451 register struct iovec *iov;
452 int error;
453 struct iovec aiov[UIO_SMALLIOV];
454
455 if (uap->iovcnt > UIO_SMALLIOV) {
456 if (uap->iovcnt > UIO_MAXIOV)
457 return (EINVAL);
458 if ((iov = (struct iovec *)
459 kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0)
460 return (ENOMEM);
461 } else
462 iov = aiov;
463 auio.uio_iov = iov;
464 auio.uio_iovcnt = uap->iovcnt;
465 auio.uio_rw = UIO_WRITE;
466 error = copyin((caddr_t)uap->iovp, (caddr_t)iov,
467 uap->iovcnt * sizeof (struct iovec));
468 if (!error)
469 error = rwuio(p, uap->fd, &auio, UIO_WRITE, retval);
470 if (uap->iovcnt > UIO_SMALLIOV)
471 kfree(iov, sizeof(struct iovec)*uap->iovcnt);
472 return (error);
473}
474
9bccf70c 475int
1c79356b
A
476rwuio(p, fdes, uio, rw, retval)
477 struct proc *p;
478 int fdes;
479 register struct uio *uio;
480 enum uio_rw rw;
481 int *retval;
482{
483 struct file *fp;
484 register struct iovec *iov;
485 int i, count, flag, error;
9bccf70c
A
486#if KTRACE
487 struct iovec *ktriov;
488 struct uio ktruio;
489 int didktr = 0;
490 u_int iovlen;
491#endif
1c79356b
A
492
493 if (error = fdgetf(p, fdes, &fp))
494 return (error);
495
496 if ((fp->f_flag&(rw==UIO_READ ? FREAD : FWRITE)) == 0) {
497 return(EBADF);
498 }
499 uio->uio_resid = 0;
500 uio->uio_segflg = UIO_USERSPACE;
501 uio->uio_procp = p;
502 iov = uio->uio_iov;
503 for (i = 0; i < uio->uio_iovcnt; i++) {
504 if (iov->iov_len < 0) {
505 return(EINVAL);
506 }
507 uio->uio_resid += iov->iov_len;
508 if (uio->uio_resid < 0) {
509 return(EINVAL);
510 }
511 iov++;
512 }
513 count = uio->uio_resid;
9bccf70c
A
514#if KTRACE
515 /*
516 * if tracing, save a copy of iovec
517 */
518 if (KTRPOINT(p, KTR_GENIO)) {
519 iovlen = uio->uio_iovcnt * sizeof (struct iovec);
520 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
521 bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen);
522 ktruio = *uio;
523 didktr = 1;
524 }
525#endif
526
1c79356b 527 if (rw == UIO_READ) {
9bccf70c
A
528 if (error = fo_read(fp, uio, fp->f_cred, 0, p))
529 if (uio->uio_resid != count && (error == ERESTART ||
530 error == EINTR || error == EWOULDBLOCK))
531 error = 0;
1c79356b 532 } else {
9bccf70c
A
533 if (fp->f_type == DTYPE_VNODE)
534 bwillwrite();
535 if (error = fo_write(fp, uio, fp->f_cred, 0, p)) {
1c79356b 536 if (uio->uio_resid != count && (error == ERESTART ||
9bccf70c 537 error == EINTR || error == EWOULDBLOCK))
1c79356b 538 error = 0;
9bccf70c
A
539 /* The socket layer handles SIGPIPE */
540 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
1c79356b
A
541 psignal(p, SIGPIPE);
542 }
543 }
9bccf70c 544
1c79356b 545 *retval = count - uio->uio_resid;
9bccf70c
A
546
547#if KTRACE
548 if (didktr) {
549 if (error == 0) {
550 ktruio.uio_iov = ktriov;
551 ktruio.uio_resid = *retval;
552 ktrgenio(p->p_tracep, fdes, rw, &ktruio, error,
553 KERNEL_FUNNEL);
554 }
555 FREE(ktriov, M_TEMP);
556 }
557#endif
558
1c79356b
A
559 return(error);
560}
561
562/*
563 * Ioctl system call
564 */
9bccf70c 565#ifndef _SYS_SYSPROTO_H_
1c79356b
A
566struct ioctl_args {
567 int fd;
568 u_long com;
569 caddr_t data;
570};
9bccf70c
A
571#endif
572int
1c79356b
A
573ioctl(p, uap, retval)
574 struct proc *p;
575 register struct ioctl_args *uap;
576 register_t *retval;
577{
578 struct file *fp;
579 register u_long com;
580 register int error;
581 register u_int size;
582 caddr_t data, memp;
583 int tmp;
584#define STK_PARAMS 128
585 char stkbuf[STK_PARAMS];
586
e5568f75
A
587 AUDIT_ARG(fd, uap->fd);
588 AUDIT_ARG(cmd, uap->com); /* XXX cmd is int, uap->com is long */
589 AUDIT_ARG(addr, uap->data);
1c79356b
A
590 if (error = fdgetf(p, uap->fd, &fp))
591 return (error);
592
e5568f75 593 AUDIT_ARG(file, p, fp);
1c79356b
A
594 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
595 return (EBADF);
596
9bccf70c
A
597#if NETAT
598 /*
599 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
1c79356b
A
600 * while implementing an ATioctl system call
601 */
1c79356b
A
602 {
603 extern int appletalk_inited;
604
605 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
606#ifdef APPLETALK_DEBUG
607 kprintf("ioctl: special AppleTalk \n");
608#endif
9bccf70c 609 error = fo_ioctl(fp, uap->com, uap->data, p);
1c79356b
A
610 return(error);
611 }
612 }
613
614#endif /* NETAT */
615
616
617 switch (com = uap->com) {
618 case FIONCLEX:
619 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
620 return (0);
621 case FIOCLEX:
622 *fdflags(p, uap->fd) |= UF_EXCLOSE;
623 return (0);
624 }
625
626 /*
627 * Interpret high order word to find amount of data to be
628 * copied to/from the user's address space.
629 */
630 size = IOCPARM_LEN(com);
631 if (size > IOCPARM_MAX)
632 return (ENOTTY);
633 memp = NULL;
634 if (size > sizeof (stkbuf)) {
635 if ((memp = (caddr_t)kalloc(size)) == 0)
636 return(ENOMEM);
637 data = memp;
638 } else
639 data = stkbuf;
640 if (com&IOC_IN) {
641 if (size) {
642 error = copyin(uap->data, data, (u_int)size);
643 if (error) {
644 if (memp)
645 kfree(memp, size);
646 return (error);
647 }
648 } else
649 *(caddr_t *)data = uap->data;
650 } else if ((com&IOC_OUT) && size)
651 /*
652 * Zero the buffer so the user always
653 * gets back something deterministic.
654 */
655 bzero(data, size);
656 else if (com&IOC_VOID)
657 *(caddr_t *)data = uap->data;
658
659 switch (com) {
660
661 case FIONBIO:
662 if (tmp = *(int *)data)
663 fp->f_flag |= FNONBLOCK;
664 else
665 fp->f_flag &= ~FNONBLOCK;
9bccf70c 666 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
1c79356b
A
667 break;
668
669 case FIOASYNC:
670 if (tmp = *(int *)data)
671 fp->f_flag |= FASYNC;
672 else
673 fp->f_flag &= ~FASYNC;
9bccf70c 674 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
1c79356b
A
675 break;
676
677 case FIOSETOWN:
678 tmp = *(int *)data;
679 if (fp->f_type == DTYPE_SOCKET) {
680 ((struct socket *)fp->f_data)->so_pgid = tmp;
681 error = 0;
682 break;
683 }
684 if (tmp <= 0) {
685 tmp = -tmp;
686 } else {
687 struct proc *p1 = pfind(tmp);
688 if (p1 == 0) {
689 error = ESRCH;
690 break;
691 }
692 tmp = p1->p_pgrp->pg_id;
693 }
9bccf70c 694 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
1c79356b
A
695 break;
696
697 case FIOGETOWN:
698 if (fp->f_type == DTYPE_SOCKET) {
699 error = 0;
700 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
701 break;
702 }
9bccf70c 703 error = fo_ioctl(fp, TIOCGPGRP, data, p);
1c79356b
A
704 *(int *)data = -*(int *)data;
705 break;
706
707 default:
9bccf70c 708 error = fo_ioctl(fp, com, data, p);
1c79356b
A
709 /*
710 * Copy any data to user, size was
711 * already set and checked above.
712 */
713 if (error == 0 && (com&IOC_OUT) && size)
714 error = copyout(data, uap->data, (u_int)size);
715 break;
716 }
717 if (memp)
718 kfree(memp, size);
719 return (error);
720}
721
1c79356b 722int selwait, nselcoll;
0b4e3aa0
A
723#define SEL_FIRSTPASS 1
724#define SEL_SECONDPASS 2
9bccf70c
A
725extern int selcontinue(int error);
726extern int selprocess(int error, int sel_pass);
727static int selscan(struct proc *p, struct _select * sel,
728 int nfd, register_t *retval, int sel_pass);
729static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
730 int nfd, int * count, int * nfcount);
731extern uint64_t tvtoabstime(struct timeval *tvp);
1c79356b
A
732
733/*
734 * Select system call.
735 */
9bccf70c 736#ifndef _SYS_SYSPROTO_H_
1c79356b
A
737struct select_args {
738 int nd;
739 u_int32_t *in;
740 u_int32_t *ou;
741 u_int32_t *ex;
742 struct timeval *tv;
743};
9bccf70c
A
744#endif
745int
1c79356b
A
746select(p, uap, retval)
747 register struct proc *p;
748 register struct select_args *uap;
749 register_t *retval;
750{
9bccf70c 751 int error = 0;
0b4e3aa0 752 u_int ni, nw, size;
1c79356b
A
753 thread_act_t th_act;
754 struct uthread *uth;
755 struct _select *sel;
756 int needzerofill = 1;
0b4e3aa0
A
757 int kfcount =0;
758 int nfcount = 0;
759 int count = 0;
1c79356b
A
760
761 th_act = current_act();
762 uth = get_bsdthread_info(th_act);
763 sel = &uth->uu_state.ss_select;
764 retval = (int *)get_bsduthreadrval(th_act);
765 *retval = 0;
766
0b4e3aa0 767 if (uap->nd < 0) {
1c79356b 768 return (EINVAL);
0b4e3aa0 769 }
1c79356b
A
770
771 if (uap->nd > p->p_fd->fd_nfiles)
772 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
773
774 nw = howmany(uap->nd, NFDBITS);
775 ni = nw * sizeof(fd_mask);
776
777 /*
778 * if this is the first select by the thread
779 * allocate the space for bits.
780 */
781 if (sel->nbytes == 0) {
782 sel->nbytes = 3 * ni;
783 MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
784 MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
785 bzero((caddr_t)sel->ibits, sel->nbytes);
786 bzero((caddr_t)sel->obits, sel->nbytes);
787 needzerofill = 0;
788 }
789
790 /*
791 * if the previously allocated space for the bits
792 * is smaller than what is requested. Reallocate.
793 */
794 if (sel->nbytes < (3 * ni)) {
795 sel->nbytes = (3 * ni);
796 FREE(sel->ibits, M_TEMP);
797 FREE(sel->obits, M_TEMP);
798 MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
799 MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK);
800 bzero((caddr_t)sel->ibits, sel->nbytes);
801 bzero((caddr_t)sel->obits, sel->nbytes);
802 needzerofill = 0;
803 }
804
805 if (needzerofill) {
806 bzero((caddr_t)sel->ibits, sel->nbytes);
807 bzero((caddr_t)sel->obits, sel->nbytes);
808 }
809
810 /*
811 * get the bits from the user address space
812 */
813#define getbits(name, x) \
814 do { \
815 if (uap->name && (error = copyin((caddr_t)uap->name, \
816 (caddr_t)&sel->ibits[(x) * nw], ni))) \
817 goto continuation; \
818 } while (0)
819
820 getbits(in, 0);
821 getbits(ou, 1);
822 getbits(ex, 2);
823#undef getbits
824
825 if (uap->tv) {
9bccf70c
A
826 struct timeval atv;
827
828 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
1c79356b
A
829 if (error)
830 goto continuation;
9bccf70c 831 if (itimerfix(&atv)) {
1c79356b
A
832 error = EINVAL;
833 goto continuation;
834 }
0b4e3aa0 835
9bccf70c
A
836 clock_absolutetime_interval_to_deadline(
837 tvtoabstime(&atv), &sel->abstime);
838 }
839 else
840 sel->abstime = 0;
841
0b4e3aa0
A
842 sel->nfcount = 0;
843 if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) {
844 goto continuation;
845 }
846
847 sel->nfcount = nfcount;
848 sel->count = count;
849 size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK);
850 if (sel->allocsize) {
851 if (uth->uu_wqsub == 0)
852 panic("select: wql memory smashed");
853 /* needed for the select now */
854 if (size > sel->allocsize) {
855 kfree(uth->uu_wqsub, sel->allocsize);
856 sel->allocsize = size;
857 uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
858 if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
859 panic("failed to allocate memory for waitqueue\n");
860 sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
861 }
862 } else {
863 sel->count = count;
864 sel->allocsize = size;
865 uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize);
866 if (uth->uu_wqsub == (wait_queue_sub_t)NULL)
867 panic("failed to allocate memory for waitqueue\n");
868 sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB;
869 }
870 bzero(uth->uu_wqsub, size);
871 wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
872
1c79356b 873continuation:
9bccf70c 874 return selprocess(error, SEL_FIRSTPASS);
0b4e3aa0
A
875}
876
877int
878selcontinue(int error)
879{
9bccf70c 880 return selprocess(error, SEL_SECONDPASS);
1c79356b
A
881}
882
883int
0b4e3aa0 884selprocess(error, sel_pass)
1c79356b 885{
9bccf70c 886 int ncoll;
1c79356b
A
887 u_int ni, nw;
888 thread_act_t th_act;
889 struct uthread *uth;
890 struct proc *p;
891 struct select_args *uap;
892 int *retval;
893 struct _select *sel;
0b4e3aa0 894 int unwind = 1;
9bccf70c 895 int prepost = 0;
0b4e3aa0
A
896 int somewakeup = 0;
897 int doretry = 0;
9bccf70c 898 wait_result_t wait_result;
1c79356b
A
899
900 p = current_proc();
901 th_act = current_act();
902 uap = (struct select_args *)get_bsduthreadarg(th_act);
903 retval = (int *)get_bsduthreadrval(th_act);
904 uth = get_bsdthread_info(th_act);
905 sel = &uth->uu_state.ss_select;
906
0b4e3aa0
A
907 /* if it is first pass wait queue is not setup yet */
908 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
909 unwind = 0;
910 if (sel->count == 0)
911 unwind = 0;
1c79356b 912retry:
0b4e3aa0 913 if (error != 0) {
1c79356b 914 goto done;
0b4e3aa0
A
915 }
916
1c79356b
A
917 ncoll = nselcoll;
918 p->p_flag |= P_SELECT;
0b4e3aa0
A
919 /* skip scans if the select is just for timeouts */
920 if (sel->count) {
921 if (sel_pass == SEL_FIRSTPASS)
922 wait_queue_sub_clearrefs(uth->uu_wqsub);
923
924 error = selscan(p, sel, uap->nd, retval, sel_pass);
925 if (error || *retval) {
926 goto done;
927 }
928 if (prepost) {
929 /* if the select of log, then we canwakeup and discover some one
930 * else already read the data; go toselct again if time permits
931 */
932 prepost = 0;
933 doretry = 1;
934 }
935 if (somewakeup) {
936 somewakeup = 0;
937 doretry = 1;
938 }
939 }
940
9bccf70c
A
941 if (uap->tv) {
942 uint64_t now;
943
944 clock_get_uptime(&now);
945 if (now >= sel->abstime)
946 goto done;
1c79356b 947 }
0b4e3aa0
A
948
949 if (doretry) {
950 /* cleanup obits and try again */
951 doretry = 0;
952 sel_pass = SEL_FIRSTPASS;
953 goto retry;
954 }
955
1c79356b
A
956 /*
957 * To effect a poll, the timeout argument should be
958 * non-nil, pointing to a zero-valued timeval structure.
959 */
9bccf70c 960 if (uap->tv && sel->abstime == 0) {
1c79356b
A
961 goto done;
962 }
0b4e3aa0
A
963
964 /* No spurious wakeups due to colls,no need to check for them */
965 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
966 sel_pass = SEL_FIRSTPASS;
1c79356b
A
967 goto retry;
968 }
0b4e3aa0 969
1c79356b
A
970 p->p_flag &= ~P_SELECT;
971
0b4e3aa0
A
972 /* if the select is just for timeout skip check */
973 if (sel->count &&(sel_pass == SEL_SECONDPASS))
974 panic("selprocess: 2nd pass assertwaiting");
975
976 /* Wait Queue Subordinate has waitqueue as first element */
9bccf70c
A
977 wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub,
978 &selwait, THREAD_ABORTSAFE);
979 if (wait_result != THREAD_AWAKENED) {
980 /* there are no preposted events */
981 error = tsleep1(NULL, PSOCK | PCATCH,
982 "select", sel->abstime, selcontinue);
0b4e3aa0
A
983 } else {
984 prepost = 1;
985 error = 0;
986 }
987
988 sel_pass = SEL_SECONDPASS;
989 if (error == 0) {
990 if (!prepost)
991 somewakeup =1;
1c79356b 992 goto retry;
0b4e3aa0 993 }
1c79356b 994done:
0b4e3aa0
A
995 if (unwind)
996 wait_subqueue_unlink_all(uth->uu_wqsub);
1c79356b
A
997 p->p_flag &= ~P_SELECT;
998 /* select is not restarted after signals... */
999 if (error == ERESTART)
1000 error = EINTR;
1001 if (error == EWOULDBLOCK)
1002 error = 0;
1c79356b
A
1003 nw = howmany(uap->nd, NFDBITS);
1004 ni = nw * sizeof(fd_mask);
1005
1006#define putbits(name, x) \
1007 do { \
1008 if (uap->name && (error2 = copyout((caddr_t)&sel->obits[(x) * nw], \
1009 (caddr_t)uap->name, ni))) \
1010 error = error2; \
1011 } while (0)
1012
1013 if (error == 0) {
1014 int error2;
1015
1016 putbits(in, 0);
1017 putbits(ou, 1);
1018 putbits(ex, 2);
1019#undef putbits
1020 }
1c79356b 1021 return(error);
1c79356b
A
1022}
1023
1024static int
0b4e3aa0 1025selscan(p, sel, nfd, retval, sel_pass)
1c79356b 1026 struct proc *p;
0b4e3aa0 1027 struct _select *sel;
1c79356b
A
1028 int nfd;
1029 register_t *retval;
0b4e3aa0 1030 int sel_pass;
1c79356b
A
1031{
1032 register struct filedesc *fdp = p->p_fd;
1033 register int msk, i, j, fd;
1034 register u_int32_t bits;
1035 struct file *fp;
1036 int n = 0;
0b4e3aa0 1037 int nc = 0;
1c79356b
A
1038 static int flag[3] = { FREAD, FWRITE, 0 };
1039 u_int32_t *iptr, *optr;
1040 u_int nw;
0b4e3aa0
A
1041 u_int32_t *ibits, *obits;
1042 char * wql;
1043 int nfunnel = 0;
1044 int count, nfcount;
1045 char * wql_ptr;
55e303ae 1046 struct vnode *vp;
1c79356b
A
1047
1048 /*
1049 * Problems when reboot; due to MacOSX signal probs
1050 * in Beaker1C ; verify that the p->p_fd is valid
1051 */
1052 if (fdp == NULL) {
1053 *retval=0;
1054 return(EIO);
1055 }
1056
0b4e3aa0
A
1057 ibits = sel->ibits;
1058 obits = sel->obits;
1059 wql = sel->wql;
1060
1061 count = sel->count;
1062 nfcount = sel->nfcount;
1063
1064 if (nfcount > count)
1065 panic("selcount count<nfcount");
1066
1c79356b
A
1067 nw = howmany(nfd, NFDBITS);
1068
0b4e3aa0
A
1069 nc = 0;
1070 if ( nfcount < count) {
1071 /* some or all in kernel funnel */
1072 for (msk = 0; msk < 3; msk++) {
1073 iptr = (u_int32_t *)&ibits[msk * nw];
1074 optr = (u_int32_t *)&obits[msk * nw];
1075 for (i = 0; i < nfd; i += NFDBITS) {
1076 bits = iptr[i/NFDBITS];
1077 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1078 bits &= ~(1 << j);
1079 fp = fdp->fd_ofiles[fd];
1080 if (fp == NULL ||
1081 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1082 return(EBADF);
1083 }
1084 if (sel_pass == SEL_SECONDPASS)
1085 wql_ptr = (char *)0;
1086 else
1087 wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
55e303ae
A
1088 /*
1089 * Merlot: need to remove the bogus f_data check
1090 * from the following "if" statement. It's there
1091 * because of various problems stemming from
1092 * races due to the split-funnels and lack of real
1093 * referencing on sockets...
1094 */
1095 if (fp->f_ops && (fp->f_type != DTYPE_SOCKET)
1096 && (fp->f_data != (caddr_t)-1)
1097 && !(fp->f_type == DTYPE_VNODE
1098 && (vp = (struct vnode *)fp->f_data)
1099 && vp->v_type == VFIFO)
9bccf70c 1100 && fo_select(fp, flag[msk], wql_ptr, p)) {
0b4e3aa0
A
1101 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1102 n++;
1103 }
1104 nc++;
1c79356b
A
1105 }
1106 }
1107 }
1108 }
0b4e3aa0
A
1109
1110 if (nfcount) {
1111 /* socket file descriptors for scan */
1112 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1113
1114 nc = 0;
1115 for (msk = 0; msk < 3; msk++) {
1116 iptr = (u_int32_t *)&ibits[msk * nw];
1117 optr = (u_int32_t *)&obits[msk * nw];
1118 for (i = 0; i < nfd; i += NFDBITS) {
1119 bits = iptr[i/NFDBITS];
1120 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1121 bits &= ~(1 << j);
1122 fp = fdp->fd_ofiles[fd];
1123 if (fp == NULL ||
1124 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
9bccf70c 1125 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
0b4e3aa0
A
1126 return(EBADF);
1127 }
1128 if (sel_pass == SEL_SECONDPASS)
1129 wql_ptr = (char *)0;
1130 else
1131 wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK);
55e303ae
A
1132 if (fp->f_ops
1133 && (fp->f_type == DTYPE_SOCKET
1134 || (fp->f_type == DTYPE_VNODE
1135 && (vp = (struct vnode *)fp->f_data)
1136 && vp != (struct vnode *)-1
1137 && vp->v_type == VFIFO))
1138 && fo_select(fp, flag[msk], wql_ptr, p)) {
0b4e3aa0
A
1139 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1140 n++;
1141 }
1142 nc++;
1143 }
1144 }
1145 }
1146 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1147 }
1148
1c79356b
A
1149 *retval = n;
1150 return (0);
1151}
1152
/*
 * Select routine that ignores all of its arguments and always
 * returns 1.
 */
/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{
	return 1;
}
1163
0b4e3aa0
A
1164static int
1165selcount(p, ibits, obits, nfd, count, nfcount)
1166 struct proc *p;
1167 u_int32_t *ibits, *obits;
1168 int nfd;
1169 int *count;
1170 int *nfcount;
1171{
1172 register struct filedesc *fdp = p->p_fd;
1173 register int msk, i, j, fd;
1174 register u_int32_t bits;
1175 struct file *fp;
1176 int n = 0;
1177 int nc = 0;
1178 int nfc = 0;
1179 static int flag[3] = { FREAD, FWRITE, 0 };
1180 u_int32_t *iptr, *fptr, *fbits;
1181 u_int nw;
55e303ae 1182 struct vnode *vp;
0b4e3aa0
A
1183
1184 /*
1185 * Problems when reboot; due to MacOSX signal probs
1186 * in Beaker1C ; verify that the p->p_fd is valid
1187 */
1188 if (fdp == NULL) {
1189 *count=0;
1190 *nfcount=0;
1191 return(EIO);
1192 }
1193
1194 nw = howmany(nfd, NFDBITS);
1195
1196
1197 for (msk = 0; msk < 3; msk++) {
1198 iptr = (u_int32_t *)&ibits[msk * nw];
1199 for (i = 0; i < nfd; i += NFDBITS) {
1200 bits = iptr[i/NFDBITS];
1201 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1202 bits &= ~(1 << j);
1203 fp = fdp->fd_ofiles[fd];
1204 if (fp == NULL ||
1205 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1206 *count=0;
1207 *nfcount=0;
1208 return(EBADF);
1209 }
55e303ae
A
1210 if (fp->f_type == DTYPE_SOCKET ||
1211 (fp->f_type == DTYPE_VNODE
1212 && (vp = (struct vnode *)fp->f_data)
1213 && vp->v_type == VFIFO))
0b4e3aa0
A
1214 nfc++;
1215 n++;
1216 }
1217 }
1218 }
1219 *count = n;
1220 *nfcount = nfc;
1221 return (0);
1222}
1223
1c79356b
A
1224/*
1225 * Record a select request.
1226 */
1227void
0b4e3aa0 1228selrecord(selector, sip, p_wql)
1c79356b
A
1229 struct proc *selector;
1230 struct selinfo *sip;
0b4e3aa0 1231 void * p_wql;
1c79356b 1232{
0b4e3aa0
A
1233 thread_act_t cur_act = current_act();
1234 struct uthread * ut = get_bsdthread_info(cur_act);
1c79356b 1235
0b4e3aa0
A
1236 /* need to look at collisions */
1237
1238 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1c79356b
A
1239 return;
1240 }
0b4e3aa0
A
1241
1242 /*do not record if this is second pass of select */
1243 if((p_wql == (void *)0)) {
1244 return;
1c79356b
A
1245 }
1246
0b4e3aa0 1247 if ((sip->si_flags & SI_INITED) == 0) {
55e303ae 1248 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
0b4e3aa0
A
1249 sip->si_flags |= SI_INITED;
1250 sip->si_flags &= ~SI_CLEAR;
1251 }
1252
1253 if (sip->si_flags & SI_RECORDED) {
1254 sip->si_flags |= SI_COLL;
1255 } else
1256 sip->si_flags &= ~SI_COLL;
1257
1258 sip->si_flags |= SI_RECORDED;
55e303ae
A
1259 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqsub))
1260 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql);
0b4e3aa0 1261
1c79356b
A
1262 return;
1263}
1264
1265void
1266selwakeup(sip)
1267 register struct selinfo *sip;
1268{
1c79356b 1269
0b4e3aa0 1270 if ((sip->si_flags & SI_INITED) == 0) {
1c79356b 1271 return;
0b4e3aa0 1272 }
1c79356b
A
1273
1274 if (sip->si_flags & SI_COLL) {
1275 nselcoll++;
1276 sip->si_flags &= ~SI_COLL;
0b4e3aa0
A
1277#if 0
1278 /* will not support */
1279 //wakeup((caddr_t)&selwait);
1280#endif
1c79356b 1281 }
1c79356b 1282
0b4e3aa0 1283 if (sip->si_flags & SI_RECORDED) {
55e303ae 1284 wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
0b4e3aa0 1285 sip->si_flags &= ~SI_RECORDED;
1c79356b 1286 }
1c79356b 1287
1c79356b
A
1288}
1289
1290void
1291selthreadclear(sip)
1292 register struct selinfo *sip;
1293{
1c79356b 1294
0b4e3aa0
A
1295 if ((sip->si_flags & SI_INITED) == 0) {
1296 return;
1297 }
1298 if (sip->si_flags & SI_RECORDED) {
1299 selwakeup(sip);
1300 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1c79356b 1301 }
0b4e3aa0 1302 sip->si_flags |= SI_CLEAR;
55e303ae 1303 wait_queue_unlinkall_nofree(&sip->si_wait_queue);
1c79356b
A
1304}
1305
1306
1307extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp);
1308
1309/*
1310 * called upon socket close. deque and free all events for
1311 * the socket
1312 */
9bccf70c 1313void
1c79356b
A
1314evsofree(struct socket *sp)
1315{
1316 struct eventqelt *eqp, *next;
1317
1318 if (sp == NULL) return;
1319
1320 for (eqp = sp->so_evlist.tqh_first; eqp != NULL; eqp = next) {
1321 next = eqp->ee_slist.tqe_next;
1322 evprocdeque(eqp->ee_proc, eqp); // remove from proc q if there
1323 TAILQ_REMOVE(&sp->so_evlist, eqp, ee_slist); // remove from socket q
1324 FREE(eqp, M_TEMP);
1325 }
1326}
1327
1328
/*
 * Trace codes for the event (watchevent/waitevent/modwatch) machinery.
 * DBG_EVENT is the first argument given to MISCDBG_CODE; the remaining
 * values are the per-operation codes passed as its second argument.
 */
#define DBG_EVENT		0x10

#define DBG_POST		0x10
#define DBG_WATCH		0x11
#define DBG_WAIT		0x12
#define DBG_MOD			0x13
#define DBG_EWAKEUP		0x14
#define DBG_ENQUEUE		0x15
#define DBG_DEQUEUE		0x16

#define DBG_MISC_POST		MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH		MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT		MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD		MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP	MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1346
1347
1348/*
1349 * enque this event if it's not already queued. wakeup
1350 the proc if we do queue this event to it.
1351 */
9bccf70c 1352void
1c79356b
A
1353evprocenque(struct eventqelt *eqp)
1354{
1355 struct proc *p;
1356
1357 assert(eqp);
1358 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, eqp, eqp->ee_flags, eqp->ee_eventmask,0,0);
1359 if (eqp->ee_flags & EV_QUEUED) {
1360 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1361 return;
1362 }
1363 eqp->ee_flags |= EV_QUEUED;
1364 eqp->ee_eventmask = 0; // disarm
1365 p = eqp->ee_proc;
1366 TAILQ_INSERT_TAIL(&p->p_evlist, eqp, ee_plist);
1367 KERNEL_DEBUG(DBG_MISC_EWAKEUP,0,0,0,eqp,0);
1368 wakeup(&p->p_evlist);
1369 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1370}
1371
/*
 * Given either a sockbuf or a socket, run down the socket's event
 * list and queue the ready events found.
 *
 *   sp     socket to examine; overridden by sb->sb_so when sb is given
 *   sb     optional sockbuf naming the socket
 *   event  event being posted; only the bits in EV_DMASK select the case
 *
 * For every watcher on the socket, the bits that are both requested
 * (ee_eventmask) and currently true are accumulated into
 * ee_req.er_eventbits, and the watcher is queued to its process through
 * evprocenque().  An event value that matches no case ends the scan
 * immediately.
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int mask;
	struct eventqelt *evq;
	register struct tcpcb *tp;

	if (sb)
		sp = sb->sb_so;
	if (!sp || sp->so_evlist.tqh_first == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,0,0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark

		*/
		switch (event & EV_DMASK) {

		case EV_RWBYTES:
		case EV_OOB:
		case EV_RWBYTES|EV_OOB:
			if (event & EV_OOB) {
				if ((evq->ee_eventmask & EV_EX)) {
					if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) {
						mask |= EV_EX|EV_OOB;
					}
				}
			}
			if (event & EV_RWBYTES) {
				if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
					/*
					 * The TCP control block is only looked at
					 * for stream sockets; the error test is
					 * parenthesized so that a non-stream socket
					 * with ECONNRESET never reaches sototcpcb().
					 */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
					if (sp->so_state & SS_CANTRCVMORE) {
						mask |= EV_RE|EV_FIN;
						evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
						break;
					}
					mask |= EV_RE;
					evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
				}

				if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
					/* same stream-only guard as the read side */
					if ((sp->so_type == SOCK_STREAM) &&
					    ((sp->so_error == ECONNREFUSED) ||
					     (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) ||
						    !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
					mask |= EV_WR;
					evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
				}
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				evq->ee_req.er_rcnt = sp->so_qlen + 1;	/* incl this one */
				mask |= EV_RE|EV_RCONN;
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			/* unknown event: stop, but keep the trace balanced */
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
			return;
		} /* switch */

		if (mask) {
			evq->ee_req.er_eventbits |= mask;
			KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask,0,0);
			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0);
}
1511
1512/*
1513 * remove and return the first event (eqp=NULL) or a specific
1514 * event, or return NULL if no events found
1515 */
1516struct eventqelt *
1517evprocdeque(struct proc *p, struct eventqelt *eqp)
1518{
1519
1520 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_START,p,eqp,0,0,0);
1521
1522 if (eqp && ((eqp->ee_flags & EV_QUEUED) == NULL)) {
1523 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1524 return(NULL);
1525 }
1526 if (p->p_evlist.tqh_first == NULL) {
1527 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0);
1528 return(NULL);
1529 }
1530 if (eqp == NULL) { // remove first
1531 eqp = p->p_evlist.tqh_first;
1532 }
1533 TAILQ_REMOVE(&p->p_evlist, eqp, ee_plist);
1534 eqp->ee_flags &= ~EV_QUEUED;
1535 KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,eqp,0,0,0,0);
1536 return(eqp);
1537}
1538
/* Arguments of the watchevent system call. */
struct evwatch_args {
	struct eventreq	*u_req;		/* user address of the event request */
	int		u_eventmask;	/* events to watch; masked with EV_MASK */
};
1543
1544
1545/*
1546 * watchevent system call. user passes us an event to watch
1547 * for. we malloc an event object, initialize it, and queue
1548 * it to the open socket. when the event occurs, postevent()
1549 * will enque it back to our proc where we can retrieve it
1550 * via waitevent().
1551 *
1552 * should this prevent duplicate events on same socket?
1553 */
1554int
1555watchevent(p, uap, retval)
1556 struct proc *p;
1557 struct evwatch_args *uap;
1558 register_t *retval;
1559{
1560 struct eventqelt *eqp = (struct eventqelt *)0;
1561 struct eventqelt *np;
1562 struct eventreq *erp;
1563 struct file *fp;
1564 struct socket *sp;
1565 int error;
1566
1567 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
1568
1569 // get a qelt and fill with users req
1570 MALLOC(eqp, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
1571 if (!eqp) panic("can't MALLOC eqp");
1572 erp = &eqp->ee_req;
1573 // get users request pkt
1574 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1575 sizeof(struct eventreq))) {
1576 FREE(eqp, M_TEMP);
1577 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1578 return(error);
1579 }
1580 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,eqp,0,0);
1581 // validate, freeing qelt if errors
1582 error = 0;
1583 if (erp->er_type != EV_FD) {
1584 error = EINVAL;
1585 } else if (erp->er_handle < 0) {
1586 error = EBADF;
1587 } else if (erp->er_handle > p->p_fd->fd_nfiles) {
1588 error = EBADF;
1589 } else if ((fp = *fdfile(p, erp->er_handle)) == NULL) {
1590 error = EBADF;
1591 } else if (fp->f_type != DTYPE_SOCKET) {
1592 error = EINVAL;
1593 }
1594 if (error) {
1595 FREE(eqp,M_TEMP);
1596 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
1597 return(error);
1598 }
1599
1600 erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
1601 eqp->ee_proc = p;
1602 eqp->ee_eventmask = uap->u_eventmask & EV_MASK;
1603 eqp->ee_flags = 0;
1604
1605 sp = (struct socket *)fp->f_data;
1606 assert(sp != NULL);
1607
1608 // only allow one watch per file per proc
1609 for (np = sp->so_evlist.tqh_first; np != NULL; np = np->ee_slist.tqe_next) {
1610 if (np->ee_proc == p) {
1611 FREE(eqp,M_TEMP);
1612 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
1613 return(EINVAL);
1614 }
1615 }
1616
1617 TAILQ_INSERT_TAIL(&sp->so_evlist, eqp, ee_slist);
1618 postevent(sp, 0, EV_RWBYTES); // catch existing events
1619 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
1620 return(0);
1621}
1622
/* Arguments of the waitevent system call. */
struct evwait_args {
	struct eventreq	*u_req;	/* user address that receives the event */
	struct timeval	*tv;	/* user timeout; NULL means no timeout */
};
1627
1628/*
1629 * waitevent system call.
1630 * grabs the next waiting event for this proc and returns
1631 * it. if no events, user can request to sleep with timeout
1632 * or poll mode (tv=NULL);
1633 */
1634int
1635waitevent(p, uap, retval)
9bccf70c
A
1636 struct proc *p;
1637 struct evwait_args *uap;
1638 register_t *retval;
1c79356b 1639{
9bccf70c
A
1640 int error = 0;
1641 struct eventqelt *eqp;
1642 uint64_t abstime, interval;
1c79356b
A
1643
1644 if (uap->tv) {
9bccf70c
A
1645 struct timeval atv;
1646
1647 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv));
1c79356b 1648 if (error)
9bccf70c 1649 return(error);
1c79356b
A
1650 if (itimerfix(&atv)) {
1651 error = EINVAL;
1652 return(error);
1653 }
1c79356b 1654
9bccf70c
A
1655 interval = tvtoabstime(&atv);
1656 }
1657 else
1658 abstime = interval = 0;
1659
1660 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
1c79356b
A
1661
1662retry:
9bccf70c
A
1663 if ((eqp = evprocdeque(p,NULL)) != NULL) {
1664 error = copyout((caddr_t)&eqp->ee_req,
1665 (caddr_t)uap->u_req, sizeof(struct eventreq));
1666 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
1667 eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0);
1c79356b 1668
9bccf70c
A
1669 return (error);
1670 }
1671 else {
1672 if (uap->tv && interval == 0) {
1673 *retval = 1; // poll failed
1674 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
1675
1676 return (error);
1677 }
1678
1679 if (interval != 0)
55e303ae 1680 clock_absolutetime_interval_to_deadline(interval, &abstime);
9bccf70c
A
1681
1682 KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0);
1683 error = tsleep1(&p->p_evlist, PSOCK | PCATCH,
1684 "waitevent", abstime, (int (*)(int))0);
1685 KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0);
1686 if (error == 0)
1687 goto retry;
1688 if (error == ERESTART)
1689 error = EINTR;
1690 if (error == EWOULDBLOCK) {
1691 *retval = 1;
1692 error = 0;
1693 }
1694 }
1695
1696 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
1697
1698 return (error);
1c79356b
A
1699}
1700
/* Arguments of the modwatch system call. */
struct modwatch_args {
	struct eventreq	*u_req;		/* user address of the event request */
	int		u_eventmask;	/* new event mask, or EV_RM to remove */
};
1705
1706/*
1707 * modwatch system call. user passes in event to modify.
1708 * if we find it we reset the event bits and que/deque event
1709 * it needed.
1710 */
1711int
1712modwatch(p, uap, retval)
1713 struct proc *p;
1714 struct modwatch_args *uap;
1715 register_t *retval;
1716{
1717 struct eventreq er;
1718 struct eventreq *erp = &er;
1719 struct eventqelt *evq;
1720 int error;
1721 struct file *fp;
1722 struct socket *sp;
1723 int flag;
1724
1725 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
1726
1727 // get users request pkt
1728 if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp,
1729 sizeof(struct eventreq))) return(error);
1730
1731 if (erp->er_type != EV_FD) return(EINVAL);
1732 if (erp->er_handle < 0) return(EBADF);
1733 if (erp->er_handle > p->p_fd->fd_nfiles) return(EBADF);
1734 if ((fp = *fdfile(p, erp->er_handle)) == NULL)
1735 return(EBADF);
1736 if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock
1737 sp = (struct socket *)fp->f_data;
1c79356b 1738
55e303ae
A
1739 /* soo_close sets f_data to 0 before switching funnel */
1740 if (sp == (struct socket *)0)
1741 return(EBADF);
1c79356b
A
1742
1743 // locate event if possible
1744 for (evq = sp->so_evlist.tqh_first;
1745 evq != NULL; evq = evq->ee_slist.tqe_next) {
1746 if (evq->ee_proc == p) break;
1747 }
1748
1749 if (evq == NULL) {
1750 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
1751 return(EINVAL);
1752 }
1753 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0);
1754
1755 if (uap->u_eventmask == EV_RM) {
1756 evprocdeque(p, evq);
1757 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist);
1758 FREE(evq, M_TEMP);
1759 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
1760 return(0);
1761 }
1762
1763 switch (uap->u_eventmask & EV_MASK) {
1764
1765 case 0:
1766 flag = 0;
1767 break;
1768
1769 case EV_RE:
1770 case EV_WR:
1771 case EV_RE|EV_WR:
1772 flag = EV_RWBYTES;
1773 break;
1774
1775 case EV_EX:
1776 flag = EV_OOB;
1777 break;
1778
1779 case EV_EX|EV_RE:
1780 case EV_EX|EV_WR:
1781 case EV_EX|EV_RE|EV_WR:
1782 flag = EV_OOB|EV_RWBYTES;
1783 break;
1784
1785 default:
1786 return(EINVAL);
1787 }
1788
1789 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
1790 evprocdeque(p, evq);
1791 evq->ee_req.er_eventbits = 0;
1792 postevent(sp, 0, flag);
1793 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,sp,flag,0);
1794 return(0);
1795}