X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e7c99d92bf4e4d1189c904195ed317951f9a35ad..a3d08fcd5120d2aa8303b6349ca8b14e3f284af3:/bsd/kern/sys_generic.c diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index cf5878004..15fdf111f 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -72,10 +72,6 @@ #include #include -#if KTRACE -#include -#endif - #include #include #include @@ -88,6 +84,9 @@ #include #include #include +#include + +#include #include #include @@ -105,37 +104,172 @@ #include #include #include +/* for wait queue based select */ +#include +#if KTRACE +#include +#endif +#include + + +__private_extern__ struct file* +holdfp(fdp, fd, flag) + struct filedesc* fdp; + int fd, flag; +{ + struct file* fp; + + if (((u_int)fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fp->f_flag & flag) == 0) { + return (NULL); + } + if (fref(fp) == -1) + return (NULL); + return (fp); +} /* * Read system call. */ +#ifndef _SYS_SYSPROTO_H_ struct read_args { int fd; char *cbuf; u_int nbyte; }; -/* ARGSUSED */ +#endif +int read(p, uap, retval) struct proc *p; register struct read_args *uap; register_t *retval; +{ + register struct file *fp; + int error; + + if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) + return (EBADF); + error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte, + (off_t)-1, 0, retval); + frele(fp); + return(error); +} + +/* + * Pread system call + */ +#ifndef _SYS_SYSPROTO_H_ +struct pread_args { + int fd; + void *buf; + size_t nbyte; +#ifdef DOUBLE_ALIGN_PARAMS + int pad; +#endif + off_t offset; +}; +#endif +int +pread(p, uap, retval) + struct proc *p; + register struct pread_args *uap; + int *retval; +{ + register struct file *fp; + int error; + + if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET, retval); + } + frele(fp); + + if (!error) + KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE), + uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0); + + return(error); +} + +/* + * Code common for read and pread + */ +__private_extern__ int +dofileread(p, fp, fd, buf, nbyte, offset, flags, retval) + struct proc *p; + struct file *fp; + int fd, flags; + void *buf; + size_t nbyte; + off_t offset; + int *retval; { struct uio auio; struct iovec aiov; + long cnt, error = 0; +#if KTRACE + struct iovec ktriov; + struct uio ktruio; + int didktr = 0; +#endif - aiov.iov_base = (caddr_t)uap->cbuf; - aiov.iov_len = uap->nbyte; + aiov.iov_base = (caddr_t)buf; + aiov.iov_len = nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; + auio.uio_offset = offset; + if (nbyte > INT_MAX) + return (EINVAL); + auio.uio_resid = nbyte; auio.uio_rw = UIO_READ; - return (rwuio(p, uap->fd, &auio, UIO_READ, retval)); + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; +#if KTRACE + /* + * if tracing, save a copy of iovec + */ + if (KTRPOINT(p, KTR_GENIO)) { + ktriov = aiov; + ktruio = auio; + didktr = 1; + } +#endif + cnt = nbyte; + + if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + cnt -= auio.uio_resid; +#if KTRACE + if (didktr && error == 0) { + ktruio.uio_iov = &ktriov; + ktruio.uio_resid = cnt; + ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error, + KERNEL_FUNNEL); + } +#endif + *retval = cnt; + return (error); } +/* + * Scatter read system call. + */ +#ifndef _SYS_SYSPROTO_H_ struct readv_args { int fd; struct iovec *iovp; u_int iovcnt; }; +#endif +int readv(p, uap, retval) struct proc *p; register struct readv_args *uap; @@ -169,32 +303,145 @@ readv(p, uap, retval) /* * Write system call */ +#ifndef _SYS_SYSPROTO_H_ struct write_args { int fd; char *cbuf; u_int nbyte; }; +#endif +int write(p, uap, retval) struct proc *p; register struct write_args *uap; int *retval; { + register struct file *fp; + int error; + + if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) + return (EBADF); + error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte, + (off_t)-1, 0, retval); + frele(fp); + return(error); +} + +/* + * Pwrite system call + */ +#ifndef _SYS_SYSPROTO_H_ +struct pwrite_args { + int fd; + const void *buf; + size_t nbyte; +#ifdef DOUBLE_ALIGN_PARAMS + int pad; +#endif + off_t offset; +}; +#endif +int +pwrite(p, uap, retval) + struct proc *p; + register struct pwrite_args *uap; + int *retval; +{ + register struct file *fp; + int error; + + if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET, retval); + } + frele(fp); + + if (!error) + KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE), + uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0); + + return(error); +} + +__private_extern__ int +dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval) + struct proc *p; + struct file *fp; + int fd, flags; + const void *buf; + size_t nbyte; + off_t offset; + int *retval; +{ struct uio auio; struct iovec aiov; - - aiov.iov_base = uap->cbuf; - aiov.iov_len = uap->nbyte; + long cnt, error = 0; +#if KTRACE + struct iovec ktriov; + struct uio ktruio; + int didktr = 0; +#endif + + aiov.iov_base = (void *)(uintptr_t)buf; + aiov.iov_len = nbyte; auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + if (nbyte > INT_MAX) + return (EINVAL); + auio.uio_resid = nbyte; auio.uio_rw = UIO_WRITE; - return (rwuio(p, uap->fd, &auio, UIO_WRITE, retval)); + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; +#if KTRACE + /* + * if tracing, save a copy of iovec and uio + */ + if (KTRPOINT(p, KTR_GENIO)) { + ktriov = aiov; + ktruio = auio; + didktr = 1; + } +#endif + cnt = nbyte; + if (fp->f_type == DTYPE_VNODE) + bwillwrite(); + if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + /* The socket layer handles SIGPIPE */ + if (error == EPIPE && fp->f_type != DTYPE_SOCKET) + psignal(p, SIGPIPE); + } + cnt -= auio.uio_resid; +#if KTRACE + if (didktr && error == 0) { + ktruio.uio_iov = &ktriov; + ktruio.uio_resid = cnt; + ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error, + KERNEL_FUNNEL); + } +#endif + *retval = cnt; + return (error); } - + +/* + * Gather write system call + */ +#ifndef _SYS_SYSPROTO_H_ struct writev_args { int fd; struct iovec *iovp; u_int iovcnt; }; +#endif +int writev(p, uap, retval) struct proc *p; register struct writev_args *uap; @@ -225,6 +472,7 @@ writev(p, uap, retval) return (error); } +int rwuio(p, fdes, uio, rw, retval) struct proc *p; int fdes; @@ -235,6 +483,12 @@ rwuio(p, fdes, uio, rw, retval) struct file *fp; register struct iovec *iov; int i, count, flag, error; +#if KTRACE + struct iovec *ktriov; + struct uio ktruio; + int didktr = 0; + u_int iovlen; +#endif if (error = fdgetf(p, fdes, &fp)) return (error); @@ -257,33 +511,65 @@ rwuio(p, fdes, uio, rw, retval) iov++; } count = uio->uio_resid; +#if KTRACE + /* + * if tracing, save a copy of iovec + */ + if (KTRPOINT(p, KTR_GENIO)) { + iovlen = uio->uio_iovcnt * sizeof (struct iovec); + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen); + ktruio = *uio; + didktr = 1; + } +#endif + if (rw == UIO_READ) { - if (error = (*fp->f_ops->fo_read)(fp, uio, fp->f_cred)) - if (uio->uio_resid != count && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) - error = 0; + if (error = fo_read(fp, uio, fp->f_cred, 0, p)) + if (uio->uio_resid != count && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; } else { - if (error = (*fp->f_ops->fo_write)(fp, uio, fp->f_cred)) { + if (fp->f_type == DTYPE_VNODE) + bwillwrite(); + if (error = fo_write(fp, uio, fp->f_cred, 0, p)) { if (uio->uio_resid != count && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) + error == EINTR || error == EWOULDBLOCK)) error = 0; - if (error == EPIPE) + /* The socket layer handles SIGPIPE */ + if (error == EPIPE && fp->f_type != DTYPE_SOCKET) psignal(p, SIGPIPE); } } + *retval = count - uio->uio_resid; + +#if KTRACE + if (didktr) { + if (error == 0) { + ktruio.uio_iov = ktriov; + ktruio.uio_resid = *retval; + ktrgenio(p->p_tracep, fdes, rw, &ktruio, error, + KERNEL_FUNNEL); + } + FREE(ktriov, M_TEMP); + } +#endif + return(error); } /* * Ioctl system call */ +#ifndef _SYS_SYSPROTO_H_ struct ioctl_args { int fd; u_long com; caddr_t data; }; -/* ARGSUSED */ +#endif +int ioctl(p, uap, retval) struct proc *p; register struct ioctl_args *uap; @@ -298,16 +584,21 @@ ioctl(p, uap, retval) #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; + AUDIT_ARG(fd, uap->fd); + AUDIT_ARG(cmd, uap->com); /* XXX cmd is int, uap->com is long */ + AUDIT_ARG(addr, uap->data); if (error = fdgetf(p, uap->fd, &fp)) return (error); + AUDIT_ARG(file, p, fp); if ((fp->f_flag & (FREAD | FWRITE)) == 0) return (EBADF); - /*### LD 6/11/97 Hack Alert: this is to get AppleTalk to work +#if NETAT + /* + * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work * while implementing an ATioctl system call */ -#if NETAT { extern int appletalk_inited; @@ -315,7 +606,7 @@ ioctl(p, uap, retval) #ifdef APPLETALK_DEBUG kprintf("ioctl: special AppleTalk \n"); #endif - error = (*fp->f_ops->fo_ioctl)(fp, uap->com, uap->data, p); + error = fo_ioctl(fp, uap->com, uap->data, p); return(error); } } @@ -372,7 +663,7 @@ ioctl(p, uap, retval) fp->f_flag |= FNONBLOCK; else fp->f_flag &= ~FNONBLOCK; - error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); + error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); break; case FIOASYNC: @@ -380,7 +671,7 @@ ioctl(p, uap, retval) fp->f_flag |= FASYNC; else fp->f_flag &= ~FASYNC; - error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); + error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); break; case FIOSETOWN: @@ -400,8 +691,7 @@ ioctl(p, uap, retval) } tmp = p1->p_pgrp->pg_id; } - error = (*fp->f_ops->fo_ioctl) - (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); + error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); break; case FIOGETOWN: @@ -410,12 +700,12 @@ ioctl(p, uap, retval) *(int *)data = ((struct socket *)fp->f_data)->so_pgid; break; } - error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); + error = fo_ioctl(fp, TIOCGPGRP, data, p); *(int *)data = -*(int *)data; break; default: - error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); + error = fo_ioctl(fp, com, data, p); /* * Copy any data to user, size was * already set and checked above. @@ -429,12 +719,21 @@ ioctl(p, uap, retval) return (error); } - int selwait, nselcoll; +#define SEL_FIRSTPASS 1 +#define SEL_SECONDPASS 2 +extern int selcontinue(int error); +extern int selprocess(int error, int sel_pass); +static int selscan(struct proc *p, struct _select * sel, + int nfd, register_t *retval, int sel_pass); +static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits, + int nfd, int * count, int * nfcount); +extern uint64_t tvtoabstime(struct timeval *tvp); /* * Select system call. */ +#ifndef _SYS_SYSPROTO_H_ struct select_args { int nd; u_int32_t *in; @@ -442,22 +741,22 @@ struct select_args { u_int32_t *ex; struct timeval *tv; }; - -extern int selcontinue(int error); -static int selscan( struct proc *p, u_int32_t *ibits, u_int32_t *obits, - int nfd, register_t *retval); - +#endif +int select(p, uap, retval) register struct proc *p; register struct select_args *uap; register_t *retval; { - int s, error = 0, timo; - u_int ni, nw; + int error = 0; + u_int ni, nw, size; thread_act_t th_act; struct uthread *uth; struct _select *sel; int needzerofill = 1; + int kfcount =0; + int nfcount = 0; + int count = 0; th_act = current_act(); uth = get_bsdthread_info(th_act); @@ -465,8 +764,9 @@ select(p, uap, retval) retval = (int *)get_bsduthreadrval(th_act); *retval = 0; - if (uap->nd < 0) + if (uap->nd < 0) { return (EINVAL); + } if (uap->nd > p->p_fd->fd_nfiles) uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ @@ -523,29 +823,67 @@ select(p, uap, retval) #undef getbits if (uap->tv) { - error = copyin((caddr_t)uap->tv, (caddr_t)&sel->atv, - sizeof (sel->atv)); + struct timeval atv; + + error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); if (error) goto continuation; - if (itimerfix(&sel->atv)) { + if (itimerfix(&atv)) { error = EINVAL; goto continuation; } - s = splhigh(); - timeradd(&sel->atv, &time, &sel->atv); - timo = hzto(&sel->atv); - splx(s); - } else - timo = 0; - sel->poll = timo; + + clock_absolutetime_interval_to_deadline( + tvtoabstime(&atv), &sel->abstime); + } + else + sel->abstime = 0; + + sel->nfcount = 0; + if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) { + goto continuation; + } + + sel->nfcount = nfcount; + sel->count = count; + size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK); + if (sel->allocsize) { + if (uth->uu_wqsub == 0) + panic("select: wql memory smashed"); + /* needed for the select now */ + if (size > sel->allocsize) { + kfree(uth->uu_wqsub, sel->allocsize); + sel->allocsize = size; + uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize); + if (uth->uu_wqsub == (wait_queue_sub_t)NULL) + panic("failed to allocate memory for waitqueue\n"); + sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB; + } + } else { + sel->count = count; + sel->allocsize = size; + uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize); + if (uth->uu_wqsub == (wait_queue_sub_t)NULL) + panic("failed to allocate memory for waitqueue\n"); + sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB; + } + bzero(uth->uu_wqsub, size); + wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST)); + continuation: - selcontinue(error); + return selprocess(error, SEL_FIRSTPASS); +} + +int +selcontinue(int error) +{ + return selprocess(error, SEL_SECONDPASS); } int -selcontinue(error) +selprocess(error, sel_pass) { - int s, ncoll, timo; + int ncoll; u_int ni, nw; thread_act_t th_act; struct uthread *uth; @@ -553,6 +891,11 @@ selcontinue(error) struct select_args *uap; int *retval; struct _select *sel; + int unwind = 1; + int prepost = 0; + int somewakeup = 0; + int doretry = 0; + wait_result_t wait_result; p = current_proc(); th_act = current_act(); @@ -561,54 +904,102 @@ selcontinue(error) uth = get_bsdthread_info(th_act); sel = &uth->uu_state.ss_select; + /* if it is first pass wait queue is not setup yet */ + if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) + unwind = 0; + if (sel->count == 0) + unwind = 0; retry: - if (error != 0) + if (error != 0) { goto done; + } + ncoll = nselcoll; p->p_flag |= P_SELECT; - error = selscan(p, sel->ibits, sel->obits, uap->nd, retval); - if (error || *retval) - goto done; - s = splhigh(); - /* this should be timercmp(&time, &atv, >=) */ - if (uap->tv && (time.tv_sec > sel->atv.tv_sec || - time.tv_sec == sel->atv.tv_sec && time.tv_usec >= sel->atv.tv_usec)) { - splx(s); - goto done; + /* skip scans if the select is just for timeouts */ + if (sel->count) { + if (sel_pass == SEL_FIRSTPASS) + wait_queue_sub_clearrefs(uth->uu_wqsub); + + error = selscan(p, sel, uap->nd, retval, sel_pass); + if (error || *retval) { + goto done; + } + if (prepost) { + /* if the select of log, then we canwakeup and discover some one + * else already read the data; go toselct again if time permits + */ + prepost = 0; + doretry = 1; + } + if (somewakeup) { + somewakeup = 0; + doretry = 1; + } + } + + if (uap->tv) { + uint64_t now; + + clock_get_uptime(&now); + if (now >= sel->abstime) + goto done; } + + if (doretry) { + /* cleanup obits and try again */ + doretry = 0; + sel_pass = SEL_FIRSTPASS; + goto retry; + } + /* * To effect a poll, the timeout argument should be * non-nil, pointing to a zero-valued timeval structure. */ - timo = sel->poll; - - if (uap->tv && (timo == 0)) { - splx(s); + if (uap->tv && sel->abstime == 0) { goto done; } - if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { - splx(s); + + /* No spurious wakeups due to colls,no need to check for them */ + if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) { + sel_pass = SEL_FIRSTPASS; goto retry; } + p->p_flag &= ~P_SELECT; -#if 1 /* Use Continuations */ - error = tsleep0((caddr_t)&selwait, PSOCK | PCATCH, "select", timo, selcontinue); - /* NOTREACHED */ -#else - error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); -#endif - splx(s); - if (error == 0) + /* if the select is just for timeout skip check */ + if (sel->count &&(sel_pass == SEL_SECONDPASS)) + panic("selprocess: 2nd pass assertwaiting"); + + /* Wait Queue Subordinate has waitqueue as first element */ + wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub, + &selwait, THREAD_ABORTSAFE); + if (wait_result != THREAD_AWAKENED) { + /* there are no preposted events */ + error = tsleep1(NULL, PSOCK | PCATCH, + "select", sel->abstime, selcontinue); + } else { + prepost = 1; + error = 0; + } + + sel_pass = SEL_SECONDPASS; + if (error == 0) { + if (!prepost) + somewakeup =1; goto retry; + } done: + if (unwind) + wait_subqueue_unlink_all(uth->uu_wqsub); p->p_flag &= ~P_SELECT; /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; - nw = howmany(uap->nd, NFDBITS); ni = nw * sizeof(fd_mask); @@ -627,29 +1018,32 @@ done: putbits(ex, 2); #undef putbits } - -#if defined (__i386__) return(error); -#else - unix_syscall_return(error); -#endif } static int -selscan(p, ibits, obits, nfd, retval) +selscan(p, sel, nfd, retval, sel_pass) struct proc *p; - u_int32_t *ibits, *obits; + struct _select *sel; int nfd; register_t *retval; + int sel_pass; { register struct filedesc *fdp = p->p_fd; register int msk, i, j, fd; register u_int32_t bits; struct file *fp; int n = 0; + int nc = 0; static int flag[3] = { FREAD, FWRITE, 0 }; u_int32_t *iptr, *optr; u_int nw; + u_int32_t *ibits, *obits; + char * wql; + int nfunnel = 0; + int count, nfcount; + char * wql_ptr; + struct vnode *vp; /* * Problems when reboot; due to MacOSX signal probs @@ -660,31 +1054,104 @@ selscan(p, ibits, obits, nfd, retval) return(EIO); } + ibits = sel->ibits; + obits = sel->obits; + wql = sel->wql; + + count = sel->count; + nfcount = sel->nfcount; + + if (nfcount > count) + panic("selcount countfd_ofiles[fd]; - if (fp == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); - if (fp->f_ops && (*fp->f_ops->fo_select)(fp, flag[msk], p)) { - optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); - n++; + nc = 0; + if ( nfcount < count) { + /* some or all in kernel funnel */ + for (msk = 0; msk < 3; msk++) { + iptr = (u_int32_t *)&ibits[msk * nw]; + optr = (u_int32_t *)&obits[msk * nw]; + for (i = 0; i < nfd; i += NFDBITS) { + bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[fd]; + if (fp == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + return(EBADF); + } + if (sel_pass == SEL_SECONDPASS) + wql_ptr = (char *)0; + else + wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK); + /* + * Merlot: need to remove the bogus f_data check + * from the following "if" statement. It's there + * because of various problems stemming from + * races due to the split-funnels and lack of real + * referencing on sockets... + */ + if (fp->f_ops && (fp->f_type != DTYPE_SOCKET) + && (fp->f_data != (caddr_t)-1) + && !(fp->f_type == DTYPE_VNODE + && (vp = (struct vnode *)fp->f_data) + && vp->v_type == VFIFO) + && fo_select(fp, flag[msk], wql_ptr, p)) { + optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); + n++; + } + nc++; + } + } + } + } + + if (nfcount) { + /* socket file descriptors for scan */ + thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + + nc = 0; + for (msk = 0; msk < 3; msk++) { + iptr = (u_int32_t *)&ibits[msk * nw]; + optr = (u_int32_t *)&obits[msk * nw]; + for (i = 0; i < nfd; i += NFDBITS) { + bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[fd]; + if (fp == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + return(EBADF); + } + if (sel_pass == SEL_SECONDPASS) + wql_ptr = (char *)0; + else + wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK); + if (fp->f_ops + && (fp->f_type == DTYPE_SOCKET + || (fp->f_type == DTYPE_VNODE + && (vp = (struct vnode *)fp->f_data) + && vp != (struct vnode *)-1 + && vp->v_type == VFIFO)) + && fo_select(fp, flag[msk], wql_ptr, p)) { + optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); + n++; + } + nc++; } } } + thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); } + *retval = n; return (0); } /*ARGSUSED*/ +int seltrue(dev, flag, p) dev_t dev; int flag; @@ -694,38 +1161,104 @@ seltrue(dev, flag, p) return (1); } +static int +selcount(p, ibits, obits, nfd, count, nfcount) + struct proc *p; + u_int32_t *ibits, *obits; + int nfd; + int *count; + int *nfcount; +{ + register struct filedesc *fdp = p->p_fd; + register int msk, i, j, fd; + register u_int32_t bits; + struct file *fp; + int n = 0; + int nc = 0; + int nfc = 0; + static int flag[3] = { FREAD, FWRITE, 0 }; + u_int32_t *iptr, *fptr, *fbits; + u_int nw; + struct vnode *vp; + + /* + * Problems when reboot; due to MacOSX signal probs + * in Beaker1C ; verify that the p->p_fd is valid + */ + if (fdp == NULL) { + *count=0; + *nfcount=0; + return(EIO); + } + + nw = howmany(nfd, NFDBITS); + + + for (msk = 0; msk < 3; msk++) { + iptr = (u_int32_t *)&ibits[msk * nw]; + for (i = 0; i < nfd; i += NFDBITS) { + bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[fd]; + if (fp == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + *count=0; + *nfcount=0; + return(EBADF); + } + if (fp->f_type == DTYPE_SOCKET || + (fp->f_type == DTYPE_VNODE + && (vp = (struct vnode *)fp->f_data) + && vp->v_type == VFIFO)) + nfc++; + n++; + } + } + } + *count = n; + *nfcount = nfc; + return (0); +} + /* * Record a select request. */ void -selrecord(selector, sip) +selrecord(selector, sip, p_wql) struct proc *selector; struct selinfo *sip; + void * p_wql; { - int oldpri = splhigh(); - thread_t my_thread = current_thread(); - thread_t selthread; + thread_act_t cur_act = current_act(); + struct uthread * ut = get_bsdthread_info(cur_act); - selthread = sip->si_thread; - - if (selthread == my_thread) { - splx(oldpri); + /* need to look at collisions */ + + if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) { return; } - - if (selthread && is_thread_active(selthread) && - get_thread_waitevent(selthread) == (caddr_t)&selwait) { - sip->si_flags |= SI_COLL; - splx(oldpri); - } else { - sip->si_thread = my_thread; - splx(oldpri); - act_reference(current_act()); - if (selthread) { - act_deallocate(getact_thread(selthread)); - } + + /*do not record if this is second pass of select */ + if((p_wql == (void *)0)) { + return; } + if ((sip->si_flags & SI_INITED) == 0) { + wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO); + sip->si_flags |= SI_INITED; + sip->si_flags &= ~SI_CLEAR; + } + + if (sip->si_flags & SI_RECORDED) { + sip->si_flags |= SI_COLL; + } else + sip->si_flags &= ~SI_COLL; + + sip->si_flags |= SI_RECORDED; + if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqsub)) + wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql); + return; } @@ -733,50 +1266,41 @@ void selwakeup(sip) register struct selinfo *sip; { - register thread_t the_thread = (thread_t)sip->si_thread; - int oldpri; - struct proc *p; - thread_act_t th_act; - if (the_thread == 0) + if ((sip->si_flags & SI_INITED) == 0) { return; + } if (sip->si_flags & SI_COLL) { nselcoll++; sip->si_flags &= ~SI_COLL; - wakeup((caddr_t)&selwait); +#if 0 + /* will not support */ + //wakeup((caddr_t)&selwait); +#endif } - - oldpri = splhigh(); - th_act = (thread_act_t)getact_thread(the_thread); - - if (is_thread_active(the_thread)) { - if (get_thread_waitevent(the_thread) == &selwait) - clear_wait(the_thread, THREAD_AWAKENED); - if (p = current_proc()) - p->p_flag &= ~P_SELECT; + if (sip->si_flags & SI_RECORDED) { + wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED); + sip->si_flags &= ~SI_RECORDED; } - - /* th_act = (thread_act_t)getact_thread(the_thread); */ - act_deallocate(th_act); - - sip->si_thread = 0; - - splx(oldpri); } void selthreadclear(sip) register struct selinfo *sip; { - thread_act_t th_act; - if (sip->si_thread) { - th_act = (thread_act_t)getact_thread(sip->si_thread); - act_deallocate(th_act); + if ((sip->si_flags & SI_INITED) == 0) { + return; + } + if (sip->si_flags & SI_RECORDED) { + selwakeup(sip); + sip->si_flags &= ~(SI_RECORDED | SI_COLL); } + sip->si_flags |= SI_CLEAR; + wait_queue_unlinkall_nofree(&sip->si_wait_queue); } @@ -786,6 +1310,7 @@ extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp); * called upon socket close. deque and free all events for * the socket */ +void evsofree(struct socket *sp) { struct eventqelt *eqp, *next; @@ -824,6 +1349,7 @@ evsofree(struct socket *sp) * enque this event if it's not already queued. wakeup the proc if we do queue this event to it. */ +void evprocenque(struct eventqelt *eqp) { struct proc *p; @@ -847,6 +1373,7 @@ evprocenque(struct eventqelt *eqp) * given either a sockbuf or a socket run down the * event list and queue ready events found */ +void postevent(struct socket *sp, struct sockbuf *sb, int event) { int mask; @@ -1106,66 +1633,69 @@ struct evwait_args { */ int waitevent(p, uap, retval) - struct proc *p; - struct evwait_args *uap; - register_t *retval; + struct proc *p; + struct evwait_args *uap; + register_t *retval; { - int error = 0; - struct eventqelt *eqp; - int timo; - struct timeval atv; - int s; + int error = 0; + struct eventqelt *eqp; + uint64_t abstime, interval; if (uap->tv) { - error = copyin((caddr_t)uap->tv, (caddr_t)&atv, - sizeof (atv)); + struct timeval atv; + + error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); if (error) - return(error); + return(error); if (itimerfix(&atv)) { error = EINVAL; return(error); } - s = splhigh(); - timeradd(&atv, &time, &atv); - timo = hzto(&atv); - splx(s); - } else - timo = 0; - KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0); + interval = tvtoabstime(&atv); + } + else + abstime = interval = 0; + + KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0); retry: - s = splhigh(); - if ((eqp = evprocdeque(p,NULL)) != NULL) { - splx(s); - error = copyout((caddr_t)&eqp->ee_req, (caddr_t)uap->u_req, - sizeof(struct eventreq)); - KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error, - eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0); - return(error); - } else { - if (uap->tv && (timo == 0)) { - splx(s); - *retval = 1; // poll failed - KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0); - return(error); - } + if ((eqp = evprocdeque(p,NULL)) != NULL) { + error = copyout((caddr_t)&eqp->ee_req, + (caddr_t)uap->u_req, sizeof(struct eventreq)); + KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error, + eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0); - KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0); - error = tsleep(&p->p_evlist, PSOCK | PCATCH, "waitevent", timo); - KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0); - splx(s); - if (error == 0) - goto retry; - if (error == ERESTART) - error = EINTR; - if (error == EWOULDBLOCK) { - *retval = 1; - error = 0; - } - } - KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0); - return(error); + return (error); + } + else { + if (uap->tv && interval == 0) { + *retval = 1; // poll failed + KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0); + + return (error); + } + + if (interval != 0) + clock_absolutetime_interval_to_deadline(interval, &abstime); + + KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0); + error = tsleep1(&p->p_evlist, PSOCK | PCATCH, + "waitevent", abstime, (int (*)(int))0); + KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0); + if (error == 0) + goto retry; + if (error == ERESTART) + error = EINTR; + if (error == EWOULDBLOCK) { + *retval = 1; + error = 0; + } + } + + KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0); + + return (error); } struct modwatch_args { @@ -1205,8 +1735,10 @@ modwatch(p, uap, retval) return(EBADF); if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock sp = (struct socket *)fp->f_data; - assert(sp != NULL); + /* soo_close sets f_data to 0 before switching funnel */ + if (sp == (struct socket *)0) + return(EBADF); // locate event if possible for (evq = sp->so_evlist.tqh_first;