__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
+
+/* Conflict wait queue for when selects collide (opaque type) */
+struct wait_queue select_conflict_queue;
+
+/*
+ * Init routine called from bsd_init.c
+ *
+ * Initializes the global select_conflict_queue with the SYNC_POLICY_FIFO
+ * policy. selscan() links a selecting thread's wait queue set onto this
+ * queue when it finds FP_INSELECT already set on a fileproc, i.e. when
+ * another select is already in progress on the same file.
+ *
+ * NOTE(review): presumably this must run before the first select() call
+ * can reach selscan(); confirm the ordering in bsd_init.c.
+ */
+void select_wait_queue_init(void);
+void
+select_wait_queue_init(void)
+{
+ wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
+}
+
+
#if NETAT
extern int appletalk_inited;
#endif /* NETAT */
error == EINTR || error == EWOULDBLOCK))
error = 0;
/* The socket layer handles SIGPIPE */
- if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
+ if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
+ (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
/* XXX Raise the signal on the thread? */
psignal(vfs_context_proc(ctx), SIGPIPE);
}
error == EINTR || error == EWOULDBLOCK))
error = 0;
/* The socket layer handles SIGPIPE */
- if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
+ if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
+ (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
psignal(p, SIGPIPE);
}
*retval = count - uio_resid(uio);
out:
- if ( (error == 0) )
+ if (error == 0)
fp_drop_written(p, fdes, fp);
else
fp_drop(p, fdes, fp, 0);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
-static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
- int nfd, int * count, int *kfcount);
+static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
+static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
/*
struct _select *sel;
int needzerofill = 1;
int count = 0;
- int kfcount = 0;
th_act = current_thread();
uth = get_bsdthread_info(th_act);
else
sel->abstime = 0;
- sel->kfcount = 0;
- if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
+ if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
goto continuation;
}
sel->count = count;
- sel->kfcount = kfcount;
size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
if (uth->uu_allocsize) {
if (uth->uu_wqset == 0)
panic("failed to allocate memory for waitqueue\n");
}
} else {
- sel->count = count;
uth->uu_allocsize = size;
uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
if (uth->uu_wqset == (wait_queue_set_t)NULL)
wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
continuation:
- return selprocess(error, SEL_FIRSTPASS);
+
+ if (error) {
+ /*
+ * We have already cleaned up any state we established,
+ * either locally or as a result of selcount(). We don't
+ * need to wait_subqueue_unlink_all(), since we haven't set
+ * anything at this point.
+ */
+ return (error);
+ }
+
+ return selprocess(0, SEL_FIRSTPASS);
}
int
return selprocess(error, SEL_SECONDPASS);
}
+
+/*
+ * selprocess
+ *
+ * Parameters: error The error code from our caller
+ * sel_pass The pass we are on
+ */
int
selprocess(int error, int sel_pass)
{
uth = get_bsdthread_info(th_act);
sel = &uth->uu_select;
- /* if it is first pass wait queue is not setup yet */
if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
unwind = 0;
if (sel->count == 0)
unwind = 0;
retry:
if (error != 0) {
- goto done;
+ sel_pass = SEL_FIRSTPASS; /* Reset for seldrop */
+ goto done;
}
ncoll = nselcoll;
OSBitOrAtomic(P_SELECT, &p->p_flag);
/* skip scans if the select is just for timeouts */
if (sel->count) {
+ /*
+ * Clear out any dangling refs from prior calls; technically
+ * there should not be any.
+ */
if (sel_pass == SEL_FIRSTPASS)
wait_queue_sub_clearrefs(uth->uu_wqset);
error = 0;
}
- sel_pass = SEL_SECONDPASS;
if (error == 0) {
+ sel_pass = SEL_SECONDPASS;
if (!prepost)
- somewakeup =1;
+ somewakeup = 1;
goto retry;
}
done:
return(error);
}
+
+/*
+ * selscan
+ *
+ * Parameters: p Process performing the select
+ * sel The per-thread select context structure
+ * nfd The number of file descriptors to scan
+ * retval The per thread system call return area
+ * sel_pass Which pass this is; allowed values are
+ * SEL_FIRSTPASS and SEL_SECONDPASS
+ * wqsub The per thread wait queue set
+ *
+ * Returns: 0 Success
+ * EIO Invalid p->p_fd field XXX Obsolete?
+ * EBADF One of the files in the bit vector is
+ * invalid.
+ */
static int
selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
int sel_pass, wait_queue_sub_t wqsub)
int msk, i, j, fd;
u_int32_t bits;
struct fileproc *fp;
- int n = 0;
- int nc = 0;
+ int n = 0; /* count of bits */
+ int nc = 0; /* bit vector offset (nc'th bit) */
static int flag[3] = { FREAD, FWRITE, 0 };
u_int32_t *iptr, *optr;
u_int nw;
u_int32_t *ibits, *obits;
char * wql;
char * wql_ptr;
- int count, kfcount;
- vnode_t vp;
+ int count;
struct vfs_context context = *vfs_context_current();
/*
nw = howmany(nfd, NFDBITS);
count = sel->count;
- kfcount = sel->kfcount;
-
- if (kfcount > count)
- panic("selscan: count < kfcount");
-
- if (kfcount != 0) {
- proc_fdlock(p);
- for (msk = 0; msk < 3; msk++) {
- iptr = (u_int32_t *)&ibits[msk * nw];
- optr = (u_int32_t *)&obits[msk * nw];
-
- for (i = 0; i < nfd; i += NFDBITS) {
- bits = iptr[i/NFDBITS];
-
- while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
- bits &= ~(1 << j);
- fp = fdp->fd_ofiles[fd];
-
- if (fp == NULL ||
- (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
- proc_fdunlock(p);
- return(EBADF);
- }
- if (sel_pass == SEL_SECONDPASS) {
- wql_ptr = (char *)0;
- fp->f_flags &= ~FP_INSELECT;
- fp->f_waddr = (void *)0;
- } else {
- wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
- fp->f_flags |= FP_INSELECT;
- fp->f_waddr = (void *)wqsub;
- }
-
- context.vc_ucred = fp->f_cred;
-
- if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
- && ((vp = (struct vnode *)fp->f_data) != NULLVP)
- && (vp->v_type == VCHR)
- && fo_select(fp, flag[msk], wql_ptr, &context)) {
- optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
- n++;
- }
- nc++;
- }
- }
- }
- proc_fdunlock(p);
- }
nc = 0;
- if (kfcount != count) {
+ if (count) {
proc_fdlock(p);
for (msk = 0; msk < 3; msk++) {
iptr = (u_int32_t *)&ibits[msk * nw];
bits &= ~(1 << j);
fp = fdp->fd_ofiles[fd];
- if (fp == NULL ||
- (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+ if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+ /*
+ * If we abort because of a bad
+ * fd, let the caller unwind...
+ */
proc_fdunlock(p);
return(EBADF);
}
if (sel_pass == SEL_SECONDPASS) {
wql_ptr = (char *)0;
- fp->f_flags &= ~FP_INSELECT;
- fp->f_waddr = (void *)0;
+ if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
+ fp->f_flags &= ~FP_INSELECT;
+ fp->f_waddr = (void *)0;
+ }
} else {
wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
- fp->f_flags |= FP_INSELECT;
- fp->f_waddr = (void *)wqsub;
+ if (fp->f_flags & FP_INSELECT) {
+ /* someone is already in select on this fp */
+ fp->f_flags |= FP_SELCONFLICT;
+ wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
+ } else {
+ fp->f_flags |= FP_INSELECT;
+ fp->f_waddr = (void *)wqsub;
+ }
}
context.vc_ucred = fp->f_cred;
- if ((fp->f_ops &&
- ((fp->f_type != DTYPE_VNODE)
- || (((vp = (struct vnode *)fp->f_data) != NULLVP)
- && (vp->v_type != VCHR))
- )
- && fo_select(fp, flag[msk], wql_ptr, &context))) {
+ /* The select; set the bit, if true */
+ if (fp->f_ops
+ && fo_select(fp, flag[msk], wql_ptr, &context)) {
optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
n++;
}
/* convert the poll event into a kqueue kevent */
kev.ident = fds[i].fd;
kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
- kev.fflags = NOTE_LOWAT;
- kev.data = 1; /* efficiency be damned: any data should trigger */
kev.udata = CAST_USER_ADDR_T(&fds[i]);
+ kev.fflags = 0;
+ kev.data = 0;
kev.ext[0] = 0;
kev.ext[1] = 0;
return (1);
}
+/*
+ * selcount
+ *
+ * Count the number of bits set in the input bit vector, and establish an
+ * outstanding fp->f_iocount for each of the descriptors which will be in
+ * use in the select operation.
+ *
+ * Parameters: p The process doing the select
+ * ibits The input bit vector
+ * nfd The number of fd's in the vector
+ * countp Pointer to where to store the bit count
+ *
+ * Returns: 0 Success
+ * EIO Bad per process open file table
+ * EBADF One of the bits in the input bit vector
+ * references an invalid fd
+ *
+ * Implicit: *countp (modified) Count of fd's
+ *
+ * Notes: This function is the first pass under the proc_fdlock() that
+ * permits us to recognize invalid descriptors in the bit vector;
+ * they may, however, not remain valid through the drop and
+ * later reacquisition of the proc_fdlock().
+ */
static int
-selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
- int nfd, int *countp, int * kfcountp)
+selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
{
struct filedesc *fdp = p->p_fd;
int msk, i, j, fd;
u_int32_t *iptr;
u_int nw;
int error=0;
- int kfc = 0;
int dropcount;
- vnode_t vp;
+ int need_wakeup = 0;
/*
* Problems when reboot; due to MacOSX signal probs
*/
if (fdp == NULL) {
*countp = 0;
- *kfcountp = 0;
return(EIO);
}
nw = howmany(nfd, NFDBITS);
if (fp == NULL ||
(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
*countp = 0;
- *kfcountp = 0;
error = EBADF;
goto bad;
}
fp->f_iocount++;
- if ((fp->f_type == DTYPE_VNODE)
- && ((vp = (struct vnode *)fp->f_data) != NULLVP)
- && (vp->v_type == VCHR) )
- kfc++;
-
n++;
}
}
proc_fdunlock(p);
*countp = n;
- *kfcountp = kfc;
return (0);
+
bad:
dropcount = 0;
if (n== 0)
goto out;
- /* undo the iocounts */
- for (msk = 0; msk < 3; msk++) {
- iptr = (u_int32_t *)&ibits[msk * nw];
- for (i = 0; i < nfd; i += NFDBITS) {
- bits = iptr[i/NFDBITS];
- while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
- bits &= ~(1 << j);
- fp = fdp->fd_ofiles[fd];
- if (dropcount >= n)
- goto out;
- fp->f_iocount--;
+ /* Ignore error return; it's already EBADF */
+ (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);
- if (p->p_fpdrainwait && fp->f_iocount == 0) {
- p->p_fpdrainwait = 0;
- wakeup(&p->p_fpdrainwait);
- }
- dropcount++;
- }
- }
- }
out:
proc_fdunlock(p);
+ if (need_wakeup) {
+ wakeup(&p->p_fpdrainwait);
+ }
return(error);
}
+
+/*
+ * seldrop_locked
+ *
+ * Drop outstanding wait queue references set up during selscan(); drop the
+ * outstanding per fileproc f_iocount() picked up during the selcount().
+ *
+ * Parameters: p Process performing the select
+ * ibits Input bit vector of fd's
+ * nfd Number of fd's
+ * lim Limit to number of vector entries to
+ * consider; only enforced when
+ * fromselcount is nonzero
+ * fromselcount Nonzero when called from selcount()
+ * need_wakeup Pointer to flag to set to do a wakeup
+ * if f_iocount on any descriptor goes to 0
+ *
+ * Returns: 0 Success
+ * EBADF One or more fds in the bit vector
+ * were invalid, but the rest
+ * were successfully dropped
+ *
+ * Notes: An fd may become bad while the proc_fdlock() is not held,
+ * if a multithreaded application closes the fd out from under
+ * the in progress select. In this case, we still have to
+ * clean up after the set up on the remaining fds.
+ */
static int
-seldrop(struct proc *p, u_int32_t *ibits, int nfd)
+seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
{
struct filedesc *fdp = p->p_fd;
int msk, i, j, fd;
u_int32_t bits;
struct fileproc *fp;
- int n = 0;
u_int32_t *iptr;
u_int nw;
+ int error = 0;
+ int dropcount = 0;
+ uthread_t uth = get_bsdthread_info(current_thread());
+
+ *need_wakeup = 0;
/*
* Problems when reboot; due to MacOSX signal probs
nw = howmany(nfd, NFDBITS);
-
- proc_fdlock(p);
for (msk = 0; msk < 3; msk++) {
iptr = (u_int32_t *)&ibits[msk * nw];
for (i = 0; i < nfd; i += NFDBITS) {
while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
bits &= ~(1 << j);
fp = fdp->fd_ofiles[fd];
- if (fp == NULL
-#if 0
- /* if you are here then it is being closed */
- || (fdp->fd_ofileflags[fd] & UF_RESERVED)
-#endif
- ) {
- proc_fdunlock(p);
- return(EBADF);
+ /*
+ * If we've already dropped as many as were
+ * counted/scanned, then we are done.
+ */
+ if ((fromselcount != 0) && (++dropcount > lim))
+ goto done;
+
+ if (fp == NULL) {
+ /* skip (now) bad fds */
+ error = EBADF;
+ continue;
+ }
+ /*
+ * Only clear the flag if we set it. We'll
+ * only find that we set it if we had made
+ * at least one [partial] pass through selscan().
+ */
+ if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
+ fp->f_flags &= ~FP_INSELECT;
+ fp->f_waddr = (void *)0;
}
- n++;
- fp->f_iocount--;
- fp->f_flags &= ~FP_INSELECT;
- if (p->p_fpdrainwait && fp->f_iocount == 0) {
- p->p_fpdrainwait = 0;
- wakeup(&p->p_fpdrainwait);
+ fp->f_iocount--;
+ if (fp->f_iocount < 0)
+ panic("f_iocount overdecrement!");
+
+ if (fp->f_iocount == 0) {
+ /*
+ * The last iocount is responsible for clearing
+ * selconflict flag - even if we didn't set it -
+ * and is also responsible for waking up anyone
+ * waiting on iocounts to drain.
+ */
+ if (fp->f_flags & FP_SELCONFLICT)
+ fp->f_flags &= ~FP_SELCONFLICT;
+ if (p->p_fpdrainwait) {
+ p->p_fpdrainwait = 0;
+ *need_wakeup = 1;
+ }
}
}
}
}
+done:
+ return (error);
+}
+
+
+static int
+seldrop(struct proc *p, u_int32_t *ibits, int nfd)
+{
+ int error;
+ int need_wakeup = 0;
+
+ proc_fdlock(p);
+ error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
proc_fdunlock(p);
- return (0);
+ if (need_wakeup) {
+ wakeup(&p->p_fpdrainwait);
+ }
+ return (error);
}
/*
/* need to look at collisions */
- if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
- return;
- }
-
/*do not record if this is second pass of select */
- if((p_wql == (void *)0)) {
+ if(p_wql == (void *)0) {
return;
}