]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/sys_generic.c
xnu-2422.1.72.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
index 0fe948aaed433cfcc7f547d929b912e7e3e31d09..f41cd0018ef8e9f457f6cb4915f46a6f671a459b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,6 +93,8 @@
 #include <sys/poll.h>
 #include <sys/event.h>
 #include <sys/eventvar.h>
+#include <sys/proc.h>
+#include <sys/kauth.h>
 
 #include <mach/mach_types.h>
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
 #include <kern/thread.h>
 #include <kern/clock.h>
+#include <kern/ledger.h>
+#include <kern/task.h>
+#if CONFIG_TELEMETRY
+#include <kern/telemetry.h>
+#endif
 
 #include <sys/mbuf.h>
+#include <sys/domain.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/errno.h>
 #include <sys/syscall.h>
 #include <sys/pipe.h>
 
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <kern/kalloc.h>
 #include <sys/vnode_internal.h>
 
+#include <pexpert/pexpert.h>
+
 /* XXX should be in a header file somewhere */
 void evsofree(struct socket *);
 void evpipefree(struct pipe *);
@@ -152,12 +162,31 @@ __private_extern__ int    dofilewrite(vfs_context_t ctx, struct fileproc *fp,
 __private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
 __private_extern__ void        donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
 
-#if NETAT
-extern int appletalk_inited;
-#endif /* NETAT */
+
+/* Conflict wait queue for when selects collide (opaque type) */
+struct wait_queue select_conflict_queue;
+
+#if 13841988
+int temp_debug_13841988 = 0;
+#endif
+
+/*
+ * Init routine called from bsd_init.c
+ */
+void select_wait_queue_init(void);
+void
+select_wait_queue_init(void)
+{
+       wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
+#if 13841988
+       if (PE_parse_boot_argn("temp_debug_13841988", &temp_debug_13841988, sizeof(temp_debug_13841988))) {
+               kprintf("Temporary debugging for 13841988 enabled\n");
+       }
+#endif
+}
 
 #define f_flag f_fglob->fg_flag
-#define f_type f_fglob->fg_type
+#define f_type f_fglob->fg_ops->fo_type
 #define f_msgcount f_fglob->fg_msgcount
 #define f_cred f_fglob->fg_cred
 #define f_ops f_fglob->fg_ops
@@ -187,11 +216,15 @@ read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retv
        struct fileproc *fp;
        int error;
        int fd = uap->fd;
+       struct vfs_context context;
 
        if ( (error = preparefileread(p, &fp, fd, 0)) )
                return (error);
 
-       error = dofileread(vfs_context_current(), fp, uap->cbuf, uap->nbyte,
+       context = *(vfs_context_current());
+       context.vc_ucred = fp->f_fglob->fg_cred;
+
+       error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
                           (off_t)-1, 0, retval);
 
        donefileread(p, fp, fd);
@@ -222,19 +255,23 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re
        struct fileproc *fp = NULL;     /* fp set by preparefileread() */
        int fd = uap->fd;
        int error;
+       struct vfs_context context;
 
        if ( (error = preparefileread(p, &fp, fd, 1)) )
-               return (error);
+               goto out;
 
-       error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
+       context = *(vfs_context_current());
+       context.vc_ucred = fp->f_fglob->fg_cred;
+
+       error = dofileread(&context, fp, uap->buf, uap->nbyte,
                        uap->offset, FOF_OFFSET, retval);
        
        donefileread(p, fp, fd);
 
-       if (!error)
-           KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
+       KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
-       
+
+out:
        return (error);
 }
 
@@ -246,9 +283,6 @@ void
 donefileread(struct proc *p, struct fileproc *fp, int fd)
 {
        proc_fdlock_spin(p);
-
-       fp->f_flags &= ~FP_INCHRREAD;
-
        fp_drop(p, fd, fp, 1);
         proc_fdunlock(p);
 }
@@ -268,6 +302,8 @@ preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_
        int     error;
        struct fileproc *fp;
 
+       AUDIT_ARG(fd, fd);
+
        proc_fdlock_spin(p);
 
        error = fp_lookup(p, fd, &fp, 1);
@@ -295,8 +331,6 @@ preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_
                        error = ENXIO;
                        goto out;
                }
-               if (vp->v_type == VCHR)
-                       fp->f_flags |= FP_INCHRREAD;
        }
 
        *fp_ret = fp;
@@ -326,7 +360,6 @@ dofileread(vfs_context_t ctx, struct fileproc *fp,
        long error = 0;
        char uio_buf[ UIO_SIZEOF(1) ];
 
-       // LP64todo - do we want to raise this?
        if (nbyte > INT_MAX)
                return (EINVAL);
 
@@ -374,7 +407,6 @@ readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *re
 {
        uio_t auio = NULL;
        int error;
-       int size_of_iovec;
        struct user_iovec *iovp;
 
        /* Verify range before calling uio_create() */
@@ -394,15 +426,19 @@ readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *re
                error = ENOMEM;
                goto ExitThisRoutine;
        }
-       size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
-       error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
+       error = copyin_user_iovec_array(uap->iovp,
+               IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+               uap->iovcnt, iovp);
        if (error) {
                goto ExitThisRoutine;
        }
        
        /* finalize uio_t for use and do the IO 
         */
-       uio_calculateresid(auio);
+       error = uio_calculateresid(auio);
+       if (error) {
+               goto ExitThisRoutine;
+       }
        error = rd_uio(p, uap->fd, auio, retval);
 
 ExitThisRoutine:
@@ -435,6 +471,8 @@ write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *re
        int error;      
        int fd = uap->fd;
 
+       AUDIT_ARG(fd, fd);
+
        error = fp_lookup(p,fd,&fp,0);
        if (error)
                return(error);
@@ -480,6 +518,8 @@ pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *
        int fd = uap->fd;
        vnode_t vp  = (vnode_t)0;
 
+       AUDIT_ARG(fd, fd);
+
        error = fp_lookup(p,fd,&fp,0);
        if (error)
                return(error);
@@ -517,8 +557,7 @@ errout:
        else
                fp_drop(p, fd, fp, 0);
 
-       if (!error)
-           KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
+       KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
              uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
        
         return(error);
@@ -540,7 +579,6 @@ dofilewrite(vfs_context_t ctx, struct fileproc *fp,
        user_ssize_t bytecnt;
        char uio_buf[ UIO_SIZEOF(1) ];
 
-       // LP64todo - do we want to raise this?
        if (nbyte > INT_MAX)   
                return (EINVAL);
 
@@ -559,7 +597,8 @@ dofilewrite(vfs_context_t ctx, struct fileproc *fp,
                        error == EINTR || error == EWOULDBLOCK))
                        error = 0;
                /* The socket layer handles SIGPIPE */
-               if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
+               if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
+                   (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
                        /* XXX Raise the signal on the thread? */
                        psignal(vfs_context_proc(ctx), SIGPIPE);
                }
@@ -585,9 +624,10 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *
 {
        uio_t auio = NULL;
        int error;
-       int size_of_iovec;
        struct user_iovec *iovp;
 
+       AUDIT_ARG(fd, uap->fd);
+
        /* Verify range before calling uio_create() */
        if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
                return (EINVAL);
@@ -605,15 +645,19 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *
                error = ENOMEM;
                goto ExitThisRoutine;
        }
-       size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
-       error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
+       error = copyin_user_iovec_array(uap->iovp,
+               IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+               uap->iovcnt, iovp);
        if (error) {
                goto ExitThisRoutine;
        }
        
        /* finalize uio_t for use and do the IO 
         */
-       uio_calculateresid(auio);
+       error = uio_calculateresid(auio);
+       if (error) {
+               goto ExitThisRoutine;
+       }
        error = wr_uio(p, uap->fd, auio, retval);
 
 ExitThisRoutine:
@@ -649,13 +693,14 @@ wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
                                                error == EINTR || error == EWOULDBLOCK))
                        error = 0;
                /* The socket layer handles SIGPIPE */
-               if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
+               if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
+                   (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
                        psignal(p, SIGPIPE);
        }
        *retval = count - uio_resid(uio);
 
 out:
-       if ( (error == 0) )
+       if (error == 0)
                fp_drop_written(p, fdes, fp);
        else
                fp_drop(p, fdes, fp, 0);
@@ -706,114 +751,49 @@ rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
  *     fo_ioctl:???
  */
 int
-ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
+ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
 {
-       struct fileproc *fp;
-       u_long com;
+       struct fileproc *fp = NULL;
        int error = 0;
-       u_int size;
-       caddr_t datap, memp;
-       boolean_t is64bit;
-       int tmp;
+       u_int size = 0;
+       caddr_t datap = NULL, memp = NULL;
+       boolean_t is64bit = FALSE;
+       int tmp = 0;
 #define STK_PARAMS     128
        char stkbuf[STK_PARAMS];
        int fd = uap->fd;
+       u_long com = uap->com;
        struct vfs_context context = *vfs_context_current();
 
        AUDIT_ARG(fd, uap->fd);
-       AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */
        AUDIT_ARG(addr, uap->data);
 
        is64bit = proc_is64bit(p);
-
-       proc_fdlock(p);
-       error = fp_lookup(p,fd,&fp,1);
-       if (error)  {
-               proc_fdunlock(p);
-               return(error);
-       }
-
-       AUDIT_ARG(file, p, fp);
-
-       if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
-                       error = EBADF;
-                       goto out;
-       }
-
-       context.vc_ucred = fp->f_fglob->fg_cred;
-
-#if CONFIG_MACF
-       error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
-       if (error)
-               goto out;
-#endif
-               
-#if NETAT
-       /*
-        * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
-        * while implementing an ATioctl system call
-        */
-       {
-               if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
-                       u_long  fixed_command;
-
-#ifdef APPLETALK_DEBUG
-                       kprintf("ioctl: special AppleTalk \n");
-#endif
-                       datap = &stkbuf[0];
-                       *(user_addr_t *)datap = uap->data;
-                       fixed_command = _IOW(0, 0xff99, uap->data);
-                       error = fo_ioctl(fp, fixed_command, datap, &context);
-                       goto out;
-               }
-       }
-
-#endif /* NETAT */
-
-
-       switch (com = uap->com) {
-       case FIONCLEX:
-               *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
-               error =0;
-               goto out;
-       case FIOCLEX:
-               *fdflags(p, uap->fd) |= UF_EXCLOSE;
-               error =0;
-               goto out;
-       }
+#if CONFIG_AUDIT
+       if (is64bit)
+               AUDIT_ARG(value64, com);
+       else
+               AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
+#endif /* CONFIG_AUDIT */
 
        /*
         * Interpret high order word to find amount of data to be
         * copied to/from the user's address space.
         */
        size = IOCPARM_LEN(com);
-       if (size > IOCPARM_MAX) {
-                       error = ENOTTY;
-                       goto out;
-       }
-       memp = NULL;
+       if (size > IOCPARM_MAX)
+                       return ENOTTY;
        if (size > sizeof (stkbuf)) {
-               proc_fdunlock(p);
-               if ((memp = (caddr_t)kalloc(size)) == 0) {
-                       proc_fdlock(p);
-                       error = ENOMEM;
-                       goto out;
-               }
-               proc_fdlock(p);
+               if ((memp = (caddr_t)kalloc(size)) == 0)
+                       return ENOMEM;
                datap = memp;
        } else
                datap = &stkbuf[0];
-       if (com&IOC_IN) {
+       if (com & IOC_IN) {
                if (size) {
-                       proc_fdunlock(p);
                        error = copyin(uap->data, datap, size);
-                       if (error) {
-                               if (memp)
-                                       kfree(memp, size);
-                               proc_fdlock(p);
-                               goto out;
-                       }
-                       proc_fdlock(p);
+                       if (error)
+                               goto out_nofp;
                } else {
                        /* XXX - IOC_IN and no size?  we should probably return an error here!! */
                        if (is64bit) {
@@ -823,13 +803,13 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
                                *(uint32_t *)datap = (uint32_t)uap->data;
                        }
                }
-       } else if ((com&IOC_OUT) && size)
+       } else if ((com & IOC_OUT) && size)
                /*
                 * Zero the buffer so the user always
                 * gets back something deterministic.
                 */
                bzero(datap, size);
-       else if (com&IOC_VOID) {
+       else if (com & IOC_VOID) {
                /* XXX - this is odd since IOC_VOID means no parameters */
                if (is64bit) {
                        *(user_addr_t *)datap = uap->data;
@@ -839,7 +819,36 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
                }
        }
 
+       proc_fdlock(p);
+       error = fp_lookup(p,fd,&fp,1);
+       if (error)  {
+               proc_fdunlock(p);
+               goto out_nofp;
+       }
+
+       AUDIT_ARG(file, p, fp);
+
+       if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
+                       error = EBADF;
+                       goto out;
+       }
+
+       context.vc_ucred = fp->f_fglob->fg_cred;
+
+#if CONFIG_MACF
+       error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, com);
+       if (error)
+               goto out;
+#endif
+
        switch (com) {
+       case FIONCLEX:
+               *fdflags(p, fd) &= ~UF_EXCLOSE;
+               break;
+
+       case FIOCLEX:
+               *fdflags(p, fd) |= UF_EXCLOSE;
+               break;
 
        case FIONBIO:
                if ( (tmp = *(int *)datap) )
@@ -861,7 +870,6 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
                tmp = *(int *)datap;
                if (fp->f_type == DTYPE_SOCKET) {
                        ((struct socket *)fp->f_data)->so_pgid = tmp;
-                       error = 0;
                        break;
                }
                if (fp->f_type == DTYPE_PIPE) {
@@ -884,7 +892,6 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
 
        case FIOGETOWN:
                if (fp->f_type == DTYPE_SOCKET) {
-                       error = 0;
                        *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
                        break;
                }
@@ -898,17 +905,17 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
                 * Copy any data to user, size was
                 * already set and checked above.
                 */
-               if (error == 0 && (com&IOC_OUT) && size)
+               if (error == 0 && (com & IOC_OUT) && size)
                        error = copyout(datap, uap->data, (u_int)size);
                break;
        }
-       proc_fdunlock(p);
-       if (memp)
-               kfree(memp, size);
-       proc_fdlock(p);
 out:
        fp_drop(p, fd, fp, 1);
        proc_fdunlock(p);
+
+out_nofp:
+       if (memp)
+               kfree(memp, size);
        return(error);
 }
 
@@ -918,11 +925,10 @@ int       selwait, nselcoll;
 extern int selcontinue(int error);
 extern int selprocess(int error, int sel_pass);
 static int selscan(struct proc *p, struct _select * sel,
-                       int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub);
-static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
-                       int nfd, int * count, int *kfcount);
+                       int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
+static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
+static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
-extern uint64_t        tvtoabstime(struct timeval      *tvp);
 
 /*
  * Select system call.
@@ -933,14 +939,14 @@ extern uint64_t   tvtoabstime(struct timeval      *tvp);
  *     selprocess:???
  */
 int
-select(struct proc *p, struct select_args *uap, register_t *retval)
+select(struct proc *p, struct select_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
        return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
 }
 
 int
-select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *retval)
+select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
 {
        int error = 0;
        u_int ni, nw, size;
@@ -949,11 +955,11 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *re
        struct _select *sel;
        int needzerofill = 1;
        int count = 0;
-       int kfcount = 0;
 
        th_act = current_thread();
        uth = get_bsdthread_info(th_act);
        sel = &uth->uu_select;
+       sel->data = &uth->uu_kevent.ss_select_data;
        retval = (int *)get_bsduthreadrval(th_act);
        *retval = 0;
 
@@ -1029,13 +1035,16 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *re
        if (uap->tv) {
                struct timeval atv;
                if (IS_64BIT_PROCESS(p)) {
-                       struct user_timeval atv64;
+                       struct user64_timeval atv64;
                        error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
                        /* Loses resolution - assume timeout < 68 years */
                        atv.tv_sec = atv64.tv_sec;
                        atv.tv_usec = atv64.tv_usec;
                } else {
-                       error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv));
+                       struct user32_timeval atv32;
+                       error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
+                       atv.tv_sec = atv32.tv_sec;
+                       atv.tv_usec = atv32.tv_usec;
                }
                if (error)
                        goto continuation;
@@ -1045,17 +1054,16 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *re
                }
 
                clock_absolutetime_interval_to_deadline(
-                                                                               tvtoabstime(&atv), &sel->abstime);
+                                                                               tvtoabstime(&atv), &sel->data->abstime);
        }
        else
-               sel->abstime = 0;
+               sel->data->abstime = 0;
 
-       sel->kfcount = 0;
-       if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
+       if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
                        goto continuation;
        }
-       sel->count = count;
-       sel->kfcount = kfcount;
+
+       sel->data->count = count;
        size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
        if (uth->uu_allocsize) {
                if (uth->uu_wqset == 0)
@@ -1069,18 +1077,28 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *re
                                panic("failed to allocate memory for waitqueue\n");
                }
        } else {
-               sel->count = count;
                uth->uu_allocsize = size;
                uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
                if (uth->uu_wqset == (wait_queue_set_t)NULL)
                        panic("failed to allocate memory for waitqueue\n");
        }
        bzero(uth->uu_wqset, size);
-       sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
+       sel->data->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
        wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
 
 continuation:
-       return selprocess(error, SEL_FIRSTPASS);
+
+       if (error) {
+               /*
+                * We have already cleaned up any state we established,
+                * either locally or as a result of selcount().  We don't
+                * need to wait_subqueue_unlink_all(), since we haven't set
+                * anything at this point.
+                */
+               return (error);
+       }
+
+       return selprocess(0, SEL_FIRSTPASS);
 }
 
 int
@@ -1089,6 +1107,13 @@ selcontinue(int error)
        return selprocess(error, SEL_SECONDPASS);
 }
 
+
+/*
+ * selprocess
+ *
+ * Parameters: error                   The error code from our caller
+ *             sel_pass                The pass we are on
+ */
 int
 selprocess(int error, int sel_pass)
 {
@@ -1113,20 +1138,24 @@ selprocess(int error, int sel_pass)
        uth = get_bsdthread_info(th_act);
        sel = &uth->uu_select;
 
-       /* if it is first pass wait queue is not setup yet */
        if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
                        unwind = 0;
-       if (sel->count == 0)
+       if (sel->data->count == 0)
                        unwind = 0;
 retry:
        if (error != 0) {
-         goto done;
+               sel_pass = SEL_FIRSTPASS;       /* Reset for seldrop */
+               goto done;
        }
 
        ncoll = nselcoll;
-       OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
+       OSBitOrAtomic(P_SELECT, &p->p_flag);
        /* skip scans if the select is just for timeouts */
-       if (sel->count) {
+       if (sel->data->count) {
+               /*
+                * Clear out any dangling refs from prior calls; technically
+                * there should not be any.
+                */
                if (sel_pass == SEL_FIRSTPASS)
                        wait_queue_sub_clearrefs(uth->uu_wqset);
 
@@ -1151,7 +1180,7 @@ retry:
                uint64_t        now;
 
                clock_get_uptime(&now);
-               if (now >= sel->abstime)
+               if (now >= sel->data->abstime)
                        goto done;
        }
 
@@ -1166,7 +1195,7 @@ retry:
         * To effect a poll, the timeout argument should be
         * non-nil, pointing to a zero-valued timeval structure.
         */
-       if (uap->tv && sel->abstime == 0) {
+       if (uap->tv && sel->data->abstime == 0) {
                goto done;
        }
 
@@ -1176,15 +1205,16 @@ retry:
                goto retry;
        }
 
-       OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
+       OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
 
        /* if the select is just for timeout skip check */
-       if (sel->count &&(sel_pass == SEL_SECONDPASS))
+       if (sel->data->count &&(sel_pass == SEL_SECONDPASS))
                panic("selprocess: 2nd pass assertwaiting");
 
        /* Wait Queue Subordinate has waitqueue as first element */
-       wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
-                                            &selwait, THREAD_ABORTSAFE, sel->abstime);
+       wait_result = wait_queue_assert_wait_with_leeway((wait_queue_t)uth->uu_wqset,
+                                            NULL, THREAD_ABORTSAFE,
+                                            TIMEOUT_URGENCY_USER_NORMAL, sel->data->abstime, 0);
        if (wait_result != THREAD_AWAKENED) {
                /* there are no preposted events */
                error = tsleep1(NULL, PSOCK | PCATCH,
@@ -1194,10 +1224,10 @@ retry:
                error = 0;
        }
 
-       sel_pass = SEL_SECONDPASS;
        if (error == 0) {
+               sel_pass = SEL_SECONDPASS;
                if (!prepost)
-                       somewakeup =1;
+                       somewakeup = 1;
                goto retry;
        }
 done:
@@ -1205,7 +1235,7 @@ done:
                wait_subqueue_unlink_all(uth->uu_wqset);
                seldrop(p, sel->ibits, uap->nd);
        }
-       OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
+       OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
        /* select is not restarted after signals... */
        if (error == ERESTART)
                error = EINTR;
@@ -1232,25 +1262,40 @@ done:
        return(error);
 }
 
+
+/*
+ * selscan
+ *
+ * Parameters: p                       Process performing the select
+ *             sel                     The per-thread select context structure
+ *             nfd                     The number of file descriptors to scan
+ *             retval                  The per thread system call return area
+ *             sel_pass                Which pass this is; allowed values are
+ *                                             SEL_FIRSTPASS and SEL_SECONDPASS
+ *             wqsub                   The per thread wait queue set
+ *
+ * Returns:    0                       Success
+ *             EIO                     Invalid p->p_fd field XXX Obsolete?
+ *             EBADF                   One of the files in the bit vector is
+ *                                             invalid.
+ */
 static int
-selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
+selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
        int sel_pass, wait_queue_sub_t wqsub)
 {
        struct filedesc *fdp = p->p_fd;
        int msk, i, j, fd;
        u_int32_t bits;
        struct fileproc *fp;
-       int n = 0;
-       int nc = 0;
+       int n = 0;              /* count of bits */
+       int nc = 0;             /* bit vector offset (nc'th bit) */
        static int flag[3] = { FREAD, FWRITE, 0 };
        u_int32_t *iptr, *optr;
        u_int nw;
        u_int32_t *ibits, *obits;
        char * wql;
        char * wql_ptr;
-       int count, kfcount;
-       boolean_t funnel_state;
-       vnode_t vp;
+       int count;
        struct vfs_context context = *vfs_context_current();
 
        /*
@@ -1263,66 +1308,14 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
        }
        ibits = sel->ibits;
        obits = sel->obits;
-       wql = sel->wql;
+       wql = sel->data->wql;
 
        nw = howmany(nfd, NFDBITS);
 
-       count = sel->count;
-       kfcount = sel->kfcount;
-
-       if (kfcount > count)
-               panic("selscan: count < kfcount");
-
-       if (kfcount != 0) {
-               funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
-               proc_fdlock(p);
-               for (msk = 0; msk < 3; msk++) {
-                       iptr = (u_int32_t *)&ibits[msk * nw];
-                       optr = (u_int32_t *)&obits[msk * nw];
-
-                       for (i = 0; i < nfd; i += NFDBITS) {
-                               bits = iptr[i/NFDBITS];
-
-                               while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
-                                       bits &= ~(1 << j);
-                                       fp = fdp->fd_ofiles[fd];
-
-                                       if (fp == NULL ||
-                                               (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
-                                               proc_fdunlock(p);
-                                               thread_funnel_set(kernel_flock, funnel_state);
-                                               return(EBADF);
-                                       }
-                                       if (sel_pass == SEL_SECONDPASS) {
-                                               wql_ptr = (char *)0;
-                                               fp->f_flags &= ~FP_INSELECT;
-                                               fp->f_waddr = (void *)0;
-                                       } else {
-                                               wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
-                                               fp->f_flags |= FP_INSELECT;
-                                               fp->f_waddr = (void *)wqsub;
-                                       }
-
-                                       context.vc_ucred = fp->f_cred;
-
-                                       if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
-                                                       && ((vp = (struct vnode *)fp->f_data)  != NULLVP)
-                                                       && (vp->v_type == VCHR)
-                                               && fo_select(fp, flag[msk], wql_ptr, &context)) {
-                                               optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
-                                               n++;
-                                       }
-                                       nc++;
-                               }
-                       }
-               }
-               proc_fdunlock(p);
-               thread_funnel_set(kernel_flock, funnel_state);
-       }
+       count = sel->data->count;
 
        nc = 0;
-       if (kfcount != count) {
+       if (count) {
                proc_fdlock(p);
                for (msk = 0; msk < 3; msk++) {
                        iptr = (u_int32_t *)&ibits[msk * nw];
@@ -1335,29 +1328,37 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
                                        bits &= ~(1 << j);
                                        fp = fdp->fd_ofiles[fd];
 
-                                       if (fp == NULL ||
-                                               (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+                                       if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+                                               /*
+                                                * If we abort because of a bad
+                                                * fd, let the caller unwind...
+                                                */
                                                proc_fdunlock(p);
                                                return(EBADF);
                                        }
                                        if (sel_pass == SEL_SECONDPASS) {
                                                wql_ptr = (char *)0;
-                                               fp->f_flags &= ~FP_INSELECT;
-                                               fp->f_waddr = (void *)0;
+                                               if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
+                                                       fp->f_flags &= ~FP_INSELECT;
+                                                       fp->f_waddr = (void *)0;
+                                               }
                                        } else {
                                                wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
-                                               fp->f_flags |= FP_INSELECT;
-                                               fp->f_waddr = (void *)wqsub;
+                                               if (fp->f_flags & FP_INSELECT) {
+                                                       /* someone is already in select on this fp */
+                                                       fp->f_flags |= FP_SELCONFLICT;
+                                                       wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
+                                               } else {
+                                                       fp->f_flags |= FP_INSELECT;
+                                                       fp->f_waddr = (void *)wqsub;
+                                               }
                                        }
 
                                        context.vc_ucred = fp->f_cred;
 
-                                       if ((fp->f_ops && 
-                                               ((fp->f_type != DTYPE_VNODE)
-                                               || (((vp = (struct vnode *)fp->f_data)  != NULLVP)
-                                                       && (vp->v_type != VCHR))
-                                               )
-                                               && fo_select(fp, flag[msk], wql_ptr, &context))) {
+                                       /* The select; set the bit, if true */
+                                       if (fp->f_ops && fp->f_type
+                                               && fo_select(fp, flag[msk], wql_ptr, &context)) {
                                                optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
                                                n++;
                                        }
@@ -1371,7 +1372,7 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
        return (0);
 }
 
-int poll_callback(struct kqueue *, struct kevent *, void *);
+int poll_callback(struct kqueue *, struct kevent64_s *, void *);
 
 struct poll_continue_args {
        user_addr_t pca_fds;
@@ -1380,7 +1381,7 @@ struct poll_continue_args {
 };
 
 int
-poll(struct proc *p, struct poll_args *uap, register_t *retval)
+poll(struct proc *p, struct poll_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
        return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
@@ -1388,7 +1389,7 @@ poll(struct proc *p, struct poll_args *uap, register_t *retval)
 
 
 int
-poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval)
+poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
 {
        struct poll_continue_args *cont;
        struct pollfd *fds;
@@ -1445,10 +1446,10 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval
 
        /* JMM - all this P_SELECT stuff is bogus */
        ncoll = nselcoll;
-       OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
+       OSBitOrAtomic(P_SELECT, &p->p_flag);
        for (i = 0; i < nfds; i++) {
                short events = fds[i].events;
-               struct kevent kev;
+               struct kevent64_s kev;
                int kerror = 0;
 
                /* per spec, ignore fd values below zero */
@@ -1460,9 +1461,11 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval
                /* convert the poll event into a kqueue kevent */
                kev.ident = fds[i].fd;
                kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
-               kev.fflags = NOTE_LOWAT;
-               kev.data = 1; /* efficiency be damned: any data should trigger */
                kev.udata = CAST_USER_ADDR_T(&fds[i]);
+               kev.fflags = 0;
+               kev.data = 0;
+               kev.ext[0] = 0;
+               kev.ext[1] = 0;
 
                /* Handle input events */
                if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
@@ -1510,11 +1513,11 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval
        cont->pca_fds = uap->fds;
        cont->pca_nfds = nfds;
        cont->pca_rfds = rfds;
-       error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p);
+       error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p);
        rfds = cont->pca_rfds;
 
  done:
-       OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
+       OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
        /* poll is not restarted after signals... */
        if (error == ERESTART)
                error = EINTR;
@@ -1534,10 +1537,11 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval
 }
 
 int
-poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
+poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
 {
        struct poll_continue_args *cont = (struct poll_continue_args *)data;
        struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
+       short prev_revents = fds->revents;
        short mask;
 
        /* convert the results back into revents */
@@ -1577,7 +1581,7 @@ poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
                break;
        }
 
-       if (fds->revents)
+       if (fds->revents != 0 && prev_revents == 0)
                cont->pca_rfds++;
 
        return 0;
@@ -1590,9 +1594,32 @@ seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
        return (1);
 }
 
+/*
+ * selcount
+ *
+ * Count the number of bits set in the input bit vector, and establish an
+ * outstanding fp->f_iocount for each of the descriptors which will be in
+ * use in the select operation.
+ *
+ * Parameters: p                       The process doing the select
+ *             ibits                   The input bit vector
+ *             nfd                     The number of fd's in the vector
+ *             countp                  Pointer to where to store the bit count
+ *
+ * Returns:    0                       Success
+ *             EIO                     Bad per process open file table
+ *             EBADF                   One of the bits in the input bit vector
+ *                                             references an invalid fd
+ *
+ * Implicit:   *countp (modified)      Count of fd's
+ *
+ * Notes:      This function is the first pass under the proc_fdlock() that
+ *             permits us to recognize invalid descriptors in the bit vector;
+ *             they may, however, not remain valid through the drop and
+ *             later reacquisition of the proc_fdlock().
+ */
 static int
-selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits, 
-                int nfd, int *countp, int * kfcountp)
+selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
 {
        struct filedesc *fdp = p->p_fd;
        int msk, i, j, fd;
@@ -1602,9 +1629,8 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
        u_int32_t *iptr;
        u_int nw;
        int error=0; 
-       int kfc = 0;
        int dropcount;
-       vnode_t vp;
+       int need_wakeup = 0;
 
        /*
         * Problems when reboot; due to MacOSX signal probs
@@ -1612,7 +1638,6 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
         */
        if (fdp == NULL) {
                *countp = 0;
-               *kfcountp = 0;
                return(EIO);
        }
        nw = howmany(nfd, NFDBITS);
@@ -1628,16 +1653,10 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
                                if (fp == NULL ||
                                        (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
                                                *countp = 0;
-                                               *kfcountp = 0;
                                                error = EBADF;
                                                goto bad;
                                }
                                fp->f_iocount++;
-                               if ((fp->f_type == DTYPE_VNODE)
-                                               && ((vp = (struct vnode *)fp->f_data)  != NULLVP)
-                                               && (vp->v_type == VCHR) )
-                                       kfc++;
-
                                n++;
                        }
                }
@@ -1645,48 +1664,64 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
        proc_fdunlock(p);
 
        *countp = n;
-       *kfcountp = kfc;
        return (0);
+
 bad:
        dropcount = 0;
        
        if (n== 0)
                goto out;
-       /* undo the iocounts */
-       for (msk = 0; msk < 3; msk++) {
-               iptr = (u_int32_t *)&ibits[msk * nw];
-               for (i = 0; i < nfd; i += NFDBITS) {
-                       bits = iptr[i/NFDBITS];
-                       while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
-                               bits &= ~(1 << j);
-                               fp = fdp->fd_ofiles[fd];
-                               if (dropcount >= n)
-                                       goto out;
-                               fp->f_iocount--;
+       /* Ignore error return; it's already EBADF */
+       (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);
 
-                               if (p->p_fpdrainwait && fp->f_iocount == 0) {
-                                       p->p_fpdrainwait = 0;
-                                       wakeup(&p->p_fpdrainwait);
-                               }
-                               dropcount++;
-                       }
-               }
-       }
 out:
        proc_fdunlock(p);
+       if (need_wakeup) {
+               wakeup(&p->p_fpdrainwait);
+       }
        return(error);
 }
 
+
+/*
+ * seldrop_locked
+ *
+ * Drop outstanding wait queue references set up during selscan(); drop the
+ * outstanding per fileproc f_iocount() picked up during the selcount().
+ *
+ * Parameters: p                       Process performing the select
+ *             ibits                   Input bit vector of fd's
+ *             nfd                     Number of fd's
+ *             lim                     Limit to number of vector entries to
+ *                                             consider if fromselcount is set
+ *             need_wakeup             Pointer to flag to set to do a wakeup
+ *                                     if f_iocount on any descriptor goes to 0
+ *             fromselcount            Nonzero if called from selcount()
+ *
+ * Returns:    0                       Success
+ *             EBADF                   One or more fds in the bit vector
+ *                                             were invalid, but the rest
+ *                                             were successfully dropped
+ *
+ * Notes:      An fd may become bad while the proc_fdlock() is not held,
+ *             if a multithreaded application closes the fd out from under
+ *             the in progress select.  In this case, we still have to
+ *             clean up after the set up on the remaining fds.
+ */
 static int
-seldrop(struct proc *p, u_int32_t *ibits, int nfd)
+seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
 {
        struct filedesc *fdp = p->p_fd;
        int msk, i, j, fd;
        u_int32_t bits;
        struct fileproc *fp;
-       int n = 0;
        u_int32_t *iptr;
        u_int nw;
+       int error = 0;
+       int dropcount = 0;
+       uthread_t uth = get_bsdthread_info(current_thread());
+
+       *need_wakeup = 0;
 
        /*
         * Problems when reboot; due to MacOSX signal probs
@@ -1698,8 +1733,6 @@ seldrop(struct proc *p, u_int32_t *ibits, int nfd)
 
        nw = howmany(nfd, NFDBITS);
 
-
-       proc_fdlock(p);
        for (msk = 0; msk < 3; msk++) {
                iptr = (u_int32_t *)&ibits[msk * nw];
                for (i = 0; i < nfd; i += NFDBITS) {
@@ -1707,28 +1740,67 @@ seldrop(struct proc *p, u_int32_t *ibits, int nfd)
                        while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
                                bits &= ~(1 << j);
                                fp = fdp->fd_ofiles[fd];
-                               if (fp == NULL 
-#if 0
-                       /* if you are here then it is being closed */
-                                       || (fdp->fd_ofileflags[fd] & UF_RESERVED)
-#endif
-                                       ) {
-                                               proc_fdunlock(p);
-                                               return(EBADF);
+                               /*
+                                * If we've already dropped as many as were
+                                * counted/scanned, then we are done.  
+                                */
+                               if ((fromselcount != 0) && (++dropcount > lim))
+                                       goto done;
+
+                               if (fp == NULL) {
+                                       /* skip (now) bad fds */
+                                       error = EBADF;
+                                       continue;
+                               }
+                               /*
+                                * Only clear the flag if we set it.  We'll
+                                * only find that we set it if we had made
+                                * at least one [partial] pass through selscan().
+                                */
+                               if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
+                                       fp->f_flags &= ~FP_INSELECT;
+                                       fp->f_waddr = (void *)0;
                                }
-                               n++;
-                               fp->f_iocount--;
-                               fp->f_flags &= ~FP_INSELECT;
 
-                               if (p->p_fpdrainwait && fp->f_iocount == 0) {
-                                       p->p_fpdrainwait = 0;
-                                       wakeup(&p->p_fpdrainwait);
+                               fp->f_iocount--;
+                               if (fp->f_iocount < 0)
+                                       panic("f_iocount overdecrement!");
+
+                               if (fp->f_iocount == 0) {
+                                       /*
+                                        * The last iocount is responsible for clearing
+                                        * selconflict flag - even if we didn't set it -
+                                        * and is also responsible for waking up anyone
+                                        * waiting on iocounts to drain.
+                                        */
+                                       if (fp->f_flags & FP_SELCONFLICT)
+                                               fp->f_flags &= ~FP_SELCONFLICT;
+                                       if (p->p_fpdrainwait) {
+                                               p->p_fpdrainwait = 0;
+                                               *need_wakeup = 1;
+                                       }
                                }
                        }
                }
        }
+done:
+       return (error);
+}
+
+
+static int
+seldrop(struct proc *p, u_int32_t *ibits, int nfd)
+{
+       int error;
+       int need_wakeup = 0;
+
+       proc_fdlock(p);
+       error =  seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
        proc_fdunlock(p);
-       return (0);
+       if (need_wakeup) {
+               wakeup(&p->p_fpdrainwait);
+       }
+       return (error);
 }
 
 /*
@@ -1742,12 +1814,8 @@ selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
 
        /* need to look at collisions */
 
-       if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
-               return;
-       }
-
        /*do not record if this is second pass of select */
-       if((p_wql == (void *)0)) {
+       if(p_wql == (void *)0) {
                return;
        }
 
@@ -1788,7 +1856,7 @@ selwakeup(struct selinfo *sip)
        }
 
        if (sip->si_flags & SI_RECORDED) {
-               wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
+               wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
                sip->si_flags &= ~SI_RECORDED;
        }
 
@@ -1806,7 +1874,7 @@ selthreadclear(struct selinfo *sip)
                        sip->si_flags &= ~(SI_RECORDED | SI_COLL);
        }
        sip->si_flags |= SI_CLEAR;
-       wait_queue_unlinkall_nofree(&sip->si_wait_queue);
+       wait_queue_unlink_all(&sip->si_wait_queue);
 }
 
 
@@ -1951,14 +2019,14 @@ postpipeevent(struct pipe *pipep, int event)
                          evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
                  }
                  if ((evq->ee_eventmask & EV_WR) && 
-                     (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
+                     (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
 
                          if (pipep->pipe_state & PIPE_EOF) {
                                  mask |= EV_WR|EV_RESET;
                                  break;
                          }
                          mask |= EV_WR;
-                         evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
+                         evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
                  }
                  break;
 
@@ -2064,13 +2132,19 @@ postevent(struct socket *sp, struct sockbuf *sb, int event)
                   */
                case EV_RWBYTES:
                  if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
-                         if (sp->so_error) {
-                                 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
-                                         if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
-                                             (tp->t_state == TCPS_CLOSED)) {
-                                                 mask |= EV_RE|EV_RESET;
-                                                 break;
-                                         }
+                         /* for AFP/OT purposes; may go away in future */
+                         if ((SOCK_DOM(sp) == PF_INET ||
+                             SOCK_DOM(sp) == PF_INET6) &&
+                             SOCK_PROTO(sp) == IPPROTO_TCP &&
+                             (sp->so_error == ECONNREFUSED ||
+                             sp->so_error == ECONNRESET)) {
+                                 if (sp->so_pcb == NULL ||
+                                     sotoinpcb(sp)->inp_state ==
+                                     INPCB_STATE_DEAD ||
+                                     (tp = sototcpcb(sp)) == NULL ||
+                                     tp->t_state == TCPS_CLOSED) {
+                                         mask |= EV_RE|EV_RESET;
+                                         break;
                                  }
                          }
                          mask |= EV_RE;
@@ -2082,13 +2156,19 @@ postevent(struct socket *sp, struct sockbuf *sb, int event)
                          }
                  }
                  if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
-                         if (sp->so_error) {
-                                 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
-                                         if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
-                                             (tp->t_state == TCPS_CLOSED)) {
-                                                 mask |= EV_WR|EV_RESET;
-                                                 break;
-                                         }
+                         /* for AFP/OT purposes; may go away in future */
+                         if ((SOCK_DOM(sp) == PF_INET ||
+                             SOCK_DOM(sp) == PF_INET6) &&
+                             SOCK_PROTO(sp) == IPPROTO_TCP &&
+                             (sp->so_error == ECONNREFUSED ||
+                             sp->so_error == ECONNRESET)) {
+                                 if (sp->so_pcb == NULL ||
+                                     sotoinpcb(sp)->inp_state ==
+                                     INPCB_STATE_DEAD ||
+                                     (tp = sototcpcb(sp)) == NULL ||
+                                     tp->t_state == TCPS_CLOSED) {
+                                         mask |= EV_WR|EV_RESET;
+                                         break;
                                  }
                          }
                          mask |= EV_WR;
@@ -2349,7 +2429,18 @@ waitevent(proc_t p, struct waitevent_args *uap, int *retval)
                        proc_lock(p);
                        goto retry;
                }
-               error = copyin(uap->tv, (caddr_t)&atv, sizeof (atv));
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_timeval atv64;
+                       error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
+                       /* Loses resolution - assume timeout < 68 years */
+                       atv.tv_sec = atv64.tv_sec;
+                       atv.tv_usec = atv64.tv_usec;
+               } else {
+                       struct user32_timeval atv32;
+                       error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
+                       atv.tv_sec = atv32.tv_sec;
+                       atv.tv_usec = atv32.tv_usec;
+               }
 
                if (error)
                        return(error);
@@ -2666,6 +2757,7 @@ waitevent_close(struct proc *p, struct fileproc *fp)
  *
  * Parameters: uuid_buf                Pointer to buffer to receive UUID
  *             timeout                 Timespec for timout
+ *             spi                             SPI, skip sandbox check (temporary)
  *
  * Returns:    0                       Success
  *             EWOULDBLOCK             Timeout is too short
@@ -2675,23 +2767,35 @@ waitevent_close(struct proc *p, struct fileproc *fp)
  *             have a system UUID in hand, then why ask for one?
  */
 int
-gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *retval)
+gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
 {
        kern_return_t kret;
        int error;
        mach_timespec_t mach_ts;        /* for IOKit call */
        __darwin_uuid_t uuid_kern;      /* for IOKit call */
 
+       if (!uap->spi) {
+#if 13841988
+               uint32_t flags;
+               if (temp_debug_13841988 && (0 == proc_get_darwinbgstate(p->task, &flags)) && (flags & PROC_FLAG_IOS_APPLICATION)) {
+                       printf("Unauthorized access to gethostuuid() by %s(%d)\n", p->p_comm, proc_pid(p));
+                       return (EPERM);
+               }
+#else
+               /* Perform sandbox check */
+#endif
+       }
+
        /* Convert the 32/64 bit timespec into a mach_timespec_t */
        if ( proc_is64bit(p) ) {
-               struct user_timespec ts;
+               struct user64_timespec ts;
                error = copyin(uap->timeoutp, &ts, sizeof(ts));
                if (error)
                        return (error);
                mach_ts.tv_sec = ts.tv_sec;
                mach_ts.tv_nsec = ts.tv_nsec;
        } else {
-               struct timespec ts;
+               struct user32_timespec ts;
                error = copyin(uap->timeoutp, &ts, sizeof(ts) );
                if (error)
                        return (error);
@@ -2715,3 +2819,133 @@ gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *r
 
        return (error);
 }
+
+/*
+ * ledger
+ *
+ * Description:        Omnibus system call for ledger operations
+ *
+ * Returns:    0                       Success
+ *             EINVAL                  Unknown args->cmd, or negative length
+ *             ESRCH                   No process found for pid args->arg1
+ *             (other)                 From copyin/copyout, MACF or ledger_*()
+ */
+int
+ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
+{
+#if !CONFIG_MACF
+#pragma unused(p)
+#endif
+       int rval = 0, pid, len = 0, error = 0;
+#ifdef LEDGER_DEBUG
+       struct ledger_limit_args lla;
+#endif
+       task_t task;
+       proc_t proc;
+
+       /* Finish copying in the necessary args before taking the proc lock */
+       if (args->cmd == LEDGER_ENTRY_INFO)
+               error = copyin(args->arg3, (char *)&len, sizeof (len));
+       else if (args->cmd == LEDGER_TEMPLATE_INFO)
+               error = copyin(args->arg2, (char *)&len, sizeof (len));
+#ifdef LEDGER_DEBUG
+       else if (args->cmd == LEDGER_LIMIT)
+               error = copyin(args->arg2, (char *)&lla, sizeof (lla));
+#endif
+       if (error)
+               return (error);
+       if (len < 0)
+               return (EINVAL);
+
+       if (args->cmd != LEDGER_TEMPLATE_INFO) {
+               pid = args->arg1;
+               proc = proc_find(pid);
+               if (proc == NULL)
+                       return (ESRCH);
+
+#if CONFIG_MACF
+               error = mac_proc_check_ledger(p, proc, args->cmd);
+               if (error) {
+                       proc_rele(proc);
+                       return (error);
+               }
+#endif
+
+               task = proc->task;
+       }
+
+       switch (args->cmd) {
+#ifdef LEDGER_DEBUG
+               case LEDGER_LIMIT:
+                       /* Non-superusers get EPERM and must not reach ledger_limit() */
+                       if (!kauth_cred_issuser(kauth_cred_get()))
+                               rval = EPERM;
+                       else
+                               rval = ledger_limit(task, &lla);
+                       proc_rele(proc);
+                       break;
+#endif
+               case LEDGER_INFO: {
+                       struct ledger_info info;
+
+                       rval = ledger_info(task, &info);
+                       proc_rele(proc);
+                       if (rval == 0)
+                               rval = copyout(&info, args->arg2, sizeof (info));
+                       break;
+               }
+               case LEDGER_ENTRY_INFO: {
+                       void *buf;
+                       int sz;
+
+                       rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
+                       proc_rele(proc);
+                       if ((rval == 0) && (len > 0)) {
+                               sz = len * sizeof (struct ledger_entry_info);
+                               rval = copyout(buf, args->arg2, sz);
+                               kfree(buf, sz);
+                       }
+                       if (rval == 0)
+                               rval = copyout(&len, args->arg3, sizeof (len));
+                       break;
+               }
+               case LEDGER_TEMPLATE_INFO: {
+                       void *buf;
+                       int sz;
+
+                       rval = ledger_template_info(&buf, &len);
+                       if ((rval == 0) && (len > 0)) {
+                               sz = len * sizeof (struct ledger_template_info);
+                               rval = copyout(buf, args->arg1, sz);
+                               kfree(buf, sz);
+                       }
+                       if (rval == 0)
+                               rval = copyout(&len, args->arg2, sizeof (len));
+                       break;
+               }
+               default:
+                       proc_rele(proc);        /* unknown cmd still holds the proc_find() ref */
+                       rval = EINVAL;
+       }
+
+       return (rval);
+}
+
+#if CONFIG_TELEMETRY
+/*
+ * telemetry
+ *
+ * Description:        System call dispatching on args->cmd; the only command
+ *             handled is TELEMETRY_CMD_TIMER_EVENT, anything else is EINVAL.
+ */
+int
+telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
+{
+       /* Reject every command other than the timer event request */
+       if (args->cmd != TELEMETRY_CMD_TIMER_EVENT)
+               return (EINVAL);
+
+       /* Forward deadline/interval/leeway to telemetry_timer_event() */
+       return (telemetry_timer_event(args->deadline, args->interval, args->leeway));
+}
+#endif /* CONFIG_TELEMETRY */