[apple/xnu.git] / bsd / kern / sys_generic.c (xnu-1228.0.2)
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/ioctl.h>
79 #include <sys/file_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/socketvar.h>
82 #include <sys/uio_internal.h>
83 #include <sys/kernel.h>
84 #include <sys/stat.h>
85 #include <sys/malloc.h>
86 #include <sys/sysproto.h>
87
88 #include <sys/mount_internal.h>
89 #include <sys/protosw.h>
90 #include <sys/ev.h>
91 #include <sys/user.h>
92 #include <sys/kdebug.h>
93 #include <sys/poll.h>
94 #include <sys/event.h>
95 #include <sys/eventvar.h>
96
97 #include <mach/mach_types.h>
98 #include <kern/kern_types.h>
99 #include <kern/assert.h>
100 #include <kern/kalloc.h>
101 #include <kern/thread.h>
102 #include <kern/clock.h>
103
104 #include <sys/mbuf.h>
105 #include <sys/socket.h>
106 #include <sys/socketvar.h>
107 #include <sys/errno.h>
108 #include <sys/syscall.h>
109 #include <sys/pipe.h>
110
111 #include <bsm/audit_kernel.h>
112
113 #include <net/if.h>
114 #include <net/route.h>
115
116 #include <netinet/in.h>
117 #include <netinet/in_systm.h>
118 #include <netinet/ip.h>
119 #include <netinet/in_pcb.h>
120 #include <netinet/ip_var.h>
121 #include <netinet/ip6.h>
122 #include <netinet/tcp.h>
123 #include <netinet/tcp_fsm.h>
124 #include <netinet/tcp_seq.h>
125 #include <netinet/tcp_timer.h>
126 #include <netinet/tcp_var.h>
127 #include <netinet/tcpip.h>
128 #include <netinet/tcp_debug.h>
129 /* for wait queue based select */
130 #include <kern/wait_queue.h>
131 #include <kern/kalloc.h>
132 #include <sys/vnode_internal.h>
133
134 /* XXX should be in a header file somewhere */
135 void evsofree(struct socket *);
136 void evpipefree(struct pipe *);
137 void postpipeevent(struct pipe *, int);
138 void postevent(struct socket *, struct sockbuf *, int);
139 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
140
141 int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
142 int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
143 extern void *get_bsduthreadarg(thread_t);
144 extern int *get_bsduthreadrval(thread_t);
145
146 __private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
147 user_addr_t bufp, user_size_t nbyte,
148 off_t offset, int flags, user_ssize_t *retval);
149 __private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
150 user_addr_t bufp, user_size_t nbyte,
151 off_t offset, int flags, user_ssize_t *retval);
152 __private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
153 __private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
154
155 #if NETAT
156 extern int appletalk_inited;
157 #endif /* NETAT */
158
159 #define f_flag f_fglob->fg_flag
160 #define f_type f_fglob->fg_type
161 #define f_msgcount f_fglob->fg_msgcount
162 #define f_cred f_fglob->fg_cred
163 #define f_ops f_fglob->fg_ops
164 #define f_offset f_fglob->fg_offset
165 #define f_data f_fglob->fg_data
166
167 /*
168 * Read system call.
169 *
170 * Returns: 0 Success
171 * preparefileread:EBADF
172 * preparefileread:ESPIPE
173 * preparefileread:ENXIO
174 * preparefileread:EBADF
175 * dofileread:???
176 */
177 int
178 read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
179 {
180 __pthread_testcancel(1);
181 return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
182 }
183
184 int
185 read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
186 {
187 struct fileproc *fp;
188 int error;
189 int fd = uap->fd;
190
191 if ( (error = preparefileread(p, &fp, fd, 0)) )
192 return (error);
193
194 error = dofileread(vfs_context_current(), fp, uap->cbuf, uap->nbyte,
195 (off_t)-1, 0, retval);
196
197 donefileread(p, fp, fd);
198
199 return (error);
200 }
201
202 /*
203 * Pread system call
204 *
205 * Returns: 0 Success
206 * preparefileread:EBADF
207 * preparefileread:ESPIPE
208 * preparefileread:ENXIO
209 * preparefileread:EBADF
210 * dofileread:???
211 */
212 int
213 pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
214 {
215 __pthread_testcancel(1);
216 return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
217 }
218
219 int
220 pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
221 {
222 struct fileproc *fp = NULL; /* fp set by preparefileread() */
223 int fd = uap->fd;
224 int error;
225
226 if ( (error = preparefileread(p, &fp, fd, 1)) )
227 return (error);
228
229 error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
230 uap->offset, FOF_OFFSET, retval);
231
232 donefileread(p, fp, fd);
233
234 if (!error)
235 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
236 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
237
238 return (error);
239 }
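/*
 * Illustrative userspace sketch (not part of this file): the only
 * difference between the two paths above is that pread() supplies an
 * explicit offset with FOF_OFFSET, so the shared file offset is never
 * moved (fd is a hypothetical open descriptor):
 *
 *	char buf[512];
 *	ssize_t n = pread(fd, buf, sizeof(buf), (off_t)1024);
 *	ssize_t m = read(fd, buf, sizeof(buf));	// advances fg_offset
 */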
240
241 /*
242 * Code common for read and pread
243 */
244
245 void
246 donefileread(struct proc *p, struct fileproc *fp, int fd)
247 {
248 proc_fdlock_spin(p);
249
250 fp->f_flags &= ~FP_INCHRREAD;
251
252 fp_drop(p, fd, fp, 1);
253 proc_fdunlock(p);
254 }
255
256 /*
257 * Returns: 0 Success
258 * EBADF
259 * ESPIPE
260 * ENXIO
261 * fp_lookup:EBADF
262 * fo_read:???
263 */
264 int
265 preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
266 {
267 vnode_t vp;
268 int error;
269 struct fileproc *fp;
270
271 proc_fdlock_spin(p);
272
273 error = fp_lookup(p, fd, &fp, 1);
274
275 if (error) {
276 proc_fdunlock(p);
277 return (error);
278 }
279 if ((fp->f_flag & FREAD) == 0) {
280 error = EBADF;
281 goto out;
282 }
283 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
284 error = ESPIPE;
285 goto out;
286 }
287 if (fp->f_type == DTYPE_VNODE) {
288 vp = (struct vnode *)fp->f_fglob->fg_data;
289
290 if (check_for_pread && (vnode_isfifo(vp))) {
291 error = ESPIPE;
292 goto out;
293 }
294 if (check_for_pread && (vp->v_flag & VISTTY)) {
295 error = ENXIO;
296 goto out;
297 }
298 if (vp->v_type == VCHR)
299 fp->f_flags |= FP_INCHRREAD;
300 }
301
302 *fp_ret = fp;
303
304 proc_fdunlock(p);
305 return (0);
306
307 out:
308 fp_drop(p, fd, fp, 1);
309 proc_fdunlock(p);
310 return (error);
311 }
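/*
 * The check_for_pread cases above map directly to userspace errno
 * values; a sketch (sv and buf are hypothetical):
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	pread(sv[0], buf, sizeof(buf), 0);	// fails, errno == ESPIPE
 *	read(sv[0], buf, sizeof(buf));		// plain read is fine
 */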
312
313
314 /*
315 * Returns: 0 Success
316 * EINVAL
317 * fo_read:???
318 */
319 __private_extern__ int
320 dofileread(vfs_context_t ctx, struct fileproc *fp,
321 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
322 user_ssize_t *retval)
323 {
324 uio_t auio;
325 user_ssize_t bytecnt;
326 long error = 0;
327 char uio_buf[ UIO_SIZEOF(1) ];
328
329 // LP64todo - do we want to raise this?
330 if (nbyte > INT_MAX)
331 return (EINVAL);
332
333 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
334 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
335 &uio_buf[0], sizeof(uio_buf));
336 } else {
337 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
338 &uio_buf[0], sizeof(uio_buf));
339 }
340 uio_addiov(auio, bufp, nbyte);
341
342 bytecnt = nbyte;
343
344 if ((error = fo_read(fp, auio, flags, ctx))) {
345 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
346 error == EINTR || error == EWOULDBLOCK))
347 error = 0;
348 }
349 bytecnt -= uio_resid(auio);
350
351 *retval = bytecnt;
352
353 return (error);
354 }
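/*
 * A minimal sketch of the stack-backed uio pattern used above, for a
 * single iovec (these are the same KPIs dofileread() calls; the
 * direction and sizes are illustrative):
 *
 *	char uio_buf[ UIO_SIZEOF(1) ];
 *	uio_t auio = uio_createwithbuffer(1, 0, UIO_USERSPACE32,
 *	    UIO_READ, &uio_buf[0], sizeof(uio_buf));
 *	uio_addiov(auio, bufp, nbyte);
 *	// after fo_read(), bytes moved == nbyte - uio_resid(auio)
 */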
355
356 /*
357 * Scatter read system call.
358 *
359 * Returns: 0 Success
360 * EINVAL
361 * ENOMEM
362 * copyin:EFAULT
363 * rd_uio:???
364 */
365 int
366 readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
367 {
368 __pthread_testcancel(1);
369 return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
370 }
371
372 int
373 readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
374 {
375 uio_t auio = NULL;
376 int error;
377 int size_of_iovec;
378 struct user_iovec *iovp;
379
380 /* Verify range before calling uio_create() */
381 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
382 return (EINVAL);
383
384 /* allocate a uio large enough to hold the number of iovecs passed */
385 auio = uio_create(uap->iovcnt, 0,
386 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
387 UIO_READ);
388
389 /* get location of iovecs within the uio. then copyin the iovecs from
390 * user space.
391 */
392 iovp = uio_iovsaddr(auio);
393 if (iovp == NULL) {
394 error = ENOMEM;
395 goto ExitThisRoutine;
396 }
397 size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
398 error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
399 if (error) {
400 goto ExitThisRoutine;
401 }
402
403 /* finalize uio_t for use and do the IO
404 */
405 uio_calculateresid(auio);
406 error = rd_uio(p, uap->fd, auio, retval);
407
408 ExitThisRoutine:
409 if (auio != NULL) {
410 uio_free(auio);
411 }
412 return (error);
413 }
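/*
 * Userspace shape of the scatter read serviced above (sketch; fd and
 * the buffers are hypothetical):
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *		{ .iov_base = body, .iov_len = sizeof(body) },
 *	};
 *	ssize_t n = readv(fd, iov, 2);	// fills hdr, then body
 */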
414
415 /*
416 * Write system call
417 *
418 * Returns: 0 Success
419 * EBADF
420 * fp_lookup:EBADF
421 * dofilewrite:???
422 */
423 int
424 write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
425 {
426 __pthread_testcancel(1);
427 return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
428
429 }
430
431 int
432 write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
433 {
434 struct fileproc *fp;
435 int error;
436 int fd = uap->fd;
437
438 error = fp_lookup(p,fd,&fp,0);
439 if (error)
440 return(error);
441 if ((fp->f_flag & FWRITE) == 0) {
442 error = EBADF;
443 } else {
444 struct vfs_context context = *(vfs_context_current());
445 context.vc_ucred = fp->f_fglob->fg_cred;
446
447 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
448 (off_t)-1, 0, retval);
449 }
450 if (error == 0)
451 fp_drop_written(p, fd, fp);
452 else
453 fp_drop(p, fd, fp, 0);
454 return(error);
455 }
456
457 /*
458 * pwrite system call
459 *
460 * Returns: 0 Success
461 * EBADF
462 * ESPIPE
463 * ENXIO
464 * EINVAL
465 * fp_lookup:EBADF
466 * dofilewrite:???
467 */
468 int
469 pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
470 {
471 __pthread_testcancel(1);
472 return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
473 }
474
475 int
476 pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
477 {
478 struct fileproc *fp;
479 int error;
480 int fd = uap->fd;
481 vnode_t vp = (vnode_t)0;
482
483 error = fp_lookup(p,fd,&fp,0);
484 if (error)
485 return(error);
486
487 if ((fp->f_flag & FWRITE) == 0) {
488 error = EBADF;
489 } else {
490 struct vfs_context context = *vfs_context_current();
491 context.vc_ucred = fp->f_fglob->fg_cred;
492
493 if (fp->f_type != DTYPE_VNODE) {
494 error = ESPIPE;
495 goto errout;
496 }
497 vp = (vnode_t)fp->f_fglob->fg_data;
498 if (vnode_isfifo(vp)) {
499 error = ESPIPE;
500 goto errout;
501 }
502 if ((vp->v_flag & VISTTY)) {
503 error = ENXIO;
504 goto errout;
505 }
506 if (uap->offset == (off_t)-1) {
507 error = EINVAL;
508 goto errout;
509 }
510
511 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
512 uap->offset, FOF_OFFSET, retval);
513 }
514 errout:
515 if (error == 0)
516 fp_drop_written(p, fd, fp);
517 else
518 fp_drop(p, fd, fp, 0);
519
520 if (!error)
521 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
522 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
523
524 return(error);
525 }
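/*
 * Note the extra constraint enforced above: unlike write(), pwrite()
 * rejects an offset of -1 with EINVAL instead of treating it as "use
 * the current offset" (userspace sketch, hypothetical fd):
 *
 *	pwrite(fd, buf, len, (off_t)-1);	// fails, errno == EINVAL
 *	pwrite(fd, buf, len, (off_t)0);		// writes at offset 0
 */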
526
527 /*
528 * Returns: 0 Success
529 * EINVAL
530 * <fo_write>:EPIPE
531 * <fo_write>:??? [indirect through struct fileops]
532 */
533 __private_extern__ int
534 dofilewrite(vfs_context_t ctx, struct fileproc *fp,
535 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
536 user_ssize_t *retval)
537 {
538 uio_t auio;
539 long error = 0;
540 user_ssize_t bytecnt;
541 char uio_buf[ UIO_SIZEOF(1) ];
542
543 // LP64todo - do we want to raise this?
544 if (nbyte > INT_MAX)
545 return (EINVAL);
546
547 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
548 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
549 &uio_buf[0], sizeof(uio_buf));
550 } else {
551 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
552 &uio_buf[0], sizeof(uio_buf));
553 }
554 uio_addiov(auio, bufp, nbyte);
555
556 bytecnt = nbyte;
557 if ((error = fo_write(fp, auio, flags, ctx))) {
558 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
559 error == EINTR || error == EWOULDBLOCK))
560 error = 0;
561 /* The socket layer handles SIGPIPE */
562 if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
563 /* XXX Raise the signal on the thread? */
564 psignal(vfs_context_proc(ctx), SIGPIPE);
565 }
566 }
567 bytecnt -= uio_resid(auio);
568 *retval = bytecnt;
569
570 return (error);
571 }
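/*
 * The EPIPE handling above is why a write to a pipe whose read end is
 * closed raises SIGPIPE in the writer, while sockets are left to the
 * socket layer (which can suppress the signal, e.g. SO_NOSIGPIPE).
 * Userspace sketch (handle_broken_pipe is hypothetical):
 *
 *	signal(SIGPIPE, SIG_IGN);
 *	if (write(pipe_fd, buf, len) == -1 && errno == EPIPE)
 *		handle_broken_pipe();	// reader side went away
 */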
572
573 /*
574 * Gather write system call
575 */
576 int
577 writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
578 {
579 __pthread_testcancel(1);
580 return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
581 }
582
583 int
584 writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
585 {
586 uio_t auio = NULL;
587 int error;
588 int size_of_iovec;
589 struct user_iovec *iovp;
590
591 /* Verify range before calling uio_create() */
592 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
593 return (EINVAL);
594
595 /* allocate a uio large enough to hold the number of iovecs passed */
596 auio = uio_create(uap->iovcnt, 0,
597 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
598 UIO_WRITE);
599
600 /* get location of iovecs within the uio. then copyin the iovecs from
601 * user space.
602 */
603 iovp = uio_iovsaddr(auio);
604 if (iovp == NULL) {
605 error = ENOMEM;
606 goto ExitThisRoutine;
607 }
608 size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
609 error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
610 if (error) {
611 goto ExitThisRoutine;
612 }
613
614 /* finalize uio_t for use and do the IO
615 */
616 uio_calculateresid(auio);
617 error = wr_uio(p, uap->fd, auio, retval);
618
619 ExitThisRoutine:
620 if (auio != NULL) {
621 uio_free(auio);
622 }
623 return (error);
624 }
625
626
627 int
628 wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
629 {
630 struct fileproc *fp;
631 int error;
632 user_ssize_t count;
633 struct vfs_context context = *vfs_context_current();
634
635 error = fp_lookup(p,fdes,&fp,0);
636 if (error)
637 return(error);
638
639 if ((fp->f_flag & FWRITE) == 0) {
640 error = EBADF;
641 goto out;
642 }
643 count = uio_resid(uio);
644
645 context.vc_ucred = fp->f_cred;
646 error = fo_write(fp, uio, 0, &context);
647 if (error) {
648 if (uio_resid(uio) != count && (error == ERESTART ||
649 error == EINTR || error == EWOULDBLOCK))
650 error = 0;
651 /* The socket layer handles SIGPIPE */
652 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
653 psignal(p, SIGPIPE);
654 }
655 *retval = count - uio_resid(uio);
656
657 out:
658 if (error == 0)
659 fp_drop_written(p, fdes, fp);
660 else
661 fp_drop(p, fdes, fp, 0);
662 return(error);
663 }
664
665
666 int
667 rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
668 {
669 struct fileproc *fp;
670 int error;
671 user_ssize_t count;
672 struct vfs_context context = *vfs_context_current();
673
674 if ( (error = preparefileread(p, &fp, fdes, 0)) )
675 return (error);
676
677 count = uio_resid(uio);
678
679 context.vc_ucred = fp->f_cred;
680
681 error = fo_read(fp, uio, 0, &context);
682
683 if (error) {
684 if (uio_resid(uio) != count && (error == ERESTART ||
685 error == EINTR || error == EWOULDBLOCK))
686 error = 0;
687 }
688 *retval = count - uio_resid(uio);
689
690 donefileread(p, fp, fdes);
691
692 return (error);
693 }
694
695 /*
696 * Ioctl system call
697 *
698 * Returns: 0 Success
699 * EBADF
700 * ENOTTY
701 * ENOMEM
702 * ESRCH
703 * copyin:EFAULT
704 * copyout:EFAULT
705 * fp_lookup:EBADF Bad file descriptor
706 * fo_ioctl:???
707 */
708 int
709 ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
710 {
711 struct fileproc *fp;
712 u_long com;
713 int error = 0;
714 u_int size;
715 caddr_t datap, memp;
716 boolean_t is64bit;
717 int tmp;
718 #define STK_PARAMS 128
719 char stkbuf[STK_PARAMS];
720 int fd = uap->fd;
721 struct vfs_context context = *vfs_context_current();
722
723 AUDIT_ARG(fd, uap->fd);
724 AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */
725 AUDIT_ARG(addr, uap->data);
726
727 is64bit = proc_is64bit(p);
728
729 proc_fdlock(p);
730 error = fp_lookup(p,fd,&fp,1);
731 if (error) {
732 proc_fdunlock(p);
733 return(error);
734 }
735
736 AUDIT_ARG(file, p, fp);
737
738 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
739 error = EBADF;
740 goto out;
741 }
742
743 context.vc_ucred = fp->f_fglob->fg_cred;
744
745 #if CONFIG_MACF
746 error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
747 if (error)
748 goto out;
749 #endif
750
751 #if NETAT
752 /*
753 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
754 * while implementing an ATioctl system call
755 */
756 {
757 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
758 u_long fixed_command;
759
760 #ifdef APPLETALK_DEBUG
761 kprintf("ioctl: special AppleTalk \n");
762 #endif
763 datap = &stkbuf[0];
764 *(user_addr_t *)datap = uap->data;
765 fixed_command = _IOW(0, 0xff99, uap->data);
766 error = fo_ioctl(fp, fixed_command, datap, &context);
767 goto out;
768 }
769 }
770
771 #endif /* NETAT */
772
773
774 switch (com = uap->com) {
775 case FIONCLEX:
776 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
777 error = 0;
778 goto out;
779 case FIOCLEX:
780 *fdflags(p, uap->fd) |= UF_EXCLOSE;
781 error = 0;
782 goto out;
783 }
784
785 /*
786 * Interpret high order word to find amount of data to be
787 * copied to/from the user's address space.
788 */
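/*
 * Illustrative example: a command built as _IOW('f', 99, int)
 * encodes IOC_IN and sizeof(int) in its high-order word, so
 * IOCPARM_LEN(com) yields 4 here and the copyin() below fetches
 * a single int into the stack buffer.
 */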
789 size = IOCPARM_LEN(com);
790 if (size > IOCPARM_MAX) {
791 error = ENOTTY;
792 goto out;
793 }
794 memp = NULL;
795 if (size > sizeof (stkbuf)) {
796 proc_fdunlock(p);
797 if ((memp = (caddr_t)kalloc(size)) == 0) {
798 proc_fdlock(p);
799 error = ENOMEM;
800 goto out;
801 }
802 proc_fdlock(p);
803 datap = memp;
804 } else
805 datap = &stkbuf[0];
806 if (com&IOC_IN) {
807 if (size) {
808 proc_fdunlock(p);
809 error = copyin(uap->data, datap, size);
810 if (error) {
811 if (memp)
812 kfree(memp, size);
813 proc_fdlock(p);
814 goto out;
815 }
816 proc_fdlock(p);
817 } else {
818 /* XXX - IOC_IN and no size? we should probably return an error here!! */
819 if (is64bit) {
820 *(user_addr_t *)datap = uap->data;
821 }
822 else {
823 *(uint32_t *)datap = (uint32_t)uap->data;
824 }
825 }
826 } else if ((com&IOC_OUT) && size)
827 /*
828 * Zero the buffer so the user always
829 * gets back something deterministic.
830 */
831 bzero(datap, size);
832 else if (com&IOC_VOID) {
833 /* XXX - this is odd since IOC_VOID means no parameters */
834 if (is64bit) {
835 *(user_addr_t *)datap = uap->data;
836 }
837 else {
838 *(uint32_t *)datap = (uint32_t)uap->data;
839 }
840 }
841
842 switch (com) {
843
844 case FIONBIO:
845 if ( (tmp = *(int *)datap) )
846 fp->f_flag |= FNONBLOCK;
847 else
848 fp->f_flag &= ~FNONBLOCK;
849 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
850 break;
851
852 case FIOASYNC:
853 if ( (tmp = *(int *)datap) )
854 fp->f_flag |= FASYNC;
855 else
856 fp->f_flag &= ~FASYNC;
857 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
858 break;
859
860 case FIOSETOWN:
861 tmp = *(int *)datap;
862 if (fp->f_type == DTYPE_SOCKET) {
863 ((struct socket *)fp->f_data)->so_pgid = tmp;
864 error = 0;
865 break;
866 }
867 if (fp->f_type == DTYPE_PIPE) {
868 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
869 break;
870 }
871 if (tmp <= 0) {
872 tmp = -tmp;
873 } else {
874 struct proc *p1 = proc_find(tmp);
875 if (p1 == 0) {
876 error = ESRCH;
877 break;
878 }
879 tmp = p1->p_pgrpid;
880 proc_rele(p1);
881 }
882 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
883 break;
884
885 case FIOGETOWN:
886 if (fp->f_type == DTYPE_SOCKET) {
887 error = 0;
888 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
889 break;
890 }
891 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
892 *(int *)datap = -*(int *)datap;
893 break;
894
895 default:
896 error = fo_ioctl(fp, com, datap, &context);
897 /*
898 * Copy any data to user, size was
899 * already set and checked above.
900 */
901 if (error == 0 && (com&IOC_OUT) && size)
902 error = copyout(datap, uap->data, (u_int)size);
903 break;
904 }
905 proc_fdunlock(p);
906 if (memp)
907 kfree(memp, size);
908 proc_fdlock(p);
909 out:
910 fp_drop(p, fd, fp, 1);
911 proc_fdunlock(p);
912 return(error);
913 }
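/*
 * Userspace sketch of the FIONBIO fast path above (fd is hypothetical);
 * the flag is mirrored into f_flag before being pushed down to fo_ioctl():
 *
 *	int on = 1;
 *	ioctl(fd, FIONBIO, &on);	// same effect as O_NONBLOCK via fcntl()
 */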
914
915 int selwait, nselcoll;
916 #define SEL_FIRSTPASS 1
917 #define SEL_SECONDPASS 2
918 extern int selcontinue(int error);
919 extern int selprocess(int error, int sel_pass);
920 static int selscan(struct proc *p, struct _select * sel,
921 int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub);
922 static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
923 int nfd, int * count, int *kfcount);
924 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
925 extern uint64_t tvtoabstime(struct timeval *tvp);
926
927 /*
928 * Select system call.
929 *
930 * Returns: 0 Success
931 * EINVAL Invalid argument
932 * EAGAIN Nonconformant error if allocation fails
933 * selprocess:???
934 */
935 int
936 select(struct proc *p, struct select_args *uap, register_t *retval)
937 {
938 __pthread_testcancel(1);
939 return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
940 }
941
942 int
943 select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *retval)
944 {
945 int error = 0;
946 u_int ni, nw, size;
947 thread_t th_act;
948 struct uthread *uth;
949 struct _select *sel;
950 int needzerofill = 1;
951 int count = 0;
952 int kfcount = 0;
953
954 th_act = current_thread();
955 uth = get_bsdthread_info(th_act);
956 sel = &uth->uu_select;
957 retval = (int *)get_bsduthreadrval(th_act);
958 *retval = 0;
959
960 if (uap->nd < 0) {
961 return (EINVAL);
962 }
963
964 /* select on thread of process that already called proc_exit() */
965 if (p->p_fd == NULL) {
966 return (EBADF);
967 }
968
969 if (uap->nd > p->p_fd->fd_nfiles)
970 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
971
972 nw = howmany(uap->nd, NFDBITS);
973 ni = nw * sizeof(fd_mask);
974
975 /*
976 * if the previously allocated space for the bits is smaller than
977 * what is requested or no space has yet been allocated for this
978 * thread, allocate enough space now.
979 *
980 * Note: If this allocation fails, select() will return EAGAIN; this
981 * is the same thing poll() returns in a no-memory situation, but
982 * it is not a POSIX-compliant error code for select().
983 */
984 if (sel->nbytes < (3 * ni)) {
985 int nbytes = 3 * ni;
986
987 /* Free previous allocation, if any */
988 if (sel->ibits != NULL)
989 FREE(sel->ibits, M_TEMP);
990 if (sel->obits != NULL) {
991 FREE(sel->obits, M_TEMP);
992 /* NULL out; subsequent ibits allocation may fail */
993 sel->obits = NULL;
994 }
995
996 MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
997 if (sel->ibits == NULL)
998 return (EAGAIN);
999 MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1000 if (sel->obits == NULL) {
1001 FREE(sel->ibits, M_TEMP);
1002 sel->ibits = NULL;
1003 return (EAGAIN);
1004 }
1005 sel->nbytes = nbytes;
1006 needzerofill = 0;
1007 }
1008
1009 if (needzerofill) {
1010 bzero((caddr_t)sel->ibits, sel->nbytes);
1011 bzero((caddr_t)sel->obits, sel->nbytes);
1012 }
1013
1014 /*
1015 * get the bits from the user address space
1016 */
1017 #define getbits(name, x) \
1018 do { \
1019 if (uap->name && (error = copyin(uap->name, \
1020 (caddr_t)&sel->ibits[(x) * nw], ni))) \
1021 goto continuation; \
1022 } while (0)
1023
1024 getbits(in, 0);
1025 getbits(ou, 1);
1026 getbits(ex, 2);
1027 #undef getbits
1028
1029 if (uap->tv) {
1030 struct timeval atv;
1031 if (IS_64BIT_PROCESS(p)) {
1032 struct user_timeval atv64;
1033 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1034 /* Loses resolution - assume timeout < 68 years */
1035 atv.tv_sec = atv64.tv_sec;
1036 atv.tv_usec = atv64.tv_usec;
1037 } else {
1038 error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv));
1039 }
1040 if (error)
1041 goto continuation;
1042 if (itimerfix(&atv)) {
1043 error = EINVAL;
1044 goto continuation;
1045 }
1046
1047 clock_absolutetime_interval_to_deadline(
1048 tvtoabstime(&atv), &sel->abstime);
1049 }
1050 else
1051 sel->abstime = 0;
1052
1053 sel->kfcount = 0;
1054 if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
1055 goto continuation;
1056 }
1057 sel->count = count;
1058 sel->kfcount = kfcount;
1059 size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
1060 if (uth->uu_allocsize) {
1061 if (uth->uu_wqset == 0)
1062 panic("select: wql memory smashed");
1063 /* needed for the select now */
1064 if (size > uth->uu_allocsize) {
1065 kfree(uth->uu_wqset, uth->uu_allocsize);
1066 uth->uu_allocsize = size;
1067 uth->uu_wqset = (wait_queue_set_t)kalloc(size);
1068 if (uth->uu_wqset == (wait_queue_set_t)NULL)
1069 panic("failed to allocate memory for waitqueue\n");
1070 }
1071 } else {
1072 sel->count = count;
1073 uth->uu_allocsize = size;
1074 uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
1075 if (uth->uu_wqset == (wait_queue_set_t)NULL)
1076 panic("failed to allocate memory for waitqueue\n");
1077 }
1078 bzero(uth->uu_wqset, size);
1079 sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
1080 wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
1081
1082 continuation:
1083 return selprocess(error, SEL_FIRSTPASS);
1084 }
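/*
 * Userspace shape of what select_nocancel() services (sketch; fd is
 * hypothetical). The three getbits() copies above correspond to the
 * readfds/writefds/exceptfds arguments:
 *
 *	fd_set rfds;
 *	struct timeval tv = { 1, 0 };	// 1 second
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	int n = select(fd + 1, &rfds, NULL, NULL, &tv);
 */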
1085
1086 int
1087 selcontinue(int error)
1088 {
1089 return selprocess(error, SEL_SECONDPASS);
1090 }
1091
1092 int
1093 selprocess(int error, int sel_pass)
1094 {
1095 int ncoll;
1096 u_int ni, nw;
1097 thread_t th_act;
1098 struct uthread *uth;
1099 struct proc *p;
1100 struct select_args *uap;
1101 int *retval;
1102 struct _select *sel;
1103 int unwind = 1;
1104 int prepost = 0;
1105 int somewakeup = 0;
1106 int doretry = 0;
1107 wait_result_t wait_result;
1108
1109 p = current_proc();
1110 th_act = current_thread();
1111 uap = (struct select_args *)get_bsduthreadarg(th_act);
1112 retval = (int *)get_bsduthreadrval(th_act);
1113 uth = get_bsdthread_info(th_act);
1114 sel = &uth->uu_select;
1115
1116 /* if it is the first pass, the wait queue is not set up yet */
1117 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
1118 unwind = 0;
1119 if (sel->count == 0)
1120 unwind = 0;
1121 retry:
1122 if (error != 0) {
1123 goto done;
1124 }
1125
1126 ncoll = nselcoll;
1127 OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
1128 /* skip scans if the select is just for timeouts */
1129 if (sel->count) {
1130 if (sel_pass == SEL_FIRSTPASS)
1131 wait_queue_sub_clearrefs(uth->uu_wqset);
1132
1133 error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
1134 if (error || *retval) {
1135 goto done;
1136 }
1137 if (prepost) {
1138 /* if the select was preposted, we can wake up and discover that someone
1139 * else already read the data; go to select again if time permits
1140 */
1141 prepost = 0;
1142 doretry = 1;
1143 }
1144 if (somewakeup) {
1145 somewakeup = 0;
1146 doretry = 1;
1147 }
1148 }
1149
1150 if (uap->tv) {
1151 uint64_t now;
1152
1153 clock_get_uptime(&now);
1154 if (now >= sel->abstime)
1155 goto done;
1156 }
1157
1158 if (doretry) {
1159 /* cleanup obits and try again */
1160 doretry = 0;
1161 sel_pass = SEL_FIRSTPASS;
1162 goto retry;
1163 }
1164
1165 /*
1166 * To effect a poll, the timeout argument should be
1167 * non-nil, pointing to a zero-valued timeval structure.
1168 */
1169 if (uap->tv && sel->abstime == 0) {
1170 goto done;
1171 }
1172
1173 /* No spurious wakeups due to collisions, no need to check for them */
1174 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1175 sel_pass = SEL_FIRSTPASS;
1176 goto retry;
1177 }
1178
1179 OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1180
1181 /* if the select is just for timeout, skip the check */
1182 if (sel->count && (sel_pass == SEL_SECONDPASS))
1183 panic("selprocess: 2nd pass assertwaiting");
1184
1185 /* Wait Queue Subordinate has waitqueue as first element */
1186 wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
1187 &selwait, THREAD_ABORTSAFE, sel->abstime);
1188 if (wait_result != THREAD_AWAKENED) {
1189 /* there are no preposted events */
1190 error = tsleep1(NULL, PSOCK | PCATCH,
1191 "select", 0, selcontinue);
1192 } else {
1193 prepost = 1;
1194 error = 0;
1195 }
1196
1197 sel_pass = SEL_SECONDPASS;
1198 if (error == 0) {
1199 if (!prepost)
1200 somewakeup = 1;
1201 goto retry;
1202 }
1203 done:
1204 if (unwind) {
1205 wait_subqueue_unlink_all(uth->uu_wqset);
1206 seldrop(p, sel->ibits, uap->nd);
1207 }
1208 OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1209 /* select is not restarted after signals... */
1210 if (error == ERESTART)
1211 error = EINTR;
1212 if (error == EWOULDBLOCK)
1213 error = 0;
1214 nw = howmany(uap->nd, NFDBITS);
1215 ni = nw * sizeof(fd_mask);
1216
1217 #define putbits(name, x) \
1218 do { \
1219 if (uap->name && (error2 = \
1220 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1221 error = error2; \
1222 } while (0)
1223
1224 if (error == 0) {
1225 int error2;
1226
1227 putbits(in, 0);
1228 putbits(ou, 1);
1229 putbits(ex, 2);
1230 #undef putbits
1231 }
1232 return(error);
1233 }
1234
1235 static int
1236 selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
1237 int sel_pass, wait_queue_sub_t wqsub)
1238 {
1239 struct filedesc *fdp = p->p_fd;
1240 int msk, i, j, fd;
1241 u_int32_t bits;
1242 struct fileproc *fp;
1243 int n = 0;
1244 int nc = 0;
1245 static int flag[3] = { FREAD, FWRITE, 0 };
1246 u_int32_t *iptr, *optr;
1247 u_int nw;
1248 u_int32_t *ibits, *obits;
1249 char * wql;
1250 char * wql_ptr;
1251 int count, kfcount;
1252 boolean_t funnel_state;
1253 vnode_t vp;
1254 struct vfs_context context = *vfs_context_current();
1255
1256 /*
1257 * Problems when rebooting, due to Mac OS X signal problems
1258 * in Beaker1C; verify that p->p_fd is valid
1259 */
1260 if (fdp == NULL) {
1261 *retval = 0;
1262 return(EIO);
1263 }
1264 ibits = sel->ibits;
1265 obits = sel->obits;
1266 wql = sel->wql;
1267
1268 nw = howmany(nfd, NFDBITS);
1269
1270 count = sel->count;
1271 kfcount = sel->kfcount;
1272
1273 if (kfcount > count)
1274 panic("selscan: count < kfcount");
1275
1276 if (kfcount != 0) {
1277 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1278
1279 proc_fdlock(p);
1280 for (msk = 0; msk < 3; msk++) {
1281 iptr = (u_int32_t *)&ibits[msk * nw];
1282 optr = (u_int32_t *)&obits[msk * nw];
1283
1284 for (i = 0; i < nfd; i += NFDBITS) {
1285 bits = iptr[i/NFDBITS];
1286
1287 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1288 bits &= ~(1 << j);
1289 fp = fdp->fd_ofiles[fd];
1290
1291 if (fp == NULL ||
1292 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1293 proc_fdunlock(p);
1294 thread_funnel_set(kernel_flock, funnel_state);
1295 return(EBADF);
1296 }
1297 if (sel_pass == SEL_SECONDPASS) {
1298 wql_ptr = (char *)0;
1299 fp->f_flags &= ~FP_INSELECT;
1300 fp->f_waddr = (void *)0;
1301 } else {
1302 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1303 fp->f_flags |= FP_INSELECT;
1304 fp->f_waddr = (void *)wqsub;
1305 }
1306
1307 context.vc_ucred = fp->f_cred;
1308
1309 if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
1310 && ((vp = (struct vnode *)fp->f_data) != NULLVP)
1311 && (vp->v_type == VCHR)
1312 && fo_select(fp, flag[msk], wql_ptr, &context)) {
1313 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1314 n++;
1315 }
1316 nc++;
1317 }
1318 }
1319 }
1320 proc_fdunlock(p);
1321 thread_funnel_set(kernel_flock, funnel_state);
1322 }
1323
1324 nc = 0;
1325 if (kfcount != count) {
1326 proc_fdlock(p);
1327 for (msk = 0; msk < 3; msk++) {
1328 iptr = (u_int32_t *)&ibits[msk * nw];
1329 optr = (u_int32_t *)&obits[msk * nw];
1330
1331 for (i = 0; i < nfd; i += NFDBITS) {
1332 bits = iptr[i/NFDBITS];
1333
1334 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1335 bits &= ~(1 << j);
1336 fp = fdp->fd_ofiles[fd];
1337
1338 if (fp == NULL ||
1339 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1340 proc_fdunlock(p);
1341 return(EBADF);
1342 }
1343 if (sel_pass == SEL_SECONDPASS) {
1344 wql_ptr = (char *)0;
1345 fp->f_flags &= ~FP_INSELECT;
1346 fp->f_waddr = (void *)0;
1347 } else {
1348 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1349 fp->f_flags |= FP_INSELECT;
1350 fp->f_waddr = (void *)wqsub;
1351 }
1352
1353 context.vc_ucred = fp->f_cred;
1354
1355 if ((fp->f_ops &&
1356 ((fp->f_type != DTYPE_VNODE)
1357 || (((vp = (struct vnode *)fp->f_data) != NULLVP)
1358 && (vp->v_type != VCHR))
1359 )
1360 && fo_select(fp, flag[msk], wql_ptr, &context))) {
1361 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1362 n++;
1363 }
1364 nc++;
1365 }
1366 }
1367 }
1368 proc_fdunlock(p);
1369 }
1370 *retval = n;
1371 return (0);
1372 }
1373
1374 int poll_callback(struct kqueue *, struct kevent *, void *);
1375
1376 struct poll_continue_args {
1377 user_addr_t pca_fds;
1378 u_int pca_nfds;
1379 u_int pca_rfds;
1380 };
1381
1382 int
1383 poll(struct proc *p, struct poll_args *uap, register_t *retval)
1384 {
1385 __pthread_testcancel(1);
1386 return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
1387 }
1388
1389
1390 int
1391 poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval)
1392 {
1393 struct poll_continue_args *cont;
1394 struct pollfd *fds;
1395 struct kqueue *kq;
1396 struct timeval atv;
1397 int ncoll, error = 0;
1398 u_int nfds = uap->nfds;
1399 u_int rfds = 0;
1400 u_int i;
1401 size_t ni;
1402
1403 /*
1404 * This is kinda bogus. We have fd limits, but that is not
1405 * really related to the size of the pollfd array. Make sure
1406 * we let the process use at least FD_SETSIZE entries and at
1407 * least enough for the current limits. We want to be reasonably
1408 * safe, but not overly restrictive.
1409 */
1410 if (nfds > OPEN_MAX ||
1411 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
1412 return (EINVAL);
1413
1414 kq = kqueue_alloc(p);
1415 if (kq == NULL)
1416 return (EAGAIN);
1417
1418 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1419 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1420 if (NULL == cont) {
1421 error = EAGAIN;
1422 goto out;
1423 }
1424
1425 fds = (struct pollfd *)&cont[1];
1426 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1427 if (error)
1428 goto out;
1429
1430 if (uap->timeout != -1) {
1431 struct timeval rtv;
1432
1433 atv.tv_sec = uap->timeout / 1000;
1434 atv.tv_usec = (uap->timeout % 1000) * 1000;
1435 if (itimerfix(&atv)) {
1436 error = EINVAL;
1437 goto out;
1438 }
1439 getmicrouptime(&rtv);
1440 timevaladd(&atv, &rtv);
1441 } else {
1442 atv.tv_sec = 0;
1443 atv.tv_usec = 0;
1444 }
1445
1446 /* JMM - all this P_SELECT stuff is bogus */
1447 ncoll = nselcoll;
1448 OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
1449 for (i = 0; i < nfds; i++) {
1450 short events = fds[i].events;
1451 struct kevent kev;
1452 int kerror = 0;
1453
1454 /* per spec, ignore fd values below zero */
1455 if (fds[i].fd < 0) {
1456 fds[i].revents = 0;
1457 continue;
1458 }
1459
1460 /* convert the poll event into a kqueue kevent */
1461 kev.ident = fds[i].fd;
1462 kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
1463 kev.fflags = NOTE_LOWAT;
1464 kev.data = 1; /* efficiency be damned: any data should trigger */
1465 kev.udata = CAST_USER_ADDR_T(&fds[i]);
1466
1467 /* Handle input events */
1468 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
1469 kev.filter = EVFILT_READ;
1470 if (!(events & ( POLLIN | POLLRDNORM )))
1471 kev.flags |= EV_OOBAND;
1472 kerror = kevent_register(kq, &kev, p);
1473 }
1474
1475 /* Handle output events */
1476 if (kerror == 0 &&
1477 events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
1478 kev.filter = EVFILT_WRITE;
1479 kerror = kevent_register(kq, &kev, p);
1480 }
1481
1482 /* Handle BSD extension vnode events */
1483 if (kerror == 0 &&
1484 events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
1485 kev.filter = EVFILT_VNODE;
1486 kev.fflags = 0;
1487 if (events & POLLEXTEND)
1488 kev.fflags |= NOTE_EXTEND;
1489 if (events & POLLATTRIB)
1490 kev.fflags |= NOTE_ATTRIB;
1491 if (events & POLLNLINK)
1492 kev.fflags |= NOTE_LINK;
1493 if (events & POLLWRITE)
1494 kev.fflags |= NOTE_WRITE;
1495 kerror = kevent_register(kq, &kev, p);
1496 }
1497
1498 if (kerror != 0) {
1499 fds[i].revents = POLLNVAL;
1500 rfds++;
1501 } else
1502 fds[i].revents = 0;
1503 }
1504
1505 /* Did we have any trouble registering? */
1506 if (rfds > 0)
1507 goto done;
1508
1509 /* scan for, and possibly wait for, the kevents to trigger */
1510 cont->pca_fds = uap->fds;
1511 cont->pca_nfds = nfds;
1512 cont->pca_rfds = rfds;
1513 error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p);
1514 rfds = cont->pca_rfds;
1515
1516 done:
1517 OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1518 /* poll is not restarted after signals... */
1519 if (error == ERESTART)
1520 error = EINTR;
1521 if (error == EWOULDBLOCK)
1522 error = 0;
1523 if (error == 0) {
1524 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1525 *retval = rfds;
1526 }
1527
1528 out:
1529 if (NULL != cont)
1530 FREE(cont, M_TEMP);
1531
1532 kqueue_dealloc(kq);
1533 return (error);
1534 }
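/*
 * Userspace shape of the poll-over-kqueue emulation above (sketch; fd
 * and consume_input are hypothetical). Each pollfd becomes one or more
 * EV_ONESHOT kevents, harvested by poll_callback():
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	int n = poll(&pfd, 1, 1000);	// timeout in milliseconds
 *	if (n > 0 && (pfd.revents & POLLIN))
 *		consume_input(fd);
 */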
1535
1536 int
1537 poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1538 {
1539 struct poll_continue_args *cont = (struct poll_continue_args *)data;
1540 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
1541 short mask;
1542
1543 /* convert the results back into revents */
1544 if (kevp->flags & EV_EOF)
1545 fds->revents |= POLLHUP;
1546 if (kevp->flags & EV_ERROR)
1547 fds->revents |= POLLERR;
1548
1549 switch (kevp->filter) {
1550 case EVFILT_READ:
1551 if (fds->revents & POLLHUP)
1552 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1553 else {
1554 mask = 0;
1555 if (kevp->data != 0)
1556 mask |= (POLLIN | POLLRDNORM );
1557 if (kevp->flags & EV_OOBAND)
1558 mask |= ( POLLPRI | POLLRDBAND );
1559 }
1560 fds->revents |= (fds->events & mask);
1561 break;
1562
1563 case EVFILT_WRITE:
1564 if (!(fds->revents & POLLHUP))
1565 fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1566 break;
1567
1568 case EVFILT_VNODE:
1569 if (kevp->fflags & NOTE_EXTEND)
1570 fds->revents |= (fds->events & POLLEXTEND);
1571 if (kevp->fflags & NOTE_ATTRIB)
1572 fds->revents |= (fds->events & POLLATTRIB);
1573 if (kevp->fflags & NOTE_LINK)
1574 fds->revents |= (fds->events & POLLNLINK);
1575 if (kevp->fflags & NOTE_WRITE)
1576 fds->revents |= (fds->events & POLLWRITE);
1577 break;
1578 }
1579
1580 if (fds->revents)
1581 cont->pca_rfds++;
1582
1583 return 0;
1584 }
1585
1586 int
1587 seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
1588 {
1589
1590 return (1);
1591 }
1592
1593 static int
1594 selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
1595 int nfd, int *countp, int * kfcountp)
1596 {
1597 struct filedesc *fdp = p->p_fd;
1598 int msk, i, j, fd;
1599 u_int32_t bits;
1600 struct fileproc *fp;
1601 int n = 0;
1602 u_int32_t *iptr;
1603 u_int nw;
1604 int error=0;
1605 int kfc = 0;
1606 int dropcount;
1607 vnode_t vp;
1608
1609 /*
1610 * Problems when rebooting, due to Mac OS X signal problems
1611 * in Beaker1C; verify that p->p_fd is valid
1612 */
1613 if (fdp == NULL) {
1614 *countp = 0;
1615 *kfcountp = 0;
1616 return(EIO);
1617 }
1618 nw = howmany(nfd, NFDBITS);
1619
1620 proc_fdlock(p);
1621 for (msk = 0; msk < 3; msk++) {
1622 iptr = (u_int32_t *)&ibits[msk * nw];
1623 for (i = 0; i < nfd; i += NFDBITS) {
1624 bits = iptr[i/NFDBITS];
1625 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1626 bits &= ~(1 << j);
1627 fp = fdp->fd_ofiles[fd];
1628 if (fp == NULL ||
1629 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1630 *countp = 0;
1631 *kfcountp = 0;
1632 error = EBADF;
1633 goto bad;
1634 }
1635 fp->f_iocount++;
1636 if ((fp->f_type == DTYPE_VNODE)
1637 && ((vp = (struct vnode *)fp->f_data) != NULLVP)
1638 && (vp->v_type == VCHR) )
1639 kfc++;
1640
1641 n++;
1642 }
1643 }
1644 }
1645 proc_fdunlock(p);
1646
1647 *countp = n;
1648 *kfcountp = kfc;
1649 return (0);
1650 bad:
1651 dropcount = 0;
1652
1653 if (n == 0)
1654 goto out;
1655 /* undo the iocounts */
1656 for (msk = 0; msk < 3; msk++) {
1657 iptr = (u_int32_t *)&ibits[msk * nw];
1658 for (i = 0; i < nfd; i += NFDBITS) {
1659 bits = iptr[i/NFDBITS];
1660 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1661 bits &= ~(1 << j);
1662 fp = fdp->fd_ofiles[fd];
1663 if (dropcount >= n)
1664 goto out;
1665 fp->f_iocount--;
1666
1667 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1668 p->p_fpdrainwait = 0;
1669 wakeup(&p->p_fpdrainwait);
1670 }
1671 dropcount++;
1672 }
1673 }
1674 }
1675 out:
1676 proc_fdunlock(p);
1677 return(error);
1678 }
1679
1680 static int
1681 seldrop(struct proc *p, u_int32_t *ibits, int nfd)
1682 {
1683 struct filedesc *fdp = p->p_fd;
1684 int msk, i, j, fd;
1685 u_int32_t bits;
1686 struct fileproc *fp;
1687 int n = 0;
1688 u_int32_t *iptr;
1689 u_int nw;
1690
1691 /*
1692 * Problems when rebooting, due to Mac OS X signal problems
1693 * in Beaker1C; verify that p->p_fd is valid
1694 */
1695 if (fdp == NULL) {
1696 return(EIO);
1697 }
1698
1699 nw = howmany(nfd, NFDBITS);
1700
1701
1702 proc_fdlock(p);
1703 for (msk = 0; msk < 3; msk++) {
1704 iptr = (u_int32_t *)&ibits[msk * nw];
1705 for (i = 0; i < nfd; i += NFDBITS) {
1706 bits = iptr[i/NFDBITS];
1707 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1708 bits &= ~(1 << j);
1709 fp = fdp->fd_ofiles[fd];
1710 if (fp == NULL
1711 #if 0
1712 /* if you are here then it is being closed */
1713 || (fdp->fd_ofileflags[fd] & UF_RESERVED)
1714 #endif
1715 ) {
1716 proc_fdunlock(p);
1717 return(EBADF);
1718 }
1719 n++;
1720 fp->f_iocount--;
1721 fp->f_flags &= ~FP_INSELECT;
1722
1723 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1724 p->p_fpdrainwait = 0;
1725 wakeup(&p->p_fpdrainwait);
1726 }
1727 }
1728 }
1729 }
1730 proc_fdunlock(p);
1731 return (0);
1732 }
1733
1734 /*
1735 * Record a select request.
1736 */
1737 void
1738 selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
1739 {
1740 thread_t cur_act = current_thread();
1741 struct uthread * ut = get_bsdthread_info(cur_act);
1742
1743 /* need to look at collisions */
1744
1745 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1746 return;
1747 }
1748
1749 /* do not record if this is the second pass of select */
1750 if (p_wql == (void *)0) {
1751 return;
1752 }
1753
1754 if ((sip->si_flags & SI_INITED) == 0) {
1755 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
1756 sip->si_flags |= SI_INITED;
1757 sip->si_flags &= ~SI_CLEAR;
1758 }
1759
1760 if (sip->si_flags & SI_RECORDED) {
1761 sip->si_flags |= SI_COLL;
1762 } else
1763 sip->si_flags &= ~SI_COLL;
1764
1765 sip->si_flags |= SI_RECORDED;
1766 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
1767 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
1768 (wait_queue_link_t)p_wql);
1769
1770 return;
1771 }
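/*
 * Typical driver-side pairing for selrecord()/selwakeup() (sketch; the
 * mydev_* names, sc, and data_available() are hypothetical):
 *
 *	int
 *	mydev_select(dev_t dev, int which, void *wql, struct proc *p)
 *	{
 *		if (which == FREAD && data_available(sc))
 *			return (1);		// ready now
 *		selrecord(p, &sc->sc_selinfo, wql);
 *		return (0);			// not ready; recorded
 *	}
 *
 *	// ...and when data arrives:  selwakeup(&sc->sc_selinfo);
 */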
1772
1773 void
1774 selwakeup(struct selinfo *sip)
1775 {
1776
1777 if ((sip->si_flags & SI_INITED) == 0) {
1778 return;
1779 }
1780
1781 if (sip->si_flags & SI_COLL) {
1782 nselcoll++;
1783 sip->si_flags &= ~SI_COLL;
1784 #if 0
1785 /* will not support */
1786 //wakeup((caddr_t)&selwait);
1787 #endif
1788 }
1789
1790 if (sip->si_flags & SI_RECORDED) {
1791 wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
1792 sip->si_flags &= ~SI_RECORDED;
1793 }
1794
1795 }
1796
1797 void
1798 selthreadclear(struct selinfo *sip)
1799 {
1800
1801 if ((sip->si_flags & SI_INITED) == 0) {
1802 return;
1803 }
1804 if (sip->si_flags & SI_RECORDED) {
1805 selwakeup(sip);
1806 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1807 }
1808 sip->si_flags |= SI_CLEAR;
1809 wait_queue_unlinkall_nofree(&sip->si_wait_queue);
1810 }
1811
1812
1813
1814
1815 #define DBG_POST 0x10
1816 #define DBG_WATCH 0x11
1817 #define DBG_WAIT 0x12
1818 #define DBG_MOD 0x13
1819 #define DBG_EWAKEUP 0x14
1820 #define DBG_ENQUEUE 0x15
1821 #define DBG_DEQUEUE 0x16
1822
1823 #define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
1824 #define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
1825 #define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
1826 #define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
1827 #define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
1828 #define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
1829 #define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1830
1831
1832 #define EVPROCDEQUE(p, evq) do { \
1833 proc_lock(p); \
1834 if (evq->ee_flags & EV_QUEUED) { \
1835 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist); \
1836 evq->ee_flags &= ~EV_QUEUED; \
1837 } \
1838 proc_unlock(p); \
1839 } while (0);
1840
1841
1842 /*
1843 * called upon socket close. dequeue and free all events for
1844 * the socket... the socket must be locked by the caller.
1845 */
1846 void
1847 evsofree(struct socket *sp)
1848 {
1849 struct eventqelt *evq, *next;
1850 proc_t p;
1851
1852 if (sp == NULL)
1853 return;
1854
1855 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
1856 next = evq->ee_slist.tqe_next;
1857 p = evq->ee_proc;
1858
1859 if (evq->ee_flags & EV_QUEUED) {
1860 EVPROCDEQUE(p, evq);
1861 }
1862 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
1863 FREE(evq, M_TEMP);
1864 }
1865 }
1866
1867
1868 /*
1869 * called upon pipe close. dequeue and free all events for
1870 * the pipe... the pipe must be locked by the caller
1871 */
1872 void
1873 evpipefree(struct pipe *cpipe)
1874 {
1875 struct eventqelt *evq, *next;
1876 proc_t p;
1877
1878 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
1879 next = evq->ee_slist.tqe_next;
1880 p = evq->ee_proc;
1881
1882 EVPROCDEQUE(p, evq);
1883
1884 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
1885 FREE(evq, M_TEMP);
1886 }
1887 }
1888
1889
1890 /*
1891 * enqueue this event if it's not already queued. wakeup
1892 * the proc if we do queue this event to it...
1893 * entered with proc lock held... we drop it before
1894 * doing the wakeup and return in that state
1895 */
1896 static void
1897 evprocenque(struct eventqelt *evq)
1898 {
1899 proc_t p;
1900
1901 assert(evq);
1902 p = evq->ee_proc;
1903
1904 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);
1905
1906 proc_lock(p);
1907
1908 if (evq->ee_flags & EV_QUEUED) {
1909 proc_unlock(p);
1910
1911 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1912 return;
1913 }
1914 evq->ee_flags |= EV_QUEUED;
1915
1916 TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
1917
1918 proc_unlock(p);
1919
1920 wakeup(&p->p_evlist);
1921
1922 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1923 }
1924
1925
1926 /*
1927 * pipe lock must be taken by the caller
1928 */
1929 void
1930 postpipeevent(struct pipe *pipep, int event)
1931 {
1932 int mask;
1933 struct eventqelt *evq;
1934
1935 if (pipep == NULL)
1936 return;
1937 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
1938
1939 for (evq = pipep->pipe_evlist.tqh_first;
1940 evq != NULL; evq = evq->ee_slist.tqe_next) {
1941
1942 if (evq->ee_eventmask == 0)
1943 continue;
1944 mask = 0;
1945
1946 switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
1947
1948 case EV_RWBYTES:
1949 if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
1950 mask |= EV_RE;
1951 evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
1952 }
1953 if ((evq->ee_eventmask & EV_WR) &&
1954 (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
1955
1956 if (pipep->pipe_state & PIPE_EOF) {
1957 mask |= EV_WR|EV_RESET;
1958 break;
1959 }
1960 mask |= EV_WR;
1961 evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
1962 }
1963 break;
1964
1965 case EV_WCLOSED:
1966 case EV_RCLOSED:
1967 if ((evq->ee_eventmask & EV_RE)) {
1968 mask |= EV_RE|EV_RCLOSED;
1969 }
1970 if ((evq->ee_eventmask & EV_WR)) {
1971 mask |= EV_WR|EV_WCLOSED;
1972 }
1973 break;
1974
1975 default:
1976 return;
1977 }
1978 if (mask) {
1979 /*
1980 * disarm... postevents are nops until this event is 'read' via
1981 * waitevent and then re-armed via modwatch
1982 */
1983 evq->ee_eventmask = 0;
1984
1985 /*
1986 * since events are disarmed until after the waitevent
1987 * the ee_req.er_xxxx fields can't change once we've
1988 * inserted this event into the proc queue...
1989 * therefore, the waitevent will see a 'consistent'
1990 * snapshot of the event, even though it won't hold
1991 * the pipe lock, and we're updating the event outside
1992 * of the proc lock, which it will hold
1993 */
1994 evq->ee_req.er_eventbits |= mask;
1995
1996 KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);
1997
1998 evprocenque(evq);
1999 }
2000 }
2001 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
2002 }
2003
2004 #if SOCKETS
2005 /*
2006 * given either a sockbuf or a socket run down the
2007 * event list and queue ready events found...
2008 * the socket must be locked by the caller
2009 */
2010 void
2011 postevent(struct socket *sp, struct sockbuf *sb, int event)
2012 {
2013 int mask;
2014 struct eventqelt *evq;
2015 struct tcpcb *tp;
2016
2017 if (sb)
2018 sp = sb->sb_so;
2019 if (sp == NULL)
2020 return;
2021
2022 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);
2023
2024 for (evq = sp->so_evlist.tqh_first;
2025 evq != NULL; evq = evq->ee_slist.tqe_next) {
2026
2027 if (evq->ee_eventmask == 0)
2028 continue;
2029 mask = 0;
2030
2031 /* ready for reading:
2032 - byte cnt >= receive low water mark
2033 - read-half of conn closed
2034 - conn pending for listening sock
2035 - socket error pending
2036
2037 ready for writing
2038 - byte cnt avail >= send low water mark
2039 - write half of conn closed
2040 - socket error pending
2041 - non-blocking conn completed successfully
2042
2043 exception pending
2044 - out of band data
2045 - sock at out of band mark
2046 */
2047
2048 switch (event & EV_DMASK) {
2049
2050 case EV_OOB:
2051 if ((evq->ee_eventmask & EV_EX)) {
2052 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2053 mask |= EV_EX|EV_OOB;
2054 }
2055 break;
2056
2057 case EV_RWBYTES|EV_OOB:
2058 if ((evq->ee_eventmask & EV_EX)) {
2059 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2060 mask |= EV_EX|EV_OOB;
2061 }
2062 /*
2063 * fall into the next case
2064 */
2065 case EV_RWBYTES:
2066 if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
2067 if (sp->so_error) {
2068 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2069 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2070 (tp->t_state == TCPS_CLOSED)) {
2071 mask |= EV_RE|EV_RESET;
2072 break;
2073 }
2074 }
2075 }
2076 mask |= EV_RE;
2077 evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
2078
2079 if (sp->so_state & SS_CANTRCVMORE) {
2080 mask |= EV_FIN;
2081 break;
2082 }
2083 }
2084 if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
2085 if (sp->so_error) {
2086 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2087 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2088 (tp->t_state == TCPS_CLOSED)) {
2089 mask |= EV_WR|EV_RESET;
2090 break;
2091 }
2092 }
2093 }
2094 mask |= EV_WR;
2095 evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
2096 }
2097 break;
2098
2099 case EV_RCONN:
2100 if ((evq->ee_eventmask & EV_RE)) {
2101 mask |= EV_RE|EV_RCONN;
2102 evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one
2103 }
2104 break;
2105
2106 case EV_WCONN:
2107 if ((evq->ee_eventmask & EV_WR)) {
2108 mask |= EV_WR|EV_WCONN;
2109 }
2110 break;
2111
2112 case EV_RCLOSED:
2113 if ((evq->ee_eventmask & EV_RE)) {
2114 mask |= EV_RE|EV_RCLOSED;
2115 }
2116 break;
2117
2118 case EV_WCLOSED:
2119 if ((evq->ee_eventmask & EV_WR)) {
2120 mask |= EV_WR|EV_WCLOSED;
2121 }
2122 break;
2123
2124 case EV_FIN:
2125 if (evq->ee_eventmask & EV_RE) {
2126 mask |= EV_RE|EV_FIN;
2127 }
2128 break;
2129
2130 case EV_RESET:
2131 case EV_TIMEOUT:
2132 if (evq->ee_eventmask & EV_RE) {
2133 mask |= EV_RE | event;
2134 }
2135 if (evq->ee_eventmask & EV_WR) {
2136 mask |= EV_WR | event;
2137 }
2138 break;
2139
2140 default:
2141 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
2142 return;
2143 } /* switch */
2144
2145 KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);
2146
2147 if (mask) {
2148 /*
2149 * disarm... postevents are nops until this event is 'read' via
2150 * waitevent and then re-armed via modwatch
2151 */
2152 evq->ee_eventmask = 0;
2153
2154 /*
2155 * since events are disarmed until after the waitevent,
2156 * the ee_req.er_xxxx fields can't change once we've
2157 * inserted this event into the proc queue...
2158 * and since waitevent can't see this event until we
2159 * enqueue it, waitevent will see a 'consistent'
2160 * snapshot of the event, even though it won't hold
2161 * the socket lock and we're updating the event without
2162 * holding the proc lock (which waitevent will hold)
2163 */
2164 evq->ee_req.er_eventbits |= mask;
2165
2166 evprocenque(evq);
2167 }
2168 }
2169 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
2170 }
2171 #endif /* SOCKETS */
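
/*
 * a sketch of a typical postevent() call site (illustrative only, not
 * a verbatim quote from the socket layer): a protocol that has just
 * appended data to so_rcv, and that already holds the socket lock as
 * required above, would post the new bytes with:
 *
 *	sbappend(&so->so_rcv, m);
 *	postevent(so, &so->so_rcv, EV_RWBYTES);
 *
 * passing sb == 0 and the socket directly (as watchevent() does below)
 * is equivalent, since postevent() derives sp from sb->sb_so whenever
 * sb is non-NULL
 */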
2172
2173
2174 /*
2175 * watchevent system call. user passes us an event to watch
2176 * for. we malloc an event object, initialize it, and queue
2177 * it to the open socket or pipe. when the event occurs, postevent()
2178 * will enqueue it back to our proc where we can retrieve it
2179 * via waitevent().
2180 *
2181 * note that only one watch per file per proc is allowed;
2182 * a duplicate watch fails with EINVAL
2182 *
2183 * Returns:
2184 * ENOMEM No memory for operation
2185 * copyin:EFAULT
2186 */
2187 int
2188 watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
2189 {
2190 struct eventqelt *evq = (struct eventqelt *)0;
2191 struct eventqelt *np = NULL;
2192 struct eventreq64 *erp;
2193 struct fileproc *fp = NULL;
2194 int error;
2195
2196 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2197
2198 // get a qelt and fill with user's req
2199 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2200
2201 if (evq == NULL)
2202 return (ENOMEM);
2203 erp = &evq->ee_req;
2204
2205 // get user's request pkt
2206
2207 if (IS_64BIT_PROCESS(p)) {
2208 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2209 } else {
2210 struct eventreq32 er32;
2211
2212 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2213 if (error == 0) {
2214 /*
2215 * the user only passes in the
2216 * er_type, er_handle and er_data...
2217 * the other fields are initialized
2218 * below, so don't bother to copy
2219 */
2220 erp->er_type = er32.er_type;
2221 erp->er_handle = er32.er_handle;
2222 erp->er_data = (user_addr_t)er32.er_data;
2223 }
2224 }
2225 if (error) {
2226 FREE(evq, M_TEMP);
2227 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2228
2229 return(error);
2230 }
2231 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2232
2233 // validate, freeing qelt if errors
2234 error = 0;
2235 proc_fdlock(p);
2236
2237 if (erp->er_type != EV_FD) {
2238 error = EINVAL;
2239 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2240 error = EBADF;
2241 #if SOCKETS
2242 } else if (fp->f_type == DTYPE_SOCKET) {
2243 socket_lock((struct socket *)fp->f_data, 1);
2244 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2245 #endif /* SOCKETS */
2246 } else if (fp->f_type == DTYPE_PIPE) {
2247 PIPE_LOCK((struct pipe *)fp->f_data);
2248 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2249 } else {
2250 fp_drop(p, erp->er_handle, fp, 1);
2251 error = EINVAL;
2252 }
2253 proc_fdunlock(p);
2254
2255 if (error) {
2256 FREE(evq, M_TEMP);
2257
2258 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2259 return(error);
2260 }
2261
2262 /*
2263 * only allow one watch per file per proc
2264 */
2265 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2266 if (np->ee_proc == p) {
2267 #if SOCKETS
2268 if (fp->f_type == DTYPE_SOCKET)
2269 socket_unlock((struct socket *)fp->f_data, 1);
2270 else
2271 #endif /* SOCKETS */
2272 PIPE_UNLOCK((struct pipe *)fp->f_data);
2273 fp_drop(p, erp->er_handle, fp, 0);
2274 FREE(evq, M_TEMP);
2275
2276 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2277 return(EINVAL);
2278 }
2279 }
2280 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2281 evq->ee_proc = p;
2282 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2283 evq->ee_flags = 0;
2284
2285 #if SOCKETS
2286 if (fp->f_type == DTYPE_SOCKET) {
2287 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2288 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2289
2290 socket_unlock((struct socket *)fp->f_data, 1);
2291 } else
2292 #endif /* SOCKETS */
2293 {
2294 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2295 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2296
2297 PIPE_UNLOCK((struct pipe *)fp->f_data);
2298 }
2299 fp_drop_event(p, erp->er_handle, fp);
2300
2301 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2302 return(0);
2303 }
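
/*
 * usage sketch (hypothetical user-space caller; assumes the watchevent
 * syscall stub and struct eventreq from <sys/ev.h> are visible, and is
 * not part of this file):
 *
 *	struct eventreq er;
 *
 *	bzero(&er, sizeof (er));
 *	er.er_type = EV_FD;		// only EV_FD is accepted above
 *	er.er_handle = sock_fd;		// must name a socket or a pipe
 *	er.er_data = (void *)token;	// opaque, returned by waitevent
 *
 *	if (watchevent(&er, EV_RE | EV_WR) == -1)
 *		err(1, "watchevent");	// e.g. EINVAL for a second watch
 *					// on the same fd by this proc
 */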
2304
2305
2306
2307 /*
2308 * waitevent system call.
2309 * grabs the next waiting event for this proc and returns
2310 * it. if no events are queued, the user can request to sleep
2311 * (with or without a timeout) or to poll; poll mode is selected by
2312 * ((tv != NULL && interval == 0) || tv == -1)
2313 */
2314 int
2315 waitevent(proc_t p, struct waitevent_args *uap, int *retval)
2316 {
2317 int error = 0;
2318 struct eventqelt *evq;
2319 struct eventreq64 *erp;
2320 uint64_t abstime, interval;
2321 boolean_t fast_poll = FALSE;
2322 union {
2323 struct eventreq64 er64;
2324 struct eventreq32 er32;
2325 } uer;
2326
2327 interval = 0;
2328
2329 if (uap->tv) {
2330 struct timeval atv;
2331 /*
2332 * check for fast poll method
2333 */
2334 if (IS_64BIT_PROCESS(p)) {
2335 if (uap->tv == (user_addr_t)-1)
2336 fast_poll = TRUE;
2337 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2338 fast_poll = TRUE;
2339
2340 if (fast_poll == TRUE) {
2341 if (p->p_evlist.tqh_first == NULL) {
2342 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2343 /*
2344 * poll failed
2345 */
2346 *retval = 1;
2347 return (0);
2348 }
2349 proc_lock(p);
2350 goto retry;
2351 }
2352 error = copyin(uap->tv, (caddr_t)&atv, sizeof (atv));
2353
2354 if (error)
2355 return(error);
2356 if (itimerfix(&atv)) {
2357 error = EINVAL;
2358 return(error);
2359 }
2360 interval = tvtoabstime(&atv);
2361 }
2362 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
2363
2364 proc_lock(p);
2365 retry:
2366 if ((evq = p->p_evlist.tqh_first) != NULL) {
2367 /*
2368 * found one... make a local copy while it's still on the queue
2369 * to prevent it from changing while in the midst of copying
2370 * don't want to hold the proc lock across a copyout because
2371 * it might block on a page fault at the target in user space
2372 */
2373 erp = &evq->ee_req;
2374
2375 if (IS_64BIT_PROCESS(p))
2376 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2377 else {
2378 uer.er32.er_type = erp->er_type;
2379 uer.er32.er_handle = erp->er_handle;
2380 uer.er32.er_data = (uint32_t)erp->er_data;
2381 uer.er32.er_ecnt = erp->er_ecnt;
2382 uer.er32.er_rcnt = erp->er_rcnt;
2383 uer.er32.er_wcnt = erp->er_wcnt;
2384 uer.er32.er_eventbits = erp->er_eventbits;
2385 }
2386 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2387
2388 evq->ee_flags &= ~EV_QUEUED;
2389
2390 proc_unlock(p);
2391
2392 if (IS_64BIT_PROCESS(p))
2393 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2394 else
2395 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
2396
2397 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2398 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
2399 return (error);
2400 }
2401 else {
2402 if (uap->tv && interval == 0) {
2403 proc_unlock(p);
2404 *retval = 1; // poll failed
2405
2406 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
2407 return (error);
2408 }
2409 if (interval != 0)
2410 clock_absolutetime_interval_to_deadline(interval, &abstime);
2411 else
2412 abstime = 0;
2413
2414 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
2415
2416 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2417
2418 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
2419
2420 if (error == 0)
2421 goto retry;
2422 if (error == ERESTART)
2423 error = EINTR;
2424 if (error == EWOULDBLOCK) {
2425 *retval = 1;
2426 error = 0;
2427 }
2428 }
2429 proc_unlock(p);
2430
2431 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
2432 return (error);
2433 }
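
/*
 * usage sketch (hypothetical caller, same assumptions as the
 * watchevent example above):
 *
 *	struct eventreq er;
 *	struct timeval tv = { 2, 0 };
 *	int ret;
 *
 *	// sleep for up to 2 seconds; the stub returns 0 with er filled
 *	// in when an event fires, or 1 when the wait times out
 *	ret = waitevent(&er, &tv);
 *
 *	// fast poll: a tv of (uintptr_t)-1 takes the early-exit path
 *	// above without copying in a timeval; a zeroed tv also polls,
 *	// but via the copyin/itimerfix path
 *	ret = waitevent(&er, (struct timeval *)-1);
 */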
2434
2435
2436 /*
2437 * modwatch system call. user passes in the event to modify.
2438 * if we find it, we reset the event bits and queue/dequeue the
2439 * event as needed.
2440 */
2441 int
2442 modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
2443 {
2444 struct eventreq64 er;
2445 struct eventreq64 *erp = &er;
2446 struct eventqelt *evq = NULL; /* protected by error return */
2447 int error;
2448 struct fileproc *fp;
2449 int flag;
2450
2451 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2452
2453 /*
2454 * get user's request pkt
2455 * just need the er_type and er_handle which sit above the
2456 * problematic er_data (32/64 issue)... so only copy in
2457 * those 2 fields
2458 */
2459 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2460 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2461 return(error);
2462 }
2463 proc_fdlock(p);
2464
2465 if (erp->er_type != EV_FD) {
2466 error = EINVAL;
2467 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2468 error = EBADF;
2469 #if SOCKETS
2470 } else if (fp->f_type == DTYPE_SOCKET) {
2471 socket_lock((struct socket *)fp->f_data, 1);
2472 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2473 #endif /* SOCKETS */
2474 } else if (fp->f_type == DTYPE_PIPE) {
2475 PIPE_LOCK((struct pipe *)fp->f_data);
2476 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2477 } else {
2478 fp_drop(p, erp->er_handle, fp, 1);
2479 error = EINVAL;
2480 }
2481
2482 if (error) {
2483 proc_fdunlock(p);
2484 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2485 return(error);
2486 }
2487
2488 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2489 fp->f_flags &= ~FP_WAITEVENT;
2490 }
2491 proc_fdunlock(p);
2492
2493 // locate event if possible
2494 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2495 if (evq->ee_proc == p)
2496 break;
2497 }
2498 if (evq == NULL) {
2499 #if SOCKETS
2500 if (fp->f_type == DTYPE_SOCKET)
2501 socket_unlock((struct socket *)fp->f_data, 1);
2502 else
2503 #endif /* SOCKETS */
2504 PIPE_UNLOCK((struct pipe *)fp->f_data);
2505 fp_drop(p, erp->er_handle, fp, 0);
2506 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2507 return(EINVAL);
2508 }
2509 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2510
2511 if (uap->u_eventmask == EV_RM) {
2512 EVPROCDEQUE(p, evq);
2513
2514 #if SOCKETS
2515 if (fp->f_type == DTYPE_SOCKET) {
2516 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2517 socket_unlock((struct socket *)fp->f_data, 1);
2518 } else
2519 #endif /* SOCKETS */
2520 {
2521 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2522 PIPE_UNLOCK((struct pipe *)fp->f_data);
2523 }
2524 fp_drop(p, erp->er_handle, fp, 0);
2525 FREE(evq, M_TEMP);
2526 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2527 return(0);
2528 }
2529 switch (uap->u_eventmask & EV_MASK) {
2530
2531 case 0:
2532 flag = 0;
2533 break;
2534
2535 case EV_RE:
2536 case EV_WR:
2537 case EV_RE|EV_WR:
2538 flag = EV_RWBYTES;
2539 break;
2540
2541 case EV_EX:
2542 flag = EV_OOB;
2543 break;
2544
2545 case EV_EX|EV_RE:
2546 case EV_EX|EV_WR:
2547 case EV_EX|EV_RE|EV_WR:
2548 flag = EV_OOB|EV_RWBYTES;
2549 break;
2550
2551 default:
2552 #if SOCKETS
2553 if (fp->f_type == DTYPE_SOCKET)
2554 socket_unlock((struct socket *)fp->f_data, 1);
2555 else
2556 #endif /* SOCKETS */
2557 PIPE_UNLOCK((struct pipe *)fp->f_data);
2558 fp_drop(p, erp->er_handle, fp, 0);
2559 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2560 return(EINVAL);
2561 }
2562 /*
2563 * since we're holding the socket/pipe lock, the event
2564 * cannot go from the unqueued state to the queued state;
2565 * however, it can go from the queued state to the unqueued state
2566 * since that direction is protected by the proc_lock...
2567 * so do a quick check for EV_QUEUED w/o holding the proc lock
2568 * since by far the common case will be NOT EV_QUEUED, this saves
2569 * us taking the proc_lock the majority of the time
2570 */
2571 if (evq->ee_flags & EV_QUEUED) {
2572 /*
2573 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2574 */
2575 EVPROCDEQUE(p, evq);
2576 }
2577 /*
2578 * while the event is off the proc queue and
2579 * we're holding the socket/pipe lock
2580 * it's safe to update these fields...
2581 */
2582 evq->ee_req.er_eventbits = 0;
2583 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2584
2585 #if SOCKETS
2586 if (fp->f_type == DTYPE_SOCKET) {
2587 postevent((struct socket *)fp->f_data, 0, flag);
2588 socket_unlock((struct socket *)fp->f_data, 1);
2589 } else
2590 #endif /* SOCKETS */
2591 {
2592 postpipeevent((struct pipe *)fp->f_data, flag);
2593 PIPE_UNLOCK((struct pipe *)fp->f_data);
2594 }
2595 fp_drop(p, erp->er_handle, fp, 0);
2596 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
2597 return(0);
2598 }
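
/*
 * usage sketch (hypothetical caller): after waitevent() has delivered
 * an event, the watch is disarmed; re-arm it for reads only, or tear
 * it down entirely:
 *
 *	er.er_handle = sock_fd;
 *	modwatch(&er, EV_RE);	// re-arm; also re-posts any pending
 *				// EV_RWBYTES state so nothing is lost
 *	...
 *	modwatch(&er, EV_RM);	// dequeue and free the eventqelt
 */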
2599
2600 /* this routine is called from the close of an fd with proc_fdlock held (dropped and re-taken internally) */
2601 int
2602 waitevent_close(struct proc *p, struct fileproc *fp)
2603 {
2604 struct eventqelt *evq;
2605
2606
2607 fp->f_flags &= ~FP_WAITEVENT;
2608
2609 #if SOCKETS
2610 if (fp->f_type == DTYPE_SOCKET) {
2611 socket_lock((struct socket *)fp->f_data, 1);
2612 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2613 } else
2614 #endif /* SOCKETS */
2615 if (fp->f_type == DTYPE_PIPE) {
2616 PIPE_LOCK((struct pipe *)fp->f_data);
2617 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2618 }
2619 else {
2620 return(EINVAL);
2621 }
2622 proc_fdunlock(p);
2623
2624
2625 // locate event if possible
2626 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2627 if (evq->ee_proc == p)
2628 break;
2629 }
2630 if (evq == NULL) {
2631 #if SOCKETS
2632 if (fp->f_type == DTYPE_SOCKET)
2633 socket_unlock((struct socket *)fp->f_data, 1);
2634 else
2635 #endif /* SOCKETS */
2636 PIPE_UNLOCK((struct pipe *)fp->f_data);
2637
2638 proc_fdlock(p);
2639
2640 return(EINVAL);
2641 }
2642 EVPROCDEQUE(p, evq);
2643
2644 #if SOCKETS
2645 if (fp->f_type == DTYPE_SOCKET) {
2646 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2647 socket_unlock((struct socket *)fp->f_data, 1);
2648 } else
2649 #endif /* SOCKETS */
2650 {
2651 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2652 PIPE_UNLOCK((struct pipe *)fp->f_data);
2653 }
2654 FREE(evq, M_TEMP);
2655
2656 proc_fdlock(p);
2657
2658 return(0);
2659 }
2660
2661
2662 /*
2663 * gethostuuid
2664 *
2665 * Description: Get the host UUID from IOKit and return it to user space.
2666 *
2667 * Parameters: uuid_buf Pointer to buffer to receive UUID
2668 * timeout Timespec for timeout
2669 *
2670 * Returns: 0 Success
2671 * EWOULDBLOCK Timeout is too short
2672 * copyout:EFAULT Bad user buffer
2673 *
2674 * Notes: A timeout seems redundant, since if it's tolerable to not
2675 * have a system UUID in hand, then why ask for one?
2676 */
2677 int
2678 gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *retval)
2679 {
2680 kern_return_t kret;
2681 int error;
2682 mach_timespec_t mach_ts; /* for IOKit call */
2683 __darwin_uuid_t uuid_kern; /* for IOKit call */
2684
2685 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2686 if ( proc_is64bit(p) ) {
2687 struct user_timespec ts;
2688 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2689 if (error)
2690 return (error);
2691 mach_ts.tv_sec = ts.tv_sec;
2692 mach_ts.tv_nsec = ts.tv_nsec;
2693 } else {
2694 struct timespec ts;
2695 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2696 if (error)
2697 return (error);
2698 mach_ts.tv_sec = ts.tv_sec;
2699 mach_ts.tv_nsec = ts.tv_nsec;
2700 }
2701
2702 /* Call IOKit with the stack buffer to get the UUID */
2703 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2704
2705 /*
2706 * If we get it, copy out the data to the user buffer; note that a
2707 * uuid_t is an array of characters, so this is size invariant for
2708 * 32 vs. 64 bit.
2709 */
2710 if (kret == KERN_SUCCESS) {
2711 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2712 } else {
2713 error = EWOULDBLOCK;
2714 }
2715
2716 return (error);
2717 }
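
/*
 * usage sketch (user space): gethostuuid(2) is reachable through the
 * gethostuuid() wrapper declared in <unistd.h> on Mac OS X:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <uuid/uuid.h>
 *
 *	uuid_t uuid;
 *	uuid_string_t str;
 *	struct timespec ts = { 5, 0 };	// give IOKit up to 5 seconds
 *
 *	if (gethostuuid(uuid, &ts) == 0) {
 *		uuid_unparse(uuid, str);
 *		printf("host uuid: %s\n", str);
 *	}
 */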