/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/socketvar.h>
#include <sys/uio_internal.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>

#include <sys/mount_internal.h>
#include <sys/protosw.h>
#include <sys/ev.h>
#include <sys/user.h>
#include <sys/kdebug.h>
#include <sys/poll.h>
#include <sys/event.h>
#include <sys/eventvar.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>

#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/syscall.h>
#include <sys/pipe.h>

#include <security/audit/audit.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_debug.h>
/* for wait queue based select */
#include <kern/wait_queue.h>
#include <kern/kalloc.h>
#include <sys/vnode_internal.h>

/* XXX should be in a header file somewhere */
void evsofree(struct socket *);
void evpipefree(struct pipe *);
void postpipeevent(struct pipe *, int);
void postevent(struct socket *, struct sockbuf *, int);
extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);

int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
extern void *get_bsduthreadarg(thread_t);
extern int *get_bsduthreadrval(thread_t);

__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
				  user_addr_t bufp, user_size_t nbyte,
				  off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
				   user_addr_t bufp, user_size_t nbyte,
				   off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);

#if NETAT
extern int appletalk_inited;
#endif /* NETAT */

#define f_flag		f_fglob->fg_flag
#define f_type		f_fglob->fg_type
#define f_msgcount	f_fglob->fg_msgcount
#define f_cred		f_fglob->fg_cred
#define f_ops		f_fglob->fg_ops
#define f_offset	f_fglob->fg_offset
#define f_data		f_fglob->fg_data

/*
 * Read system call.
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
}

int
read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	struct vfs_context context;

	if ( (error = preparefileread(p, &fp, fd, 0)) )
		return (error);

	context = *(vfs_context_current());
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
			   (off_t)-1, 0, retval);

	donefileread(p, fp, fd);

	return (error);
}
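
/*
 * Illustrative sketch (not part of the original source): a minimal
 * userspace caller of read(2), assuming a POSIX libc.  Short reads and
 * EINTR are normal outcomes of the path above, so careful callers loop:
 *
 *	#include <unistd.h>
 *	#include <errno.h>
 *
 *	ssize_t
 *	read_fully(int fd, char *buf, size_t len)
 *	{
 *		size_t off = 0;
 *		while (off < len) {
 *			ssize_t n = read(fd, buf + off, len - off);
 *			if (n < 0 && errno == EINTR)
 *				continue;	// interrupted; retry
 *			if (n <= 0)
 *				break;		// error or end of file
 *			off += n;
 *		}
 *		return ((ssize_t)off);
 *	}
 */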

/*
 * Pread system call
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
}

int
pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp = NULL;	/* fp set by preparefileread() */
	int fd = uap->fd;
	int error;
	struct vfs_context context;

	if ( (error = preparefileread(p, &fp, fd, 1)) )
		goto out;

	context = *(vfs_context_current());
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = dofileread(&context, fp, uap->buf, uap->nbyte,
			   uap->offset, FOF_OFFSET, retval);

	donefileread(p, fp, fd);

	if (!error)
		KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
			uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

out:
	return (error);
}
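
/*
 * Illustrative sketch (not part of the original source): pread(2) pairs
 * the offset with the read (FOF_OFFSET above), so the descriptor's seek
 * position is never consulted or updated.  A hedged userspace example,
 * assuming a POSIX libc and a hypothetical file path:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/tmp/example", O_RDONLY);	// hypothetical path
 *	char buf[64];
 *	ssize_t n = pread(fd, buf, sizeof(buf), 512);	// read at offset 512
 *	// lseek(fd, 0, SEEK_CUR) would still report 0 here
 */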

/*
 * Code common for read and pread
 */

void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
	proc_fdlock_spin(p);

	fp->f_flags &= ~FP_INCHRREAD;

	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
}

/*
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *	fp_lookup:EBADF
 *	fo_read:???
 */
int
preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
{
	vnode_t vp;
	int error;
	struct fileproc *fp;

	AUDIT_ARG(fd, fd);

	proc_fdlock_spin(p);

	error = fp_lookup(p, fd, &fp, 1);

	if (error) {
		proc_fdunlock(p);
		return (error);
	}
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type == DTYPE_VNODE) {
		vp = (struct vnode *)fp->f_fglob->fg_data;

		if (check_for_pread && (vnode_isfifo(vp))) {
			error = ESPIPE;
			goto out;
		}
		if (check_for_pread && (vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto out;
		}
		if (vp->v_type == VCHR)
			fp->f_flags |= FP_INCHRREAD;
	}

	*fp_ret = fp;

	proc_fdunlock(p);
	return (0);

out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return (error);
}


/*
 * Returns:	0			Success
 *		EINVAL
 *	fo_read:???
 */
__private_extern__ int
dofileread(vfs_context_t ctx, struct fileproc *fp,
	   user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	   user_ssize_t *retval)
{
	uio_t auio;
	user_ssize_t bytecnt;
	long error = 0;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (nbyte > INT_MAX)
		return (EINVAL);

	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
					    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
					    &uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;

	if ((error = fo_read(fp, auio, flags, ctx))) {
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	bytecnt -= uio_resid(auio);

	*retval = bytecnt;

	return (error);
}

/*
 * Scatter read system call.
 *
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM
 *	copyin:EFAULT
 *	rd_uio:???
 */
int
readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
}

int
readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct user_iovec *iovp;

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			  UIO_READ);

	/* get location of iovecs within the uio.  then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(uap->iovp,
		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		uap->iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	uio_calculateresid(auio);
	error = rd_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}
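
/*
 * Illustrative sketch (not part of the original source): readv(2)
 * scatters one read across several buffers; the iovec array copied in
 * above originates in userspace like this (assuming a POSIX libc):
 *
 *	#include <sys/uio.h>
 *
 *	char hdr[16], body[4096];
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *		{ .iov_base = body, .iov_len = sizeof(body) },
 *	};
 *	ssize_t n = readv(fd, iov, 2);	// fills hdr first, then body
 */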

/*
 * Write system call
 *
 * Returns:	0			Success
 *		EBADF
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
}

int
write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p, fd, &fp, 0);
	if (error)
		return(error);
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else {
		struct vfs_context context = *(vfs_context_current());
		context.vc_ucred = fp->f_fglob->fg_cred;

		error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
				    (off_t)-1, 0, retval);
	}
	if (error == 0)
		fp_drop_written(p, fd, fp);
	else
		fp_drop(p, fd, fp, 0);
	return(error);
}

/*
 * pwrite system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *		EINVAL
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
}

int
pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	vnode_t vp = (vnode_t)0;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p, fd, &fp, 0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else {
		struct vfs_context context = *vfs_context_current();
		context.vc_ucred = fp->f_fglob->fg_cred;

		if (fp->f_type != DTYPE_VNODE) {
			error = ESPIPE;
			goto errout;
		}
		vp = (vnode_t)fp->f_fglob->fg_data;
		if (vnode_isfifo(vp)) {
			error = ESPIPE;
			goto errout;
		}
		if ((vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto errout;
		}
		if (uap->offset == (off_t)-1) {
			error = EINVAL;
			goto errout;
		}

		error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
				    uap->offset, FOF_OFFSET, retval);
	}
errout:
	if (error == 0)
		fp_drop_written(p, fd, fp);
	else
		fp_drop(p, fd, fp, 0);

	if (!error)
		KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
			uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

	return(error);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	<fo_write>:EPIPE
 *	<fo_write>:???			[indirect through struct fileops]
 */
__private_extern__ int
dofilewrite(vfs_context_t ctx, struct fileproc *fp,
	    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	    user_ssize_t *retval)
{
	uio_t auio;
	long error = 0;
	user_ssize_t bytecnt;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (nbyte > INT_MAX)
		return (EINVAL);

	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
					    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
					    &uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;
	if ((error = fo_write(fp, auio, flags, ctx))) {
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
			/* XXX Raise the signal on the thread? */
			psignal(vfs_context_proc(ctx), SIGPIPE);
		}
	}
	bytecnt -= uio_resid(auio);
	*retval = bytecnt;

	return (error);
}
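
/*
 * Illustrative sketch (not part of the original source): the EPIPE path
 * above raises SIGPIPE for non-sockets, which terminates the process by
 * default.  A hedged userspace example of opting out, assuming a POSIX
 * libc:
 *
 *	#include <signal.h>
 *	#include <errno.h>
 *	#include <unistd.h>
 *
 *	signal(SIGPIPE, SIG_IGN);	// take EPIPE as an errno instead
 *	if (write(fd, buf, len) < 0 && errno == EPIPE)
 *		;	// the reader side is gone; handle it gracefully
 */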

/*
 * Gather write system call
 */
int
writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
}

int
writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct user_iovec *iovp;

	AUDIT_ARG(fd, uap->fd);

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			  UIO_WRITE);

	/* get location of iovecs within the uio.  then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(uap->iovp,
		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		uap->iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	uio_calculateresid(auio);
	error = wr_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}


int
wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	error = fp_lookup(p, fdes, &fp, 0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	count = uio_resid(uio);

	context.vc_ucred = fp->f_cred;
	error = fo_write(fp, uio, 0, &context);
	if (error) {
		if (uio_resid(uio) != count && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
			psignal(p, SIGPIPE);
	}
	*retval = count - uio_resid(uio);

out:
	if (error == 0)
		fp_drop_written(p, fdes, fp);
	else
		fp_drop(p, fdes, fp, 0);
	return(error);
}


int
rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	if ( (error = preparefileread(p, &fp, fdes, 0)) )
		return (error);

	count = uio_resid(uio);

	context.vc_ucred = fp->f_cred;

	error = fo_read(fp, uio, 0, &context);

	if (error) {
		if (uio_resid(uio) != count && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	*retval = count - uio_resid(uio);

	donefileread(p, fp, fdes);

	return (error);
}

/*
 * Ioctl system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ENOTTY
 *		ENOMEM
 *		ESRCH
 *	copyin:EFAULT
 *	copyout:EFAULT
 *	fp_lookup:EBADF			Bad file descriptor
 *	fo_ioctl:???
 */
int
ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
{
	struct fileproc *fp;
	u_long com;
	int error = 0;
	u_int size;
	caddr_t datap, memp;
	boolean_t is64bit;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];
	int fd = uap->fd;
	struct vfs_context context = *vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(addr, uap->data);

	is64bit = proc_is64bit(p);
#if CONFIG_AUDIT
	if (is64bit)
		AUDIT_ARG(value64, uap->com);
	else
		AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, uap->com));
#endif /* CONFIG_AUDIT */

	proc_fdlock(p);
	error = fp_lookup(p, fd, &fp, 1);
	if (error) {
		proc_fdunlock(p);
		return(error);
	}

	AUDIT_ARG(file, p, fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
	error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
	if (error)
		goto out;
#endif

#if NETAT
	/*
	 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
	 * while implementing an ATioctl system call
	 */
	{
		if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
			u_long fixed_command;

#ifdef APPLETALK_DEBUG
			kprintf("ioctl: special AppleTalk \n");
#endif
			datap = &stkbuf[0];
			*(user_addr_t *)datap = uap->data;
			fixed_command = _IOW(0, 0xff99, uap->data);
			error = fo_ioctl(fp, fixed_command, datap, &context);
			goto out;
		}
	}

#endif /* NETAT */


	switch (com = uap->com) {
	case FIONCLEX:
		*fdflags(p, uap->fd) &= ~UF_EXCLOSE;
		error = 0;
		goto out;
	case FIOCLEX:
		*fdflags(p, uap->fd) |= UF_EXCLOSE;
		error = 0;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		proc_fdunlock(p);
		if ((memp = (caddr_t)kalloc(size)) == 0) {
			proc_fdlock(p);
			error = ENOMEM;
			goto out;
		}
		proc_fdlock(p);
		datap = memp;
	} else
		datap = &stkbuf[0];
	if (com & IOC_IN) {
		if (size) {
			proc_fdunlock(p);
			error = copyin(uap->data, datap, size);
			if (error) {
				if (memp)
					kfree(memp, size);
				proc_fdlock(p);
				goto out;
			}
			proc_fdlock(p);
		} else {
			/* XXX - IOC_IN and no size?  we should probably return an error here!! */
			if (is64bit) {
				*(user_addr_t *)datap = uap->data;
			}
			else {
				*(uint32_t *)datap = (uint32_t)uap->data;
			}
		}
	} else if ((com & IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(datap, size);
	else if (com & IOC_VOID) {
		/* XXX - this is odd since IOC_VOID means no parameters */
		if (is64bit) {
			*(user_addr_t *)datap = uap->data;
		}
		else {
			*(uint32_t *)datap = (uint32_t)uap->data;
		}
	}

	switch (com) {

	case FIONBIO:
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
		break;

	case FIOASYNC:
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
		break;

	case FIOSETOWN:
		tmp = *(int *)datap;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (fp->f_type == DTYPE_PIPE) {
			error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = proc_find(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrpid;
			proc_rele(p1);
		}
		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
		*(int *)datap = -*(int *)datap;
		break;

	default:
		error = fo_ioctl(fp, com, datap, &context);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com & IOC_OUT) && size)
			error = copyout(datap, uap->data, (u_int)size);
		break;
	}
	proc_fdunlock(p);
	if (memp)
		kfree(memp, size);
	proc_fdlock(p);
out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return(error);
}
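
/*
 * Illustrative sketch (not part of the original source): FIONBIO from
 * the switch above, driven from userspace (assuming a POSIX libc).
 * Because FIONBIO is marked IOC_IN, the kernel copies the int in before
 * fo_ioctl() runs:
 *
 *	#include <sys/ioctl.h>
 *
 *	int on = 1;
 *	if (ioctl(fd, FIONBIO, &on) == 0)
 *		;	// descriptor is now non-blocking
 */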

int selwait, nselcoll;
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
		   int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
		    int nfd, int * count, int *kfcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);

/*
 * Select system call.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAGAIN			Nonconformant error if allocation fails
 *	selprocess:???
 */
int
select(struct proc *p, struct select_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
}

int
select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
{
	int error = 0;
	u_int ni, nw, size;
	thread_t th_act;
	struct uthread *uth;
	struct _select *sel;
	int needzerofill = 1;
	int count = 0;
	int kfcount = 0;

	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	retval = (int *)get_bsduthreadrval(th_act);
	*retval = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	/* select on thread of process that already called proc_exit() */
	if (p->p_fd == NULL) {
		return (EBADF);
	}

	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles;	/* forgiving; slightly wrong */

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if the previously allocated space for the bits is smaller than
	 * what is requested or no space has yet been allocated for this
	 * thread, allocate enough space now.
	 *
	 * Note: If this allocation fails, select() will return EAGAIN; this
	 * is the same thing poll() returns in a no-memory situation, but
	 * it is not a POSIX compliant error code for select().
	 */
	if (sel->nbytes < (3 * ni)) {
		int nbytes = 3 * ni;

		/* Free previous allocation, if any */
		if (sel->ibits != NULL)
			FREE(sel->ibits, M_TEMP);
		if (sel->obits != NULL) {
			FREE(sel->obits, M_TEMP);
			/* NULL out; subsequent ibits allocation may fail */
			sel->obits = NULL;
		}

		MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->ibits == NULL)
			return (EAGAIN);
		MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->obits == NULL) {
			FREE(sel->ibits, M_TEMP);
			sel->ibits = NULL;
			return (EAGAIN);
		}
		sel->nbytes = nbytes;
		needzerofill = 0;
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define	getbits(name, x) \
	do { \
		if (uap->name && (error = copyin(uap->name, \
			(caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (uap->tv) {
		struct timeval atv;
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timeval atv64;
			error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
			/* Loses resolution - assume timeout < 68 years */
			atv.tv_sec = atv64.tv_sec;
			atv.tv_usec = atv64.tv_usec;
		} else {
			struct user32_timeval atv32;
			error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
			atv.tv_sec = atv32.tv_sec;
			atv.tv_usec = atv32.tv_usec;
		}
		if (error)
			goto continuation;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto continuation;
		}

		clock_absolutetime_interval_to_deadline(
			tvtoabstime(&atv), &sel->abstime);
	}
	else
		sel->abstime = 0;

	sel->kfcount = 0;
	if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
		goto continuation;
	}

	sel->count = count;
	sel->kfcount = kfcount;
	size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
	if (uth->uu_allocsize) {
		if (uth->uu_wqset == 0)
			panic("select: wql memory smashed");
		/* needed for the select now */
		if (size > uth->uu_allocsize) {
			kfree(uth->uu_wqset, uth->uu_allocsize);
			uth->uu_allocsize = size;
			uth->uu_wqset = (wait_queue_set_t)kalloc(size);
			if (uth->uu_wqset == (wait_queue_set_t)NULL)
				panic("failed to allocate memory for waitqueue\n");
		}
	} else {
		sel->count = count;
		uth->uu_allocsize = size;
		uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
		if (uth->uu_wqset == (wait_queue_set_t)NULL)
			panic("failed to allocate memory for waitqueue\n");
	}
	bzero(uth->uu_wqset, size);
	sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
	wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:
	return selprocess(error, SEL_FIRSTPASS);
}

int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}

int
selprocess(int error, int sel_pass)
{
	int ncoll;
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_args *uap;
	int *retval;
	struct _select *sel;
	int unwind = 1;
	int prepost = 0;
	int somewakeup = 0;
	int doretry = 0;
	wait_result_t wait_result;

	p = current_proc();
	th_act = current_thread();
	uap = (struct select_args *)get_bsduthreadarg(th_act);
	retval = (int *)get_bsduthreadrval(th_act);
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;

	/* on the first pass the wait queue is not set up yet */
	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
		unwind = 0;
	if (sel->count == 0)
		unwind = 0;
retry:
	if (error != 0) {
		goto done;
	}

	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, &p->p_flag);
	/* skip scans if the select is just for timeouts */
	if (sel->count) {
		if (sel_pass == SEL_FIRSTPASS)
			wait_queue_sub_clearrefs(uth->uu_wqset);

		error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
		if (error || *retval) {
			goto done;
		}
		if (prepost) {
			/* if the select was preposted, we can wake up and discover
			 * that someone else already read the data; go to select
			 * again if time permits
			 */
			prepost = 0;
			doretry = 1;
		}
		if (somewakeup) {
			somewakeup = 0;
			doretry = 1;
		}
	}

	if (uap->tv) {
		uint64_t now;

		clock_get_uptime(&now);
		if (now >= sel->abstime)
			goto done;
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && sel->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to collisions, no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);

	/* if the select is just for timeout skip check */
	if (sel->count && (sel_pass == SEL_SECONDPASS))
		panic("selprocess: 2nd pass assertwaiting");

	/* Wait Queue Subordinate has waitqueue as first element */
	wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
					     NULL, THREAD_ABORTSAFE, sel->abstime);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events */
		error = tsleep1(NULL, PSOCK | PCATCH,
				"select", 0, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	sel_pass = SEL_SECONDPASS;
	if (error == 0) {
		if (!prepost)
			somewakeup = 1;
		goto retry;
	}
done:
	if (unwind) {
		wait_subqueue_unlink_all(uth->uu_wqset);
		seldrop(p, sel->ibits, uap->nd);
	}
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

#define	putbits(name, x) \
	do { \
		if (uap->name && (error2 = \
			copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
			error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	return(error);
}
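
/*
 * Illustrative sketch (not part of the original source): the classic
 * userspace counterpart of the code above, assuming a POSIX libc.  A
 * non-NULL, zero-valued timeval gives the pure poll behavior noted in
 * selprocess():
 *
 *	#include <sys/select.h>
 *
 *	fd_set rfds;
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	int n = select(fd + 1, &rfds, NULL, NULL, &tv);
 *	if (n > 0 && FD_ISSET(fd, &rfds))
 *		;	// fd is readable
 */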

static int
selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
	int sel_pass, wait_queue_sub_t wqsub)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	int nc = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	char *wql;
	char *wql_ptr;
	int count, kfcount;
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();

	/*
	 * Problems at reboot due to Mac OS X signal handling
	 * in Beaker1C; verify that p->p_fd is valid.
	 */
	if (fdp == NULL) {
		*retval = 0;
		return(EIO);
	}
	ibits = sel->ibits;
	obits = sel->obits;
	wql = sel->wql;

	nw = howmany(nfd, NFDBITS);

	count = sel->count;
	kfcount = sel->kfcount;

	if (kfcount > count)
		panic("selscan: count < kfcount");

	if (kfcount != 0) {
		proc_fdlock(p);
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];

			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];

				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];

					if (fp == NULL ||
					    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						proc_fdunlock(p);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS) {
						wql_ptr = (char *)0;
						fp->f_flags &= ~FP_INSELECT;
						fp->f_waddr = (void *)0;
					} else {
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
						fp->f_flags |= FP_INSELECT;
						fp->f_waddr = (void *)wqsub;
					}

					context.vc_ucred = fp->f_cred;

					if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
					    && ((vp = (struct vnode *)fp->f_data) != NULLVP)
					    && (vp->v_type == VCHR)
					    && fo_select(fp, flag[msk], wql_ptr, &context)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		proc_fdunlock(p);
	}

	nc = 0;
	if (kfcount != count) {
		proc_fdlock(p);
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];

			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];

				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];

					if (fp == NULL ||
					    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						proc_fdunlock(p);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS) {
						wql_ptr = (char *)0;
						fp->f_flags &= ~FP_INSELECT;
						fp->f_waddr = (void *)0;
					} else {
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
						fp->f_flags |= FP_INSELECT;
						fp->f_waddr = (void *)wqsub;
					}

					context.vc_ucred = fp->f_cred;

					if ((fp->f_ops &&
					     ((fp->f_type != DTYPE_VNODE)
					      || (((vp = (struct vnode *)fp->f_data) != NULLVP)
						  && (vp->v_type != VCHR))
					     )
					     && fo_select(fp, flag[msk], wql_ptr, &context))) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		proc_fdunlock(p);
	}
	*retval = n;
	return (0);
}

int poll_callback(struct kqueue *, struct kevent64_s *, void *);

struct poll_continue_args {
	user_addr_t pca_fds;
	u_int pca_nfds;
	u_int pca_rfds;
};

int
poll(struct proc *p, struct poll_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
}


int
poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
{
	struct poll_continue_args *cont;
	struct pollfd *fds;
	struct kqueue *kq;
	struct timeval atv;
	int ncoll, error = 0;
	u_int nfds = uap->nfds;
	u_int rfds = 0;
	u_int i;
	size_t ni;

	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > OPEN_MAX ||
	    (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
		return (EINVAL);

	kq = kqueue_alloc(p);
	if (kq == NULL)
		return (EAGAIN);

	ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
	MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
	if (NULL == cont) {
		error = EAGAIN;
		goto out;
	}

	fds = (struct pollfd *)&cont[1];
	error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
	if (error)
		goto out;

	if (uap->timeout != -1) {
		struct timeval rtv;

		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto out;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}

	/* JMM - all this P_SELECT stuff is bogus */
	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, &p->p_flag);
	for (i = 0; i < nfds; i++) {
		short events = fds[i].events;
		struct kevent64_s kev;
		int kerror = 0;

		/* per spec, ignore fd values below zero */
		if (fds[i].fd < 0) {
			fds[i].revents = 0;
			continue;
		}

		/* convert the poll event into a kqueue kevent */
		kev.ident = fds[i].fd;
		kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
		kev.fflags = NOTE_LOWAT;
		kev.data = 1; /* efficiency be damned: any data should trigger */
		kev.udata = CAST_USER_ADDR_T(&fds[i]);
		kev.ext[0] = 0;
		kev.ext[1] = 0;

		/* Handle input events */
		if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
			kev.filter = EVFILT_READ;
			if (!(events & ( POLLIN | POLLRDNORM )))
				kev.flags |= EV_OOBAND;
			kerror = kevent_register(kq, &kev, p);
		}

		/* Handle output events */
		if (kerror == 0 &&
		    events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
			kev.filter = EVFILT_WRITE;
			kerror = kevent_register(kq, &kev, p);
		}

		/* Handle BSD extension vnode events */
		if (kerror == 0 &&
		    events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
			kev.filter = EVFILT_VNODE;
			kev.fflags = 0;
			if (events & POLLEXTEND)
				kev.fflags |= NOTE_EXTEND;
			if (events & POLLATTRIB)
				kev.fflags |= NOTE_ATTRIB;
			if (events & POLLNLINK)
				kev.fflags |= NOTE_LINK;
			if (events & POLLWRITE)
				kev.fflags |= NOTE_WRITE;
			kerror = kevent_register(kq, &kev, p);
		}

		if (kerror != 0) {
			fds[i].revents = POLLNVAL;
			rfds++;
		} else
			fds[i].revents = 0;
	}

	/* Did we have any trouble registering? */
	if (rfds > 0)
		goto done;

	/* scan for, and possibly wait for, the kevents to trigger */
	cont->pca_fds = uap->fds;
	cont->pca_nfds = nfds;
	cont->pca_rfds = rfds;
	error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p);
	rfds = cont->pca_rfds;

done:
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
		*retval = rfds;
	}

out:
	if (NULL != cont)
		FREE(cont, M_TEMP);

	kqueue_dealloc(kq);
	return (error);
}
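
/*
 * Illustrative sketch (not part of the original source): the userspace
 * view of the kqueue-backed emulation above, assuming a POSIX libc.
 * Each pollfd becomes one or more one-shot kevents:
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN, .revents = 0 };
 *	int n = poll(&pfd, 1, 1000);	// timeout in milliseconds
 *	if (n > 0 && (pfd.revents & POLLIN))
 *		;	// fd has data to read
 */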

int
poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
{
	struct poll_continue_args *cont = (struct poll_continue_args *)data;
	struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
	short mask;

	/* convert the results back into revents */
	if (kevp->flags & EV_EOF)
		fds->revents |= POLLHUP;
	if (kevp->flags & EV_ERROR)
		fds->revents |= POLLERR;

	switch (kevp->filter) {
	case EVFILT_READ:
		if (fds->revents & POLLHUP)
			mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
		else {
			mask = 0;
			if (kevp->data != 0)
				mask |= (POLLIN | POLLRDNORM );
			if (kevp->flags & EV_OOBAND)
				mask |= ( POLLPRI | POLLRDBAND );
		}
		fds->revents |= (fds->events & mask);
		break;

	case EVFILT_WRITE:
		if (!(fds->revents & POLLHUP))
			fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
		break;

	case EVFILT_VNODE:
		if (kevp->fflags & NOTE_EXTEND)
			fds->revents |= (fds->events & POLLEXTEND);
		if (kevp->fflags & NOTE_ATTRIB)
			fds->revents |= (fds->events & POLLATTRIB);
		if (kevp->fflags & NOTE_LINK)
			fds->revents |= (fds->events & POLLNLINK);
		if (kevp->fflags & NOTE_WRITE)
			fds->revents |= (fds->events & POLLWRITE);
		break;
	}

	if (fds->revents)
		cont->pca_rfds++;

	return 0;
}

int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{

	return (1);
}

static int
selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
	 int nfd, int *countp, int *kfcountp)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	u_int32_t *iptr;
	u_int nw;
	int error = 0;
	int kfc = 0;
	int dropcount;
	vnode_t vp;

	/*
	 * Problems at reboot due to Mac OS X signal handling
	 * in Beaker1C; verify that p->p_fd is valid.
	 */
	if (fdp == NULL) {
		*countp = 0;
		*kfcountp = 0;
		return(EIO);
	}
	nw = howmany(nfd, NFDBITS);

	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
				    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
					*countp = 0;
					*kfcountp = 0;
					error = EBADF;
					goto bad;
				}
				fp->f_iocount++;
				if ((fp->f_type == DTYPE_VNODE)
				    && ((vp = (struct vnode *)fp->f_data) != NULLVP)
				    && (vp->v_type == VCHR) )
					kfc++;

				n++;
			}
		}
	}
	proc_fdunlock(p);

	*countp = n;
	*kfcountp = kfc;
	return (0);
bad:
	dropcount = 0;

	if (n == 0)
		goto out;
	/* undo the iocounts */
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (dropcount >= n)
					goto out;
				fp->f_iocount--;

				if (p->p_fpdrainwait && fp->f_iocount == 0) {
					p->p_fpdrainwait = 0;
					wakeup(&p->p_fpdrainwait);
				}
				dropcount++;
			}
		}
	}
out:
	proc_fdunlock(p);
	return(error);
}

static int
seldrop(struct proc *p, u_int32_t *ibits, int nfd)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	u_int32_t *iptr;
	u_int nw;

	/*
	 * Problems at reboot due to Mac OS X signal handling
	 * in Beaker1C; verify that p->p_fd is valid.
	 */
	if (fdp == NULL) {
		return(EIO);
	}

	nw = howmany(nfd, NFDBITS);


	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL
#if 0
				    /* if you are here then it is being closed */
				    || (fdp->fd_ofileflags[fd] & UF_RESERVED)
#endif
				    ) {
					proc_fdunlock(p);
					return(EBADF);
				}
				n++;
				fp->f_iocount--;
				fp->f_flags &= ~FP_INSELECT;

				if (p->p_fpdrainwait && fp->f_iocount == 0) {
					p->p_fpdrainwait = 0;
					wakeup(&p->p_fpdrainwait);
				}
			}
		}
	}
	proc_fdunlock(p);
	return (0);
}

/*
 * Record a select request.
 */
void
selrecord(__unused struct proc *selector, struct selinfo *sip, void *p_wql)
{
	thread_t cur_act = current_thread();
	struct uthread *ut = get_bsdthread_info(cur_act);

	/* need to look at collisions */

	if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
		return;
	}

	/* do not record if this is the second pass of select */
	if ((p_wql == (void *)0)) {
		return;
	}

	if ((sip->si_flags & SI_INITED) == 0) {
		wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
		sip->si_flags |= SI_INITED;
		sip->si_flags &= ~SI_CLEAR;
	}

	if (sip->si_flags & SI_RECORDED) {
		sip->si_flags |= SI_COLL;
	} else
		sip->si_flags &= ~SI_COLL;

	sip->si_flags |= SI_RECORDED;
	if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
		wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
					(wait_queue_link_t)p_wql);

	return;
}
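
/*
 * Illustrative sketch (not part of the original source): the usual
 * driver-side pairing with selrecord().  A hypothetical character
 * driver's select entry point records the selecting thread on the first
 * pass (wql != NULL), and its data-arrival path wakes it later:
 *
 *	// in the driver's select routine; no_data_ready() and the
 *	// softc layout are hypothetical
 *	if (no_data_ready(sc))
 *		selrecord(p, &sc->sc_selinfo, wql);
 *
 *	// later, when data arrives
 *	selwakeup(&sc->sc_selinfo);
 */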

void
selwakeup(struct selinfo *sip)
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	if (sip->si_flags & SI_RECORDED) {
		wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
		sip->si_flags &= ~SI_RECORDED;
	}

}

void
selthreadclear(struct selinfo *sip)
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	wait_queue_unlink_all(&sip->si_wait_queue);
}


#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST		MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH		MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT		MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD		MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP	MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


#define EVPROCDEQUE(p, evq)	do {				\
	proc_lock(p);						\
	if (evq->ee_flags & EV_QUEUED) {			\
		TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);	\
		evq->ee_flags &= ~EV_QUEUED;			\
	}							\
	proc_unlock(p);						\
} while (0);


/*
 * called upon socket close. dequeue and free all events for
 * the socket... socket must be locked by caller.
 */
void
evsofree(struct socket *sp)
{
	struct eventqelt *evq, *next;
	proc_t p;

	if (sp == NULL)
		return;

	for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
		next = evq->ee_slist.tqe_next;
		p = evq->ee_proc;

		if (evq->ee_flags & EV_QUEUED) {
			EVPROCDEQUE(p, evq);
		}
		TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
		FREE(evq, M_TEMP);
	}
}


/*
 * called upon pipe close. dequeue and free all events for
 * the pipe... pipe must be locked by caller
 */
void
evpipefree(struct pipe *cpipe)
{
	struct eventqelt *evq, *next;
	proc_t p;

	for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
		next = evq->ee_slist.tqe_next;
		p = evq->ee_proc;

		EVPROCDEQUE(p, evq);

		TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
		FREE(evq, M_TEMP);
	}
}


/*
 * enqueue this event if it's not already queued. wakeup
 * the proc if we do queue this event to it...
 * entered with proc lock held... we drop it before
 * doing the wakeup and return in that state
 */
static void
evprocenque(struct eventqelt *evq)
{
	proc_t p;

	assert(evq);
	p = evq->ee_proc;

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);

	proc_lock(p);

	if (evq->ee_flags & EV_QUEUED) {
		proc_unlock(p);

		KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
		return;
	}
	evq->ee_flags |= EV_QUEUED;

	TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);

	proc_unlock(p);

	wakeup(&p->p_evlist);

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}


/*
 * pipe lock must be taken by the caller
 */
void
postpipeevent(struct pipe *pipep, int event)
{
	int mask;
	struct eventqelt *evq;

	if (pipep == NULL)
		return;
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);

	for (evq = pipep->pipe_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {

		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
				mask |= EV_RE;
				evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
			}
			if ((evq->ee_eventmask & EV_WR) &&
			    (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {

				if (pipep->pipe_state & PIPE_EOF) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
			}
			break;

		case EV_WCLOSED:
		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		default:
			return;
		}
		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * therefore, the waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the pipe lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
}
2023
2024 #if SOCKETS
2025 /*
2026 * given either a sockbuf or a socket run down the
2027 * event list and queue ready events found...
2028 * the socket must be locked by the caller
2029 */
2030 void
2031 postevent(struct socket *sp, struct sockbuf *sb, int event)
2032 {
2033 int mask;
2034 struct eventqelt *evq;
2035 struct tcpcb *tp;
2036
2037 if (sb)
2038 sp = sb->sb_so;
2039 if (sp == NULL)
2040 return;
2041
2042 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);
2043
2044 for (evq = sp->so_evlist.tqh_first;
2045 evq != NULL; evq = evq->ee_slist.tqe_next) {
2046
2047 if (evq->ee_eventmask == 0)
2048 continue;
2049 mask = 0;
2050
2051 /* ready for reading:
2052 - byte cnt >= receive low water mark
2053 - read-half of conn closed
2054 - conn pending for listening sock
2055 - socket error pending
2056
2057 ready for writing
2058 - byte cnt avail >= send low water mark
2059 - write half of conn closed
2060 - socket error pending
2061 - non-blocking conn completed successfully
2062
2063 exception pending
2064 - out of band data
2065 - sock at out of band mark
2066 */
2067
2068 switch (event & EV_DMASK) {
2069
2070 case EV_OOB:
2071 if ((evq->ee_eventmask & EV_EX)) {
2072 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2073 mask |= EV_EX|EV_OOB;
2074 }
2075 break;
2076
2077 case EV_RWBYTES|EV_OOB:
2078 if ((evq->ee_eventmask & EV_EX)) {
2079 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2080 mask |= EV_EX|EV_OOB;
2081 }
2082 /*
2083 * fall into the next case
2084 */
2085 case EV_RWBYTES:
2086 if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
2087 if (sp->so_error) {
2088 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2089 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2090 (tp->t_state == TCPS_CLOSED)) {
2091 mask |= EV_RE|EV_RESET;
2092 break;
2093 }
2094 }
2095 }
2096 mask |= EV_RE;
2097 evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
2098
2099 if (sp->so_state & SS_CANTRCVMORE) {
2100 mask |= EV_FIN;
2101 break;
2102 }
2103 }
2104 if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
2105 if (sp->so_error) {
2106 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2107 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2108 (tp->t_state == TCPS_CLOSED)) {
2109 mask |= EV_WR|EV_RESET;
2110 break;
2111 }
2112 }
2113 }
2114 mask |= EV_WR;
2115 evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
2116 }
2117 break;
2118
2119 case EV_RCONN:
2120 if ((evq->ee_eventmask & EV_RE)) {
2121 mask |= EV_RE|EV_RCONN;
2122 evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one
2123 }
2124 break;
2125
2126 case EV_WCONN:
2127 if ((evq->ee_eventmask & EV_WR)) {
2128 mask |= EV_WR|EV_WCONN;
2129 }
2130 break;
2131
2132 case EV_RCLOSED:
2133 if ((evq->ee_eventmask & EV_RE)) {
2134 mask |= EV_RE|EV_RCLOSED;
2135 }
2136 break;
2137
2138 case EV_WCLOSED:
2139 if ((evq->ee_eventmask & EV_WR)) {
2140 mask |= EV_WR|EV_WCLOSED;
2141 }
2142 break;
2143
2144 case EV_FIN:
2145 if (evq->ee_eventmask & EV_RE) {
2146 mask |= EV_RE|EV_FIN;
2147 }
2148 break;
2149
2150 case EV_RESET:
2151 case EV_TIMEOUT:
2152 if (evq->ee_eventmask & EV_RE) {
2153 mask |= EV_RE | event;
2154 }
2155 if (evq->ee_eventmask & EV_WR) {
2156 mask |= EV_WR | event;
2157 }
2158 break;
2159
2160 default:
2161 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
2162 return;
2163 } /* switch */
2164
2165 KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);
2166
2167 if (mask) {
2168 /*
2169 * disarm... postevents are nops until this event is 'read' via
2170 * waitevent and then re-armed via modwatch
2171 */
2172 evq->ee_eventmask = 0;
2173
2174 /*
2175 * since events are disarmed until after the waitevent,
2176 * the ee_req.er_xxxx fields can't change once we've
2177 * inserted this event into the proc queue...
2178 * since waitevent can't see this event until we
2179 * enqueue it, waitevent will see a 'consistent'
2180 * snapshot of the event, even though it won't hold
2181 * the socket lock, and we're updating the event outside
2182 * of the proc lock, which it will hold
2183 */
2184 evq->ee_req.er_eventbits |= mask;
2185
2186 evprocenque(evq);
2187 }
2188 }
2189 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
2190 }
2191 #endif /* SOCKETS */
2192
2193
2194 /*
2195 * watchevent system call. user passes us an event to watch
2196 * for. we malloc an event object, initialize it, and queue
2197 * it to the open socket. when the event occurs, postevent()
2198 * will enqueue it back to our proc where we can retrieve it
2199 * via waitevent().
2200 *
2201 * duplicate watches are prevented: only one watch per file per proc is allowed (see below)
2202 *
2203 * Returns:
2204 * ENOMEM No memory for operation
2205 * copyin:EFAULT
2206 */
2207 int
2208 watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
2209 {
2210 struct eventqelt *evq = (struct eventqelt *)0;
2211 struct eventqelt *np = NULL;
2212 struct eventreq64 *erp;
2213 struct fileproc *fp = NULL;
2214 int error;
2215
2216 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2217
2218 // get a qelt and fill it with the user's req
2219 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2220
2221 if (evq == NULL)
2222 return (ENOMEM);
2223 erp = &evq->ee_req;
2224
2225 // get the user's request pkt
2226
2227 if (IS_64BIT_PROCESS(p)) {
2228 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2229 } else {
2230 struct eventreq32 er32;
2231
2232 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2233 if (error == 0) {
2234 /*
2235 * the user only passes in the
2236 * er_type, er_handle and er_data...
2237 * the other fields are initialized
2238 * below, so don't bother to copy
2239 */
2240 erp->er_type = er32.er_type;
2241 erp->er_handle = er32.er_handle;
2242 erp->er_data = (user_addr_t)er32.er_data;
2243 }
2244 }
2245 if (error) {
2246 FREE(evq, M_TEMP);
2247 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2248
2249 return(error);
2250 }
2251 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2252
2253 // validate, freeing the qelt on error
2254 error = 0;
2255 proc_fdlock(p);
2256
2257 if (erp->er_type != EV_FD) {
2258 error = EINVAL;
2259 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2260 error = EBADF;
2261 #if SOCKETS
2262 } else if (fp->f_type == DTYPE_SOCKET) {
2263 socket_lock((struct socket *)fp->f_data, 1);
2264 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2265 #endif /* SOCKETS */
2266 } else if (fp->f_type == DTYPE_PIPE) {
2267 PIPE_LOCK((struct pipe *)fp->f_data);
2268 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2269 } else {
2270 fp_drop(p, erp->er_handle, fp, 1);
2271 error = EINVAL;
2272 }
2273 proc_fdunlock(p);
2274
2275 if (error) {
2276 FREE(evq, M_TEMP);
2277
2278 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2279 return(error);
2280 }
2281
2282 /*
2283 * only allow one watch per file per proc
2284 */
2285 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2286 if (np->ee_proc == p) {
2287 #if SOCKETS
2288 if (fp->f_type == DTYPE_SOCKET)
2289 socket_unlock((struct socket *)fp->f_data, 1);
2290 else
2291 #endif /* SOCKETS */
2292 PIPE_UNLOCK((struct pipe *)fp->f_data);
2293 fp_drop(p, erp->er_handle, fp, 0);
2294 FREE(evq, M_TEMP);
2295
2296 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2297 return(EINVAL);
2298 }
2299 }
2300 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2301 evq->ee_proc = p;
2302 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2303 evq->ee_flags = 0;
2304
2305 #if SOCKETS
2306 if (fp->f_type == DTYPE_SOCKET) {
2307 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2308 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2309
2310 socket_unlock((struct socket *)fp->f_data, 1);
2311 } else
2312 #endif /* SOCKETS */
2313 {
2314 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2315 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2316
2317 PIPE_UNLOCK((struct pipe *)fp->f_data);
2318 }
2319 fp_drop_event(p, erp->er_handle, fp);
2320
2321 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2322 return(0);
2323 }
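
/*
 * Illustrative user-space sketch (not part of the original source):
 * registering a watch on a descriptor, as implied by the validation
 * above (er_type must be EV_FD; only sockets and pipes are accepted).
 * The watchevent() wrapper prototype is an assumption; struct eventreq
 * and the EV_* constants are taken from <sys/ev.h>.
 */
#if 0
#include <sys/types.h>
#include <sys/ev.h>
#include <string.h>

extern int watchevent(struct eventreq *u_req, int u_eventmask); /* assumed wrapper */

static int
watch_fd(int fd, void *cookie)
{
	struct eventreq er;

	memset(&er, 0, sizeof(er));
	er.er_type = EV_FD;	/* the only type accepted above */
	er.er_handle = fd;	/* must be a socket or a pipe */
	er.er_data = cookie;	/* opaque, echoed back by waitevent() */

	/* arm for read/write readiness; watchevent() immediately posts
	 * EV_RWBYTES so events that predate the watch are not lost */
	return watchevent(&er, EV_RE | EV_WR);
}
#endif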
2324
2325
2326
2327 /*
2328 * waitevent system call.
2329 * grabs the next waiting event for this proc and returns
2330 * it. if no events are queued, the user can sleep (with or
2331 * without a timeout) or poll; poll mode is selected by
2332 * ((tv != NULL && interval == 0) || tv == -1)
2333 */
2334 int
2335 waitevent(proc_t p, struct waitevent_args *uap, int *retval)
2336 {
2337 int error = 0;
2338 struct eventqelt *evq;
2339 struct eventreq64 *erp;
2340 uint64_t abstime, interval;
2341 boolean_t fast_poll = FALSE;
2342 union {
2343 struct eventreq64 er64;
2344 struct eventreq32 er32;
2345 } uer;
2346
2347 interval = 0;
2348
2349 if (uap->tv) {
2350 struct timeval atv;
2351 /*
2352 * check for fast poll method
2353 */
2354 if (IS_64BIT_PROCESS(p)) {
2355 if (uap->tv == (user_addr_t)-1)
2356 fast_poll = TRUE;
2357 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2358 fast_poll = TRUE;
2359
2360 if (fast_poll == TRUE) {
2361 if (p->p_evlist.tqh_first == NULL) {
2362 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2363 /*
2364 * poll failed
2365 */
2366 *retval = 1;
2367 return (0);
2368 }
2369 proc_lock(p);
2370 goto retry;
2371 }
2372 if (IS_64BIT_PROCESS(p)) {
2373 struct user64_timeval atv64;
2374 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
2375 /* Loses resolution - assume timeout < 68 years */
2376 atv.tv_sec = atv64.tv_sec;
2377 atv.tv_usec = atv64.tv_usec;
2378 } else {
2379 struct user32_timeval atv32;
2380 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
2381 atv.tv_sec = atv32.tv_sec;
2382 atv.tv_usec = atv32.tv_usec;
2383 }
2384
2385 if (error)
2386 return(error);
2387 if (itimerfix(&atv)) {
2388 error = EINVAL;
2389 return(error);
2390 }
2391 interval = tvtoabstime(&atv);
2392 }
2393 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
2394
2395 proc_lock(p);
2396 retry:
2397 if ((evq = p->p_evlist.tqh_first) != NULL) {
2398 /*
2399 * found one... make a local copy while it's still on the queue
2400 * to prevent it from changing while we're copying it...
2401 * we don't want to hold the proc lock across a copyout because
2402 * it might block on a page fault at the target in user space
2403 */
2404 erp = &evq->ee_req;
2405
2406 if (IS_64BIT_PROCESS(p))
2407 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2408 else {
2409 uer.er32.er_type = erp->er_type;
2410 uer.er32.er_handle = erp->er_handle;
2411 uer.er32.er_data = (uint32_t)erp->er_data;
2412 uer.er32.er_ecnt = erp->er_ecnt;
2413 uer.er32.er_rcnt = erp->er_rcnt;
2414 uer.er32.er_wcnt = erp->er_wcnt;
2415 uer.er32.er_eventbits = erp->er_eventbits;
2416 }
2417 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2418
2419 evq->ee_flags &= ~EV_QUEUED;
2420
2421 proc_unlock(p);
2422
2423 if (IS_64BIT_PROCESS(p))
2424 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2425 else
2426 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
2427
2428 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2429 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
2430 return (error);
2431 }
2432 else {
2433 if (uap->tv && interval == 0) {
2434 proc_unlock(p);
2435 *retval = 1; // poll failed
2436
2437 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
2438 return (error);
2439 }
2440 if (interval != 0)
2441 clock_absolutetime_interval_to_deadline(interval, &abstime);
2442 else
2443 abstime = 0;
2444
2445 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
2446
2447 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2448
2449 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
2450
2451 if (error == 0)
2452 goto retry;
2453 if (error == ERESTART)
2454 error = EINTR;
2455 if (error == EWOULDBLOCK) {
2456 *retval = 1;
2457 error = 0;
2458 }
2459 }
2460 proc_unlock(p);
2461
2462 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
2463 return (error);
2464 }
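
/*
 * Illustrative user-space sketch (not part of the original source):
 * one pass of an event loop over waitevent(). Passing a timeval selects
 * a bounded sleep; a zeroed timeval polls once, and tv == (void *)-1
 * takes the fast-poll path above. The wrapper prototype and its 0/1
 * return convention (mirroring *retval) are assumptions.
 */
#if 0
#include <sys/types.h>
#include <sys/time.h>
#include <sys/ev.h>
#include <stdio.h>

extern int waitevent(struct eventreq *u_req, struct timeval *tv); /* assumed wrapper */

static void
wait_one_event(void)
{
	struct eventreq er;
	struct timeval tv = { 5, 0 };	/* sleep for at most 5 seconds */
	int ret;

	ret = waitevent(&er, &tv);
	if (ret == 0) {
		/* er_eventbits explains the wakeup: EV_RE, EV_WR, EV_FIN,
		 * EV_RESET, ...; er_rcnt/er_wcnt carry the byte counts
		 * snapshotted by postevent() */
		printf("fd %d fired, eventbits 0x%x\n",
		    er.er_handle, er.er_eventbits);
	} else if (ret == 1) {
		printf("timed out, nothing queued\n");
	}
}
#endif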
2465
2466
2467 /*
2468 * modwatch system call. user passes in the event to modify.
2469 * if we find it, we reset the event bits and queue/dequeue
2470 * the event as needed.
2471 */
2472 int
2473 modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
2474 {
2475 struct eventreq64 er;
2476 struct eventreq64 *erp = &er;
2477 struct eventqelt *evq = NULL; /* protected by error return */
2478 int error;
2479 struct fileproc *fp;
2480 int flag;
2481
2482 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2483
2484 /*
2485 * get user's request pkt
2486 * just need the er_type and er_handle which sit above the
2487 * problematic er_data (32/64 issue)... so only copy in
2488 * those 2 fields
2489 */
2490 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2491 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2492 return(error);
2493 }
2494 proc_fdlock(p);
2495
2496 if (erp->er_type != EV_FD) {
2497 error = EINVAL;
2498 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2499 error = EBADF;
2500 #if SOCKETS
2501 } else if (fp->f_type == DTYPE_SOCKET) {
2502 socket_lock((struct socket *)fp->f_data, 1);
2503 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2504 #endif /* SOCKETS */
2505 } else if (fp->f_type == DTYPE_PIPE) {
2506 PIPE_LOCK((struct pipe *)fp->f_data);
2507 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2508 } else {
2509 fp_drop(p, erp->er_handle, fp, 1);
2510 error = EINVAL;
2511 }
2512
2513 if (error) {
2514 proc_fdunlock(p);
2515 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2516 return(error);
2517 }
2518
2519 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2520 fp->f_flags &= ~FP_WAITEVENT;
2521 }
2522 proc_fdunlock(p);
2523
2524 // locate event if possible
2525 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2526 if (evq->ee_proc == p)
2527 break;
2528 }
2529 if (evq == NULL) {
2530 #if SOCKETS
2531 if (fp->f_type == DTYPE_SOCKET)
2532 socket_unlock((struct socket *)fp->f_data, 1);
2533 else
2534 #endif /* SOCKETS */
2535 PIPE_UNLOCK((struct pipe *)fp->f_data);
2536 fp_drop(p, erp->er_handle, fp, 0);
2537 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2538 return(EINVAL);
2539 }
2540 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2541
2542 if (uap->u_eventmask == EV_RM) {
2543 EVPROCDEQUE(p, evq);
2544
2545 #if SOCKETS
2546 if (fp->f_type == DTYPE_SOCKET) {
2547 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2548 socket_unlock((struct socket *)fp->f_data, 1);
2549 } else
2550 #endif /* SOCKETS */
2551 {
2552 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2553 PIPE_UNLOCK((struct pipe *)fp->f_data);
2554 }
2555 fp_drop(p, erp->er_handle, fp, 0);
2556 FREE(evq, M_TEMP);
2557 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2558 return(0);
2559 }
2560 switch (uap->u_eventmask & EV_MASK) {
2561
2562 case 0:
2563 flag = 0;
2564 break;
2565
2566 case EV_RE:
2567 case EV_WR:
2568 case EV_RE|EV_WR:
2569 flag = EV_RWBYTES;
2570 break;
2571
2572 case EV_EX:
2573 flag = EV_OOB;
2574 break;
2575
2576 case EV_EX|EV_RE:
2577 case EV_EX|EV_WR:
2578 case EV_EX|EV_RE|EV_WR:
2579 flag = EV_OOB|EV_RWBYTES;
2580 break;
2581
2582 default:
2583 #if SOCKETS
2584 if (fp->f_type == DTYPE_SOCKET)
2585 socket_unlock((struct socket *)fp->f_data, 1);
2586 else
2587 #endif /* SOCKETS */
2588 PIPE_UNLOCK((struct pipe *)fp->f_data);
2589 fp_drop(p, erp->er_handle, fp, 0);
2590 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2591 return(EINVAL);
2592 }
2593 /*
2594 * since we're holding the socket/pipe lock, the event
2595 * cannot go from the unqueued state to the queued state
2596 * however, it can go from the queued state to the unqueued state
2597 * since that direction is protected by the proc_lock...
2598 * so do a quick check for EV_QUEUED w/o holding the proc lock
2599 * since by far the common case will be NOT EV_QUEUED, this saves
2600 * us taking the proc_lock the majority of the time
2601 */
2602 if (evq->ee_flags & EV_QUEUED) {
2603 /*
2604 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2605 */
2606 EVPROCDEQUE(p, evq);
2607 }
2608 /*
2609 * while the event is off the proc queue and
2610 * we're holding the socket/pipe lock
2611 * it's safe to update these fields...
2612 */
2613 evq->ee_req.er_eventbits = 0;
2614 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2615
2616 #if SOCKETS
2617 if (fp->f_type == DTYPE_SOCKET) {
2618 postevent((struct socket *)fp->f_data, 0, flag);
2619 socket_unlock((struct socket *)fp->f_data, 1);
2620 } else
2621 #endif /* SOCKETS */
2622 {
2623 postpipeevent((struct pipe *)fp->f_data, flag);
2624 PIPE_UNLOCK((struct pipe *)fp->f_data);
2625 }
2626 fp_drop(p, erp->er_handle, fp, 0);
2627 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
2628 return(0);
2629 }
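
/*
 * Illustrative user-space sketch (not part of the original source):
 * a delivered event leaves the watch disarmed (ee_eventmask was zeroed
 * by postevent), so the consumer re-arms it with modwatch(); EV_RM
 * removes the watch entirely. The wrapper prototype is an assumption.
 */
#if 0
#include <sys/ev.h>

extern int modwatch(struct eventreq *u_req, int u_eventmask); /* assumed wrapper */

/* re-arm after consuming an event; modwatch() reposts any readiness
 * that is already pending so nothing is missed while disarmed */
static int
rearm_watch(struct eventreq *er)
{
	return modwatch(er, EV_RE | EV_WR);
}

/* tear the watch down: EV_RM dequeues and frees the kernel eventqelt */
static int
remove_watch(struct eventreq *er)
{
	return modwatch(er, EV_RM);
}
#endif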
2630
2631 /* this routine is called from the close of an fd, with the proc_fdlock held */
2632 int
2633 waitevent_close(struct proc *p, struct fileproc *fp)
2634 {
2635 struct eventqelt *evq;
2636
2637
2638 fp->f_flags &= ~FP_WAITEVENT;
2639
2640 #if SOCKETS
2641 if (fp->f_type == DTYPE_SOCKET) {
2642 socket_lock((struct socket *)fp->f_data, 1);
2643 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2644 } else
2645 #endif /* SOCKETS */
2646 if (fp->f_type == DTYPE_PIPE) {
2647 PIPE_LOCK((struct pipe *)fp->f_data);
2648 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2649 }
2650 else {
2651 return(EINVAL);
2652 }
2653 proc_fdunlock(p);
2654
2655
2656 // locate event if possible
2657 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2658 if (evq->ee_proc == p)
2659 break;
2660 }
2661 if (evq == NULL) {
2662 #if SOCKETS
2663 if (fp->f_type == DTYPE_SOCKET)
2664 socket_unlock((struct socket *)fp->f_data, 1);
2665 else
2666 #endif /* SOCKETS */
2667 PIPE_UNLOCK((struct pipe *)fp->f_data);
2668
2669 proc_fdlock(p);
2670
2671 return(EINVAL);
2672 }
2673 EVPROCDEQUE(p, evq);
2674
2675 #if SOCKETS
2676 if (fp->f_type == DTYPE_SOCKET) {
2677 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2678 socket_unlock((struct socket *)fp->f_data, 1);
2679 } else
2680 #endif /* SOCKETS */
2681 {
2682 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2683 PIPE_UNLOCK((struct pipe *)fp->f_data);
2684 }
2685 FREE(evq, M_TEMP);
2686
2687 proc_fdlock(p);
2688
2689 return(0);
2690 }
2691
2692
2693 /*
2694 * gethostuuid
2695 *
2696 * Description: Get the host UUID from IOKit and return it to user space.
2697 *
2698 * Parameters: uuid_buf Pointer to buffer to receive UUID
2699 * timeout Timespec for the timeout
2700 *
2701 * Returns: 0 Success
2702 * EWOULDBLOCK Timeout is too short
2703 * copyout:EFAULT Bad user buffer
2704 *
2705 * Notes: A timeout seems redundant, since if it's tolerable to not
2706 * have a system UUID in hand, then why ask for one?
2707 */
2708 int
2709 gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
2710 {
2711 kern_return_t kret;
2712 int error;
2713 mach_timespec_t mach_ts; /* for IOKit call */
2714 __darwin_uuid_t uuid_kern; /* for IOKit call */
2715
2716 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2717 if ( proc_is64bit(p) ) {
2718 struct user64_timespec ts;
2719 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2720 if (error)
2721 return (error);
2722 mach_ts.tv_sec = ts.tv_sec;
2723 mach_ts.tv_nsec = ts.tv_nsec;
2724 } else {
2725 struct user32_timespec ts;
2726 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2727 if (error)
2728 return (error);
2729 mach_ts.tv_sec = ts.tv_sec;
2730 mach_ts.tv_nsec = ts.tv_nsec;
2731 }
2732
2733 /* Call IOKit with the stack buffer to get the UUID */
2734 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2735
2736 /*
2737 * If we get it, copy out the data to the user buffer; note that a
2738 * uuid_t is an array of characters, so this is size invariant for
2739 * 32 vs. 64 bit.
2740 */
2741 if (kret == KERN_SUCCESS) {
2742 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2743 } else {
2744 error = EWOULDBLOCK;
2745 }
2746
2747 return (error);
2748 }
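
/*
 * Illustrative user-space sketch (not part of the original source):
 * fetching the host UUID with a 5 second timeout. gethostuuid() is
 * declared in <unistd.h> on Mac OS X; a too-short timeout surfaces as
 * EWOULDBLOCK, per the Returns section above.
 */
#if 0
#include <unistd.h>
#include <uuid/uuid.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
	uuid_t uu;
	uuid_string_t s;
	struct timespec ts = { 5, 0 };	/* wait up to 5 seconds */

	if (gethostuuid(uu, &ts) != 0) {
		perror("gethostuuid");
		return 1;
	}
	uuid_unparse_upper(uu, s);
	printf("host UUID: %s\n", s);
	return 0;
}
#endif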