bsd/kern/uipc_syscalls.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23  * Copyright (c) 1982, 1986, 1989, 1990, 1993
  24  *      The Regents of the University of California.  All rights reserved.
  25  *
  26  * sendfile(2) and related extensions:
  27  * Copyright (c) 1998, David Greenman. All rights reserved.
  28  *
  29  * Redistribution and use in source and binary forms, with or without
  30  * modification, are permitted provided that the following conditions
  31  * are met:
  32  * 1. Redistributions of source code must retain the above copyright
  33  *    notice, this list of conditions and the following disclaimer.
  34  * 2. Redistributions in binary form must reproduce the above copyright
  35  *    notice, this list of conditions and the following disclaimer in the
  36  *    documentation and/or other materials provided with the distribution.
  37  * 3. All advertising materials mentioning features or use of this software
  38  *    must display the following acknowledgement:
  39  *      This product includes software developed by the University of
  40  *      California, Berkeley and its contributors.
  41  * 4. Neither the name of the University nor the names of its contributors
  42  *    may be used to endorse or promote products derived from this software
  43  *    without specific prior written permission.
  44  *
  45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  55  * SUCH DAMAGE.
  56  *
  57  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
  58  */
  59
  60
  61
  62 #include <sys/param.h>
  63 #include <sys/systm.h>
  64 #include <sys/filedesc.h>
  65 #include <sys/proc_internal.h>
  66 #include <sys/file_internal.h>
  67 #include <sys/malloc.h>
  68 #include <sys/mbuf.h>
  69 #include <kern/lock.h>
  70 #include <sys/domain.h>
  71 #include <sys/protosw.h>
  72 #include <sys/signalvar.h>
  73 #include <sys/socket.h>
  74 #include <sys/socketvar.h>
  75 #if KTRACE
  76 #include <sys/ktrace.h>
  77 #endif
  78 #include <sys/kernel.h>
  79 #include <sys/uio_internal.h>
  80
  81 #include <bsm/audit_kernel.h>
  82
  83 #include <sys/kdebug.h>
  84 #include <sys/sysproto.h>
  85
  /*
   * Shorthand accessors: an expression such as fp->f_flag on a
   * struct fileproc pointer expands to fp->f_fglob->fg_flag, so the code
   * below can reach the fields behind f_fglob without spelling out the
   * extra indirection.
   */
  86 #define f_flag f_fglob->fg_flag
  87 #define f_type f_fglob->fg_type
  88 #define f_msgcount f_fglob->fg_msgcount
  89 #define f_cred f_fglob->fg_cred
  90 #define f_ops f_fglob->fg_ops
  91 #define f_offset f_fglob->fg_offset
  92 #define f_data f_fglob->fg_data
  93 #if KDEBUG
  94
  /*
   * Trace codes in the DBG_NETSOCK class.  The DBG_FNC_* values are OR-ed
   * with DBG_FUNC_START / DBG_FUNC_END in the KERNEL_DEBUG() calls placed
   * at the entry and exit of the send and receive paths in this file.
   * These definitions are only compiled when KDEBUG is set.
   */
  95 #define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
  96 #define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
  97 #define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
  98 #define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
  99 #define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
 100 #define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
 101 #define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
 102 #define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
 103 #define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
 104 #define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
 105
 106 #endif
 107
 108
 /*
  * Radar 4056224 workaround switch.  When enabled, sendto() retries a
  * send that failed with ENOTSOCK as a write() if the descriptor is a
  * pipe.  last_pid_4056224 records the highest pid for which the console
  * notice has been printed; the notice is only printed again for a
  * process whose pid is greater than that value.
  */
 109 #define HACK_FOR_4056224 1
 110 #if HACK_FOR_4056224
 111 static pid_t last_pid_4056224 = 0;
 112 #endif /* HACK_FOR_4056224 */
 113
 114
 /*
  * sendfile buffer support, compiled only when SENDFILE is set: forward
  * declarations for the sf_buf helpers, the SYSINIT entry that names
  * sf_buf_init, and the file-scope sf_buf state (free list, base address,
  * buffer array and a "want" counter).
  * NOTE(review): the definitions of these helpers are not in this part of
  * the file -- confirm they still exist before enabling SENDFILE.
  */
 115 #if SENDFILE
 116 static void sf_buf_init(void *arg);
 117 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
 118 static struct sf_buf *sf_buf_alloc(void);
 119 static void sf_buf_ref(caddr_t addr, u_int size);
 120 static void sf_buf_free(caddr_t addr, u_int size);
 121
 122 static SLIST_HEAD(, sf_buf) sf_freelist;
 123 static vm_offset_t sf_base;
 124 static struct sf_buf *sf_bufs;
 125 static int sf_buf_alloc_want;
 126 #endif
 127
 /*
  * File-local helpers shared by several system calls:
  *   sendit       - common send path used by sendto() and sendmsg()
  *   recvit       - common receive path
  *   accept1      - body shared by accept() and oaccept(); "compat" selects
  *                  the old sockaddr layout for the returned address
  *   getsockname1 / getpeername1 - take the same kind of "compat" flag
  */
 128 static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
 129                                         int flags, register_t *retval);
 130 static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
 131                                         user_addr_t namelenp, register_t *retval);
 132
 133 static int accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat);
 134 static int getsockname1(struct proc *p, struct getsockname_args *uap,
 135                              register_t *retval, int compat);
 136 static int getpeername1(struct proc *p, struct getpeername_args *uap,
 137                              register_t *retval, int compat);
 138
 139
 140 #if COMPAT_43_SOCKET
 /*
  * Argument blocks and prototypes for the old-style (COMPAT_43_SOCKET)
  * socket entry points.  Each structure carries the socket descriptor in
  * "s" and the call flags in "flags".
  */
 /* Arguments of orecvmsg(): descriptor, old-format message header, flags. */
 141 struct orecvmsg_args  {
 142         int     s;
 143         struct  omsghdr *msg;
 144         int     flags;
 145 };
 /* Arguments of osendmsg(); the call itself now only returns ENOTSUP. */
 146 struct osendmsg_args {
 147         int s;
 148         caddr_t msg;
 149         int flags;
 150 };
 /* Arguments of osend(); the call itself now only returns ENOTSUP. */
 151 struct osend_args {
 152         int s;
 153         caddr_t buf;
 154         int len;
 155         int flags;
 156 };
 /* Arguments of orecv(): descriptor, buffer, buffer length, flags. */
 157 struct  orecv_args {
 158         int     s;
 159         caddr_t buf;
 160         int     len;
 161         int     flags;
 162 };
 163
 /* Old-style entry points; oaccept() is accept1() with compat set to 1. */
 164 int oaccept(struct proc *p, struct accept_args *uap, register_t *retval);
 165 int ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval);
 166 int ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval);
 167 int orecv(struct proc *p, struct orecv_args     *uap, register_t *retval);
 168 int orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval);
 169 int orecvmsg(struct proc *p, struct orecvmsg_args *uap, register_t *retval);
 170 int     osend(struct proc *p, struct osend_args *uap, register_t *retval);
 171 int osendmsg(struct proc *p, struct osendmsg_args *uap, register_t *retval);
 172 #endif // COMPAT_43_SOCKET
 173
 174 /*
 175  * System call interface to the socket abstraction.
 176  */
 177
 178 extern  struct fileops socketops;
 179
 180 int
 181 socket(p, uap, retval)
 182         struct proc *p;
 183         register struct socket_args *uap;
 184         register_t *retval;
 185 {
 186         struct socket *so;
 187         struct fileproc *fp;
 188         int fd, error;
 189
 190         AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
 191
 192         error = falloc(p, &fp, &fd);
 193         if (error) {
 194                 return (error);
 195         }
 196         fp->f_flag = FREAD|FWRITE;
 197         fp->f_type = DTYPE_SOCKET;
 198         fp->f_ops = &socketops;
 199
 200         error = socreate(uap->domain, &so, uap->type, uap->protocol);
 201         if (error) {
 202                 fp_free(p, fd, fp);
 203         } else {
 204                 fp->f_data = (caddr_t)so;
 205
 206                 proc_fdlock(p);
 207                 *fdflags(p, fd) &= ~UF_RESERVED;
 208
 209                 fp_drop(p, fd, fp, 1);
 210                 proc_fdunlock(p);
 211
 212                 *retval = fd;
 213         }
 214         return (error);
 215 }
 216
 217 /* ARGSUSED */
 218 int
 219 bind(struct proc *p, struct bind_args *uap, __unused register_t *retval)
 220 {
 221         struct sockaddr *sa;
 222         struct socket *so;
 223         int error;
 224
 225         AUDIT_ARG(fd, uap->s);
 226         error = file_socket(uap->s, &so);
 227         if (error)
 228                 return (error);
 229         error = getsockaddr(&sa, uap->name, uap->namelen);
 230         if (error)
 231                 goto out;
 232         AUDIT_ARG(sockaddr, p, sa);
 233         if (so != NULL)
 234                 error = sobind(so, sa);
 235         else
 236                 error = EBADF;
 237         FREE(sa, M_SONAME);
 238 out:
 239         file_drop(uap->s);
 240         return (error);
 241 }
 242
 243
 244 int
 245 listen(__unused struct proc *p, register struct listen_args *uap,
 246                 __unused register_t *retval)
 247 {
 248         int error;
 249         struct socket * so;
 250
 251         AUDIT_ARG(fd, uap->s);
 252         error = file_socket(uap->s, &so);
 253         if (error)
 254                 return (error);
 255         if (so != NULL)
 256                 error =  solisten(so, uap->backlog);
 257         else
 258                 error = EBADF;
 259         file_drop(uap->s);
 260         return (error);
 261 }
 262
 /*
  * Without COMPAT_43_SOCKET the function written below as accept1() is
  * compiled under the name accept.
  * NOTE(review): this file also defines accept() unconditionally further
  * down and declares accept1() static with a fourth "compat" parameter --
  * confirm that a build with COMPAT_43_SOCKET disabled still compiles.
  */
 263 #if !COMPAT_43_SOCKET
 264 #define accept1 accept
 265 #endif
 266
 267
 268
 269 int
 270 accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
 271 {
 272         struct fileproc *fp;
 273         struct sockaddr *sa;
 274         socklen_t namelen;
 275         int error;
 276         struct socket *head, *so = NULL;
 277         lck_mtx_t *mutex_held;
 278         int fd = uap->s;
 279         int newfd;;
 280         short fflag;            /* type must match fp->f_flag */
 281         int dosocklock = 0;
 282
 283         AUDIT_ARG(fd, uap->s);
 284         if (uap->name) {
 285                 error = copyin(uap->anamelen, (caddr_t)&namelen,
 286                         sizeof(socklen_t));
 287                 if(error)
 288                         return (error);
 289         }
 290         error = fp_getfsock(p, fd, &fp, &head);
 291         if (error) {
 292                 if (error == EOPNOTSUPP)
 293                         error = ENOTSOCK;
 294                 return (error);
 295         }
 296         if (head == NULL) {
 297                 error = EBADF;
 298                 goto out;
 299         }
 300
 301         socket_lock(head, 1);
 302
 303         if (head->so_proto->pr_getlock != NULL)  {
 304                 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
 305                 dosocklock = 1;
 306         }
 307         else {
 308                 mutex_held = head->so_proto->pr_domain->dom_mtx;
 309                 dosocklock = 0;
 310         }
 311
 312
 313         if ((head->so_options & SO_ACCEPTCONN) == 0) {
 314                 socket_unlock(head, 1);
 315                 error = EINVAL;
 316                 goto out;
 317         }
 318         if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
 319                 socket_unlock(head, 1);
 320                 error = EWOULDBLOCK;
 321                 goto out;
 322         }
 323         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
 324                 if (head->so_state & SS_CANTRCVMORE) {
 325                         head->so_error = ECONNABORTED;
 326                         break;
 327                 }
 328                 if (head->so_usecount < 1)
 329                         panic("accept1: head=%x refcount=%d\n", head, head->so_usecount);
 330                 error = msleep((caddr_t)&head->so_timeo, mutex_held, PSOCK | PCATCH,
 331                     "accept", 0);
 332                 if (head->so_usecount < 1)
 333                         panic("accept1: 2 head=%x refcount=%d\n", head, head->so_usecount);
 334                 if ((head->so_state & SS_DRAINING)) {
 335                         error = ECONNABORTED;
 336                 }
 337                 if (error) {
 338                         socket_unlock(head, 1);
 339                         goto out;
 340                 }
 341         }
 342         if (head->so_error) {
 343                 error = head->so_error;
 344                 head->so_error = 0;
 345                 socket_unlock(head, 1);
 346                 goto out;
 347         }
 348
 349
 350         /*
 351          * At this point we know that there is at least one connection
 352          * ready to be accepted. Remove it from the queue prior to
 353          * allocating the file descriptor for it since falloc() may
 354          * block allowing another process to accept the connection
 355          * instead.
 356          */
 357         lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 358         so = TAILQ_FIRST(&head->so_comp);
 359         TAILQ_REMOVE(&head->so_comp, so, so_list);
 360         head->so_qlen--;
 361         socket_unlock(head, 0); /* unlock head to avoid deadlock with select, keep a ref on head */
 362         fflag = fp->f_flag;
 363         proc_fdlock(p);
 364         error = falloc_locked(p, &fp, &newfd, 1);
 365         if (error) {
 366                 /*
 367                  * Probably ran out of file descriptors. Put the
 368                  * unaccepted connection back onto the queue and
 369                  * do another wakeup so some other process might
 370                  * have a chance at it.
 371                  */
 372                 proc_fdunlock(p);
 373                 socket_lock(head, 0);
 374                 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
 375                 head->so_qlen++;
 376                 wakeup_one((caddr_t)&head->so_timeo);
 377                 socket_unlock(head, 1);
 378                 goto out;
 379         }
 380         *fdflags(p, newfd) &= ~UF_RESERVED;
 381         *retval = newfd;
 382         fp->f_type = DTYPE_SOCKET;
 383         fp->f_flag = fflag;
 384         fp->f_ops = &socketops;
 385         fp->f_data = (caddr_t)so;
 386         fp_drop(p, newfd, fp, 1);
 387         proc_fdunlock(p);
 388         socket_lock(head, 0);
 389         if (dosocklock)
 390                 socket_lock(so, 1);
 391         so->so_state &= ~SS_COMP;
 392         so->so_head = NULL;
 393         sa = 0;
 394         (void) soacceptlock(so, &sa, 0);
 395         socket_unlock(head, 1);
 396         if (sa == 0) {
 397                 namelen = 0;
 398                 if (uap->name)
 399                         goto gotnoname;
 400                 if (dosocklock)
 401                         socket_unlock(so, 1);
 402                 error = 0;
 403                 goto out;
 404         }
 405         AUDIT_ARG(sockaddr, p, sa);
 406         if (uap->name) {
 407                 /* check sa_len before it is destroyed */
 408                 if (namelen > sa->sa_len)
 409                         namelen = sa->sa_len;
 410 #if COMPAT_43_SOCKET
 411                 if (compat)
 412                         ((struct osockaddr *)sa)->sa_family =
 413                             sa->sa_family;
 414 #endif
 415                 error = copyout(sa, uap->name, namelen);
 416                 if (!error)
 417 gotnoname:
 418                         error = copyout((caddr_t)&namelen, uap->anamelen,
 419                                                 sizeof(socklen_t));
 420         }
 421         FREE(sa, M_SONAME);
 422         if (dosocklock)
 423                 socket_unlock(so, 1);
 424 out:
 425         file_drop(fd);
 426         return (error);
 427 }
 428
 429 int
 430 accept(struct proc *p, struct accept_args *uap, register_t *retval)
 431 {
 432
 433         return (accept1(p, uap, retval, 0));
 434 }
 435
 436 #if COMPAT_43_SOCKET
 437 int
 438 oaccept(struct proc *p, struct accept_args *uap, register_t *retval)
 439 {
 440
 441         return (accept1(p, uap, retval, 1));
 442 }
 443 #endif /* COMPAT_43_SOCKET */
 444
 445 /* ARGSUSED */
 446 int
 447 connect(struct proc *p, struct connect_args *uap, __unused register_t *retval)
 448 {
 449         struct socket *so;
 450         struct sockaddr *sa;
 451         lck_mtx_t *mutex_held;
 452         int error;
 453         int fd = uap->s;
 454
 455         AUDIT_ARG(fd, uap->s);
 456         error = file_socket( fd, &so);
 457         if (error)
 458                 return (error);
 459         if (so == NULL) {
 460                 error = EBADF;
 461                 goto out;
 462         }
 463
 464         socket_lock(so, 1);
 465
 466         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 467                 socket_unlock(so, 1);
 468                 error = EALREADY;
 469                 goto out;
 470         }
 471         error = getsockaddr(&sa, uap->name, uap->namelen);
 472         if (error)  {
 473                 socket_unlock(so, 1);
 474                 goto out;
 475         }
 476         AUDIT_ARG(sockaddr, p, sa);
 477         error = soconnectlock(so, sa, 0);
 478         if (error)
 479                 goto bad;
 480         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 481                 FREE(sa, M_SONAME);
 482                 socket_unlock(so, 1);
 483                 error = EINPROGRESS;
 484                 goto out;
 485         }
 486         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 487                 if (so->so_proto->pr_getlock != NULL)
 488                         mutex_held = (*so->so_proto->pr_getlock)(so, 0);
 489                 else
 490                         mutex_held = so->so_proto->pr_domain->dom_mtx;
 491                 error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH,
 492                     "connec", 0);
 493                 if ((so->so_state & SS_DRAINING)) {
 494                         error = ECONNABORTED;
 495                 }
 496                 if (error)
 497                         break;
 498         }
 499         if (error == 0) {
 500                 error = so->so_error;
 501                 so->so_error = 0;
 502         }
 503 bad:
 504         so->so_state &= ~SS_ISCONNECTING;
 505         socket_unlock(so, 1);
 506         FREE(sa, M_SONAME);
 507         if (error == ERESTART)
 508                 error = EINTR;
 509 out:
 510         file_drop(fd);
 511         return (error);
 512 }
 513
 514 int
 515 socketpair(struct proc *p, struct socketpair_args *uap, __unused register_t *retval)
 516 {
 517         struct fileproc *fp1, *fp2;
 518         struct socket *so1, *so2;
 519         int fd, error, sv[2];
 520
 521         AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
 522         error = socreate(uap->domain, &so1, uap->type, uap->protocol);
 523         if (error)
 524                 return (error);
 525         error = socreate(uap->domain, &so2, uap->type, uap->protocol);
 526         if (error)
 527                 goto free1;
 528
 529         error = falloc(p, &fp1, &fd);
 530         if (error) {
 531                 goto free2;
 532         }
 533         fp1->f_flag = FREAD|FWRITE;
 534         fp1->f_type = DTYPE_SOCKET;
 535         fp1->f_ops = &socketops;
 536         fp1->f_data = (caddr_t)so1;
 537         sv[0] = fd;
 538
 539         error = falloc(p, &fp2, &fd);
 540         if (error) {
 541                 goto free3;
 542         }
 543         fp2->f_flag = FREAD|FWRITE;
 544         fp2->f_type = DTYPE_SOCKET;
 545         fp2->f_ops = &socketops;
 546         fp2->f_data = (caddr_t)so2;
 547         sv[1] = fd;
 548
 549         error = soconnect2(so1, so2);
 550         if (error) {
 551                 goto free4;
 552         }
 553         if (uap->type == SOCK_DGRAM) {
 554                 /*
 555                  * Datagram socket connection is asymmetric.
 556                  */
 557                  error = soconnect2(so2, so1);
 558                  if (error) {
 559                          goto free4;
 560                  }
 561         }
 562
 563         proc_fdlock(p);
 564         *fdflags(p, sv[0]) &= ~UF_RESERVED;
 565         *fdflags(p, sv[1]) &= ~UF_RESERVED;
 566         fp_drop(p, sv[0], fp1, 1);
 567         fp_drop(p, sv[1], fp2, 1);
 568         proc_fdunlock(p);
 569
 570         error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof(int));
 571 #if 0   /* old pipe(2) syscall compatability, unused these days */
 572         retval[0] = sv[0];              /* XXX ??? */
 573         retval[1] = sv[1];              /* XXX ??? */
 574 #endif /* 0 */
 575         return (error);
 576 free4:
 577         fp_free(p, sv[1], fp2);
 578 free3:
 579         fp_free(p, sv[0], fp1);
 580 free2:
 581         (void)soclose(so2);
 582 free1:
 583         (void)soclose(so1);
 584         return (error);
 585 }
 586
 587 static int
 588 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
 589                 int flags, register_t *retval)
 590 {
 591         struct mbuf *control;
 592         struct sockaddr *to;
 593         int error;
 594         struct socket *so;
 595         user_ssize_t len;
 596 #if KTRACE
 597         uio_t ktruio = NULL;
 598 #endif
 599
 600         KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
 601
 602         error = file_socket(s, &so);
 603         if (error )
 604         {
 605             KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
 606             return (error);
 607         }
 608
 609         if (mp->msg_name) {
 610                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
 611                 if (error) {
 612                     KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
 613                         goto out;
 614                 }
 615                 AUDIT_ARG(sockaddr, p, to);
 616         } else {
 617                 to = 0;
 618         }
 619         if (mp->msg_control) {
 620                 if (mp->msg_controllen < ((socklen_t)sizeof(struct cmsghdr))
 621 #if COMPAT_43_SOCKET
 622                     && !(mp->msg_flags & MSG_COMPAT)
 623 #endif
 624                 ) {
 625                         error = EINVAL;
 626                         goto bad;
 627                 }
 628                 error = sockargs(&control, mp->msg_control,
 629                     mp->msg_controllen, MT_CONTROL);
 630                 if (error)
 631                         goto bad;
 632 #if COMPAT_43_SOCKET
 633                 if (mp->msg_flags & MSG_COMPAT) {
 634                         register struct cmsghdr *cm;
 635
 636                         M_PREPEND(control, sizeof(*cm), M_WAIT);
 637                         if (control == 0) {
 638                                 error = ENOBUFS;
 639                                 goto bad;
 640                         } else {
 641                                 cm = mtod(control, struct cmsghdr *);
 642                                 cm->cmsg_len = control->m_len;
 643                                 cm->cmsg_level = SOL_SOCKET;
 644                                 cm->cmsg_type = SCM_RIGHTS;
 645                         }
 646                 }
 647 #endif
 648         } else {
 649                 control = 0;
 650         }
 651
 652 #if KTRACE
 653         if (KTRPOINT(p, KTR_GENIO)) {
 654                 ktruio = uio_duplicate(uiop);
 655         }
 656 #endif
 657
 658         len = uio_resid(uiop);
 659         if (so == NULL)
 660                 error = EBADF;
 661         else
 662                 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
 663                                                              flags);
 664         if (error) {
 665                 if (uio_resid(uiop) != len && (error == ERESTART ||
 666                     error == EINTR || error == EWOULDBLOCK))
 667                         error = 0;
 668                 /* Generation of SIGPIPE can be controlled per socket */
 669                 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
 670                         psignal(p, SIGPIPE);
 671         }
 672         if (error == 0)
 673                 *retval = (int)(len - uio_resid(uiop));
 674 bad:
 675 #if KTRACE
 676         if (ktruio != NULL) {
 677                 if (error == 0) {
 678                         uio_setresid(ktruio, retval[0]);
 679                         ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
 680                 }
 681                 uio_free(ktruio);
 682         }
 683 #endif
 684         if (to)
 685                 FREE(to, M_SONAME);
 686         KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
 687 out:
 688         file_drop(s);
 689         return (error);
 690 }
 691
 692
 693 int
 694 sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
 695 {
 696         struct user_msghdr msg;
 697         int error;
 698         uio_t auio = NULL;
 699
 700         KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
 701         AUDIT_ARG(fd, uap->s);
 702
 703         auio = uio_create(1, 0,
 704                                   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
 705                                   UIO_WRITE);
 706         if (auio == NULL) {
 707                 return (ENOMEM);
 708         }
 709         uio_addiov(auio, uap->buf, uap->len);
 710
 711         msg.msg_name = uap->to;
 712         msg.msg_namelen = uap->tolen;
 713         /* no need to set up msg_iov.  sendit uses uio_t we send it */
 714         msg.msg_iov = 0;
 715         msg.msg_iovlen = 0;
 716         msg.msg_control = 0;
 717         msg.msg_flags = 0;
 718
 719         error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
 720
 721         if (auio != NULL) {
 722                 uio_free(auio);
 723         }
 724
 725 #if HACK_FOR_4056224
 726         /*
 727          * Radar 4056224
 728          * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
 729          * This will be removed in the release following Tiger
 730          */
 731         if (error == ENOTSOCK) {
 732                 struct fileproc *fp;
 733
 734         if (fp_lookup(p, uap->s, &fp, 0) == 0) {
 735                         (void) fp_drop(p, uap->s, fp,0);
 736
 737                         if (fp->f_type == DTYPE_PIPE) {
 738                                 struct write_args write_uap;
 739                                 user_ssize_t write_retval;
 740
 741                                 if (p->p_pid > last_pid_4056224) {
 742                                         last_pid_4056224 = p->p_pid;
 743
 744                                         printf("%s[%d] uses send/recv on a pipe\n",
 745                                                 p->p_comm, p->p_pid);
 746                                 }
 747
 748                                 bzero(&write_uap, sizeof(struct write_args));
 749                                 write_uap.fd = uap->s;
 750                                 write_uap.cbuf = uap->buf;
 751                                 write_uap.nbyte = uap->len;
 752
 753                                 error = write(p, &write_uap, &write_retval);
 754                                 *retval = (int)write_retval;
 755                         }
 756                 }
 757         }
 758 #endif /* HACK_FOR_4056224 */
 759
 760         KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval,0,0,0);
 761
 762         return(error);
 763 }
 764
 765 #if COMPAT_43_SOCKET
 766 int
 767 osend(__unused struct proc *p,
 768           __unused struct osend_args *uap,
 769           __unused register_t *retval)
 770 {
 771         /* these are no longer supported and in fact
 772          * there is no way to call it directly.
 773          * LP64todo - remove this once we're sure there are no clients
 774          */
 775         return (ENOTSUP);
 776 }
 777
 778 int
 779 osendmsg(__unused struct proc *p,
 780                  __unused struct osendmsg_args *uap,
 781                  __unused register_t *retval)
 782 {
 783         /* these are no longer supported and in fact
 784          * there is no way to call it directly.
 785          * LP64todo - remove this once we're sure there are no clients
 786          */
 787         return (ENOTSUP);
 788 }
 789 #endif
 790
 791
 792 int
 793 sendmsg(struct proc *p, register struct sendmsg_args *uap, register_t *retval)
 794 {
 795         struct msghdr msg;
 796         struct user_msghdr user_msg;
 797         caddr_t msghdrp;
 798         int     size_of_msghdr;
 799         int error;
 800         int size_of_iovec;
 801         uio_t auio = NULL;
 802         struct user_iovec *iovp;
 803
 804         KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
 805         AUDIT_ARG(fd, uap->s);
 806         if (IS_64BIT_PROCESS(p)) {
 807                 msghdrp = (caddr_t) &user_msg;
 808                 size_of_msghdr = sizeof(user_msg);
 809                 size_of_iovec = sizeof(struct user_iovec);
 810         }
 811         else {
 812                 msghdrp = (caddr_t) &msg;
 813                 size_of_msghdr = sizeof(msg);
 814                 size_of_iovec = sizeof(struct iovec);
 815         }
 816         error = copyin(uap->msg, msghdrp, size_of_msghdr);
 817         if (error)
 818         {
 819             KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
 820             return (error);
 821         }
 822
 823         /* only need to copy if user process is not 64-bit */
 824         if (!IS_64BIT_PROCESS(p)) {
 825                 user_msg.msg_flags = msg.msg_flags;
 826                 user_msg.msg_controllen = msg.msg_controllen;
 827                 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
 828                 user_msg.msg_iovlen = msg.msg_iovlen;
 829                 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
 830                 user_msg.msg_namelen = msg.msg_namelen;
 831                 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
 832         }
 833
 834         if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
 835                 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
 836                 return (EMSGSIZE);
 837         }
 838
 839         /* allocate a uio large enough to hold the number of iovecs passed */
 840         auio = uio_create(user_msg.msg_iovlen, 0,
 841                                   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
 842                                   UIO_WRITE);
 843         if (auio == NULL) {
 844                 error = ENOBUFS;
 845                 goto done;
 846         }
 847
 848         if (user_msg.msg_iovlen) {
 849                 /* get location of iovecs within the uio.  then copyin the iovecs from
 850                  * user space.
 851                  */
 852                 iovp = uio_iovsaddr(auio);
 853                 if (iovp == NULL) {
 854                         error = ENOBUFS;
 855                         goto done;
 856                 }
 857                 error = copyin(user_msg.msg_iov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
 858                 if (error)
 859                         goto done;
 860                 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
 861
 862                 /* finish setup of uio_t */
 863                 uio_calculateresid(auio);
 864         }
 865         else {
 866                 user_msg.msg_iov = 0;
 867         }
 868
 869 #if COMPAT_43_SOCKET
 870         user_msg.msg_flags = 0;
 871 #endif
 872         error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
 873 done:
 874         if (auio != NULL) {
 875                 uio_free(auio);
 876         }
 877         KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
 878
 879         return (error);
 880 }
 881
 882 static int
 883 recvit(p, s, mp, uiop, namelenp, retval)
 884         register struct proc *p;
 885         int s;
 886         register struct user_msghdr *mp;
 887         uio_t uiop;
 888         user_addr_t namelenp;
 889         register_t *retval;
 890 {
 891         int len, error;
 892         struct mbuf *m, *control = 0;
 893         user_addr_t ctlbuf;
 894         struct socket *so;
 895         struct sockaddr *fromsa = 0;
 896         struct fileproc *fp;
 897 #if KTRACE
 898         uio_t ktruio = NULL;
 899 #endif
 900
 901         KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
 902         proc_fdlock(p);
 903         if ( (error = fp_lookup(p, s, &fp, 1)) ) {
 904             KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
 905                 proc_fdunlock(p);
 906             return (error);
 907         }
 908         if (fp->f_type != DTYPE_SOCKET) {
 909                 fp_drop(p, s, fp,1);
 910                 proc_fdunlock(p);
 911                 return(ENOTSOCK);
 912         }
 913
 914         so = (struct socket *)fp->f_data;
 915
 916         proc_fdunlock(p);
 917         if (uio_resid(uiop) < 0) {
 918                 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
 919                 error = EINVAL;
 920                 goto out1;
 921         }
 922 #if KTRACE
 923         if (KTRPOINT(p, KTR_GENIO)) {
 924                 ktruio = uio_duplicate(uiop);
 925         }
 926 #endif
 927
 928         len = uio_resid(uiop);
 929         if (so == NULL)
 930                 error = EBADF;
 931         else {
 932                 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
 933                         (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
 934                         &mp->msg_flags);
 935         }
 936         AUDIT_ARG(sockaddr, p, fromsa);
 937         if (error) {
 938                 if (uio_resid(uiop) != len && (error == ERESTART ||
 939                     error == EINTR || error == EWOULDBLOCK))
 940                         error = 0;
 941         }
 942 #if KTRACE
 943         if (ktruio != NULL) {
 944                 if (error == 0) {
 945                         uio_setresid(ktruio, len - uio_resid(uiop));
 946                         ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
 947                 }
 948                 uio_free(ktruio);
 949         }
 950 #endif
 951         if (error)
 952                 goto out;
 953         *retval = len - uio_resid(uiop);
 954         if (mp->msg_name) {
 955                 len = mp->msg_namelen;
 956                 if (len <= 0 || fromsa == 0)
 957                         len = 0;
 958                 else {
 959 #ifndef MIN
 960 #define MIN(a,b) ((a)>(b)?(b):(a))
 961 #endif
 962                         /* save sa_len before it is destroyed by MSG_COMPAT */
 963                         len = MIN(len, fromsa->sa_len);
 964 #if COMPAT_43_SOCKET
 965                         if (mp->msg_flags & MSG_COMPAT)
 966                                 ((struct osockaddr *)fromsa)->sa_family =
 967                                     fromsa->sa_family;
 968 #endif
 969                         error = copyout(fromsa, mp->msg_name, (unsigned)len);
 970                         if (error)
 971                                 goto out;
 972                 }
 973                 mp->msg_namelen = len;
 974                 if (namelenp &&
 975                     (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
 976 #if COMPAT_43_SOCKET
 977                         if (mp->msg_flags & MSG_COMPAT)
 978                                 error = 0;      /* old recvfrom didn't check */
 979                         else
 980 #endif
 981                         goto out;
 982                 }
 983         }
 984         if (mp->msg_control) {
 985 #if COMPAT_43_SOCKET
 986                 /*
 987                  * We assume that old recvmsg calls won't receive access
 988                  * rights and other control info, esp. as control info
 989                  * is always optional and those options didn't exist in 4.3.
 990                  * If we receive rights, trim the cmsghdr; anything else
 991                  * is tossed.
 992                  */
 993                 if (control && mp->msg_flags & MSG_COMPAT) {
 994                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 995                             SOL_SOCKET ||
 996                             mtod(control, struct cmsghdr *)->cmsg_type !=
 997                             SCM_RIGHTS) {
 998                                 mp->msg_controllen = 0;
 999                                 goto out;
1000                         }
1001                         control->m_len -= sizeof (struct cmsghdr);
1002                         control->m_data += sizeof (struct cmsghdr);
1003                 }
1004 #endif
1005                 len = mp->msg_controllen;
1006                 m = control;
1007                 mp->msg_controllen = 0;
1008                 ctlbuf = mp->msg_control;
1009
1010                 while (m && len > 0) {
1011                         unsigned int tocopy;
1012
1013                         if (len >= m->m_len)
1014                                 tocopy = m->m_len;
1015                         else {
1016                                 mp->msg_flags |= MSG_CTRUNC;
1017                                 tocopy = len;
1018                         }
1019
1020                         error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, tocopy);
1021                         if (error)
1022                                 goto out;
1023
1024                         ctlbuf += tocopy;
1025                         len -= tocopy;
1026                         m = m->m_next;
1027                 }
1028                 mp->msg_controllen = ctlbuf - mp->msg_control;
1029         }
1030 out:
1031         if (fromsa)
1032                 FREE(fromsa, M_SONAME);
1033         if (control)
1034                 m_freem(control);
1035         KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
1036 out1:
1037         fp_drop(p, s, fp, 0);
1038         return (error);
1039 }
1040
1041
1042 int
1043 recvfrom(p, uap, retval)
1044         struct proc *p;
1045         register struct recvfrom_args /* {
1046                 int     s;
1047                 caddr_t buf;
1048                 size_t  len;
1049                 int     flags;
1050                 caddr_t from;
1051                 int     *fromlenaddr;
1052         } */ *uap;
1053         register_t *retval;
1054 {
1055         struct user_msghdr msg;
1056         int error;
1057         uio_t auio = NULL;
1058
1059         KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
1060         AUDIT_ARG(fd, uap->s);
1061
1062         if (uap->fromlenaddr) {
1063                 error = copyin(uap->fromlenaddr,
1064                     (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1065                 if (error)
1066                         return (error);
1067         } else
1068                 msg.msg_namelen = 0;
1069         msg.msg_name = uap->from;
1070         auio = uio_create(1, 0,
1071                                   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1072                                   UIO_READ);
1073         if (auio == NULL) {
1074                 return (ENOMEM);
1075         }
1076
1077         uio_addiov(auio, uap->buf, uap->len);
1078         /* no need to set up msg_iov.  recvit uses uio_t we send it */
1079         msg.msg_iov = 0;
1080         msg.msg_iovlen = 0;
1081         msg.msg_control = 0;
1082         msg.msg_controllen = 0;
1083         msg.msg_flags = uap->flags;
1084         error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1085         if (auio != NULL) {
1086                 uio_free(auio);
1087         }
1088
1089 #if HACK_FOR_4056224
1090         /*
1091          * Radar 4056224
1092          * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
1093          * This will be removed in the release following Tiger
1094          */
1095         if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1096                 struct fileproc *fp;
1097
1098         if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1099                         (void) fp_drop(p, uap->s, fp,0);
1100
1101                         if (fp->f_type == DTYPE_PIPE) {
1102                                 struct read_args read_uap;
1103                                 user_ssize_t read_retval;
1104
1105                                 if (p->p_pid > last_pid_4056224) {
1106                                         last_pid_4056224 = p->p_pid;
1107
1108                                         printf("%s[%d] uses send/recv on a pipe\n",
1109                                                 p->p_comm, p->p_pid);
1110                                 }
1111
1112                                 bzero(&read_uap, sizeof(struct read_args));
1113                                 read_uap.fd = uap->s;
1114                                 read_uap.cbuf = uap->buf;
1115                                 read_uap.nbyte = uap->len;
1116
1117                                 error = read(p, &read_uap, &read_retval);
1118                                 *retval = (int)read_retval;
1119                         }
1120                 }
1121         }
1122 #endif /* HACK_FOR_4056224 */
1123
1124         KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
1125
1126         return (error);
1127 }
1128
1129 #if COMPAT_43_SOCKET
1130 int
1131 orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval)
1132 {
1133
1134         uap->flags |= MSG_COMPAT;
1135         return (recvfrom(p, uap, retval));
1136 }
1137 #endif
1138
1139
1140 #if COMPAT_43_SOCKET
1141 int
1142 orecv(__unused struct proc *p, __unused struct orecv_args       *uap,
1143                 __unused register_t *retval)
1144 {
1145         /* these are no longer supported and in fact
1146          * there is no way to call it directly.
1147          * LP64todo - remove this once we're sure there are no clients
1148          */
1149
1150         return (ENOTSUP);
1151 }
1152
1153 /*
1154  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
1155  * overlays the new one, missing only the flags, and with the (old) access
1156  * rights where the control fields are now.
1157  */
1158 int
1159 orecvmsg(__unused struct proc *p, __unused struct orecvmsg_args *uap,
1160                 __unused register_t *retval)
1161 {
1162         /* these are no longer supported and in fact
1163          * there is no way to call it directly.
1164          * LP64todo - remove this once we're sure there are no clients
1165          */
1166
1167         return (ENOTSUP);
1168
1169 }
1170 #endif
1171
1172 int
1173 recvmsg(p, uap, retval)
1174         struct proc *p;
1175         struct recvmsg_args *uap;
1176         register_t *retval;
1177 {
1178         struct msghdr msg;
1179         struct user_msghdr user_msg;
1180         caddr_t msghdrp;
1181         int     size_of_msghdr;
1182         user_addr_t uiov;
1183         register int error;
1184         int size_of_iovec;
1185         uio_t auio = NULL;
1186         struct user_iovec *iovp;
1187
1188         KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
1189         AUDIT_ARG(fd, uap->s);
1190         if (IS_64BIT_PROCESS(p)) {
1191                 msghdrp = (caddr_t) &user_msg;
1192                 size_of_msghdr = sizeof(user_msg);
1193                 size_of_iovec = sizeof(struct user_iovec);
1194         }
1195         else {
1196                 msghdrp = (caddr_t) &msg;
1197                 size_of_msghdr = sizeof(msg);
1198                 size_of_iovec = sizeof(struct iovec);
1199         }
1200         error = copyin(uap->msg, msghdrp, size_of_msghdr);
1201         if (error)
1202         {
1203                 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1204                 return (error);
1205         }
1206
1207         /* only need to copy if user process is not 64-bit */
1208         if (!IS_64BIT_PROCESS(p)) {
1209                 user_msg.msg_flags = msg.msg_flags;
1210                 user_msg.msg_controllen = msg.msg_controllen;
1211                 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1212                 user_msg.msg_iovlen = msg.msg_iovlen;
1213                 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1214                 user_msg.msg_namelen = msg.msg_namelen;
1215                 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1216         }
1217
1218         if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1219                 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
1220                 return (EMSGSIZE);
1221         }
1222
1223 #if COMPAT_43_SOCKET
1224         user_msg.msg_flags = uap->flags &~ MSG_COMPAT;
1225 #else
1226         user_msg.msg_flags = uap->flags;
1227 #endif
1228
1229         /* allocate a uio large enough to hold the number of iovecs passed */
1230         auio = uio_create(user_msg.msg_iovlen, 0,
1231                                   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1232                                   UIO_READ);
1233         if (auio == NULL) {
1234                 error = ENOMEM;
1235                 goto done;
1236         }
1237
1238         /* get location of iovecs within the uio.  then copyin the iovecs from
1239          * user space.
1240          */
1241         iovp = uio_iovsaddr(auio);
1242         if (iovp == NULL) {
1243                 error = ENOMEM;
1244                 goto done;
1245         }
1246         uiov = user_msg.msg_iov;
1247         user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1248         error = copyin(uiov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
1249         if (error)
1250                 goto done;
1251
1252         /* finish setup of uio_t */
1253         uio_calculateresid(auio);
1254
1255         error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1256         if (!error) {
1257                 user_msg.msg_iov = uiov;
1258                 /* only need to copy if user process is not 64-bit */
1259                 if (!IS_64BIT_PROCESS(p)) {
1260                         // LP64todo - do all these change?  if not, then no need to copy all of them!
1261                         msg.msg_flags = user_msg.msg_flags;
1262                         msg.msg_controllen = user_msg.msg_controllen;
1263                         msg.msg_control = CAST_DOWN(caddr_t, user_msg.msg_control);
1264                         msg.msg_iovlen = user_msg.msg_iovlen;
1265                         msg.msg_iov = (struct iovec *) CAST_DOWN(caddr_t, user_msg.msg_iov);
1266                         msg.msg_namelen = user_msg.msg_namelen;
1267                         msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name);
1268                 }
1269                 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1270         }
1271 done:
1272         if (auio != NULL) {
1273                 uio_free(auio);
1274         }
1275         KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1276         return (error);
1277 }
1278
1279 /* ARGSUSED */
1280 int
1281 shutdown(__unused struct proc *p, struct shutdown_args *uap, __unused register_t *retval)
1282 {
1283         struct socket * so;
1284         int error;
1285
1286         AUDIT_ARG(fd, uap->s);
1287         error = file_socket(uap->s, &so);
1288         if (error)
1289                 return (error);
1290         if (so == NULL) {
1291                 error = EBADF;
1292                 goto out;
1293         }
1294         error =  soshutdown((struct socket *)so, uap->how);
1295 out:
1296         file_drop(uap->s);
1297         return(error);
1298 }
1299
1300
1301
1302
1303
1304 /* ARGSUSED */
1305 int
1306 setsockopt(struct proc *p, struct setsockopt_args *uap, __unused register_t *retval)
1307 {
1308         struct socket * so;
1309         struct sockopt sopt;
1310         int error;
1311
1312         AUDIT_ARG(fd, uap->s);
1313         if (uap->val == 0 && uap->valsize != 0)
1314                 return (EFAULT);
1315         if (uap->valsize < 0)
1316                 return (EINVAL);
1317
1318         error = file_socket(uap->s, &so);
1319         if (error)
1320                 return (error);
1321
1322         sopt.sopt_dir = SOPT_SET;
1323         sopt.sopt_level = uap->level;
1324         sopt.sopt_name = uap->name;
1325         sopt.sopt_val = uap->val;
1326         sopt.sopt_valsize = uap->valsize;
1327         sopt.sopt_p = p;
1328
1329         if (so == NULL) {
1330                 error = EINVAL;
1331                 goto out;
1332         }
1333         error = sosetopt(so, &sopt);
1334 out:
1335         file_drop(uap->s);
1336         return(error);
1337 }
1338
1339
1340
1341 int
1342 getsockopt(struct proc *p, struct getsockopt_args  *uap, __unused register_t *retval)
1343 {
1344         int             error;
1345         socklen_t       valsize;
1346         struct sockopt  sopt;
1347         struct socket * so;
1348
1349         error = file_socket(uap->s, &so);
1350         if (error)
1351                 return (error);
1352         if (uap->val) {
1353                 error = copyin(uap->avalsize, (caddr_t)&valsize, sizeof (valsize));
1354                 if (error)
1355                         goto out;
1356                 if (valsize < 0) {
1357                         error = EINVAL;
1358                         goto out;
1359                 }
1360         } else
1361                 valsize = 0;
1362
1363         sopt.sopt_dir = SOPT_GET;
1364         sopt.sopt_level = uap->level;
1365         sopt.sopt_name = uap->name;
1366         sopt.sopt_val = uap->val;
1367         sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1368         sopt.sopt_p = p;
1369
1370         if (so == NULL) {
1371                 error = EBADF;
1372                 goto out;
1373         }
1374         error = sogetopt((struct socket *)so, &sopt);
1375         if (error == 0) {
1376                 valsize = sopt.sopt_valsize;
1377                 error = copyout((caddr_t)&valsize, uap->avalsize, sizeof (valsize));
1378         }
1379 out:
1380         file_drop(uap->s);
1381         return (error);
1382 }
1383
1384
1385 /*
1386  * Get socket name.
1387  */
1388 /* ARGSUSED */
1389 static int
1390 getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused register_t *retval,
1391         int compat)
1392 {
1393         struct socket *so;
1394         struct sockaddr *sa;
1395         socklen_t len;
1396         int error;
1397
1398         error = file_socket(uap->fdes, &so);
1399         if (error)
1400                 return (error);
1401         error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
1402         if (error)
1403                 goto out;
1404         if (so == NULL) {
1405                 error = EBADF;
1406                 goto out;
1407         }
1408         sa = 0;
1409         socket_lock(so, 1);
1410         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1411         if (error == 0)
1412         {
1413                 struct socket_filter_entry *filter;
1414                 int     filtered = 0;
1415                 for (filter = so->so_filt; filter && error == 0;
1416                          filter = filter->sfe_next_onsocket) {
1417                         if (filter->sfe_filter->sf_filter.sf_getsockname) {
1418                                 if (!filtered) {
1419                                         filtered = 1;
1420                                         sflt_use(so);
1421                                         socket_unlock(so, 0);
1422                                 }
1423                                 error = filter->sfe_filter->sf_filter.sf_getsockname(filter->sfe_cookie,
1424                                                         so, &sa);
1425                         }
1426                 }
1427
1428                 if (error == EJUSTRETURN)
1429                         error = 0;
1430
1431                 if (filtered) {
1432                         socket_lock(so, 0);
1433                         sflt_unuse(so);
1434                 }
1435         }
1436         socket_unlock(so, 1);
1437         if (error)
1438                 goto bad;
1439         if (sa == 0) {
1440                 len = 0;
1441                 goto gotnothing;
1442         }
1443
1444         len = MIN(len, sa->sa_len);
1445 #if COMPAT_43_SOCKET
1446         if (compat)
1447                 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1448 #endif
1449         error = copyout((caddr_t)sa, uap->asa, len);
1450         if (error == 0)
1451 gotnothing:
1452                 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1453 bad:
1454         if (sa)
1455                 FREE(sa, M_SONAME);
1456 out:
1457         file_drop(uap->fdes);
1458         return (error);
1459 }
1460
1461 int
1462 getsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
1463 {
1464         return (getsockname1(p, uap, retval, 0));
1465 }
1466
1467 #if COMPAT_43_SOCKET
1468 int
1469 ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
1470 {
1471         return (getsockname1(p, uap, retval, 1));
1472 }
1473 #endif /* COMPAT_43_SOCKET */
1474
1475 /*
1476  * Get name of peer for connected socket.
1477  */
1478 /* ARGSUSED */
1479 int
1480 getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused register_t *retval,
1481         int compat)
1482 {
1483         struct socket *so;
1484         struct sockaddr *sa;
1485         socklen_t len;
1486         int error;
1487
1488         error = file_socket(uap->fdes, &so);
1489         if (error)
1490                 return (error);
1491         if (so == NULL) {
1492                 error = EBADF;
1493                 goto out;
1494         }
1495
1496         socket_lock(so, 1);
1497
1498         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1499                 socket_unlock(so, 1);
1500                 error = ENOTCONN;
1501                 goto out;
1502         }
1503         error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
1504         if (error) {
1505                 socket_unlock(so, 1);
1506                 goto out;
1507         }
1508         sa = 0;
1509         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1510         if (error == 0)
1511         {
1512                 struct socket_filter_entry *filter;
1513                 int     filtered = 0;
1514                 for (filter = so->so_filt; filter && error == 0;
1515                          filter = filter->sfe_next_onsocket) {
1516                         if (filter->sfe_filter->sf_filter.sf_getpeername) {
1517                                 if (!filtered) {
1518                                         filtered = 1;
1519                                         sflt_use(so);
1520                                         socket_unlock(so, 0);
1521                                 }
1522                                 error = filter->sfe_filter->sf_filter.sf_getpeername(filter->sfe_cookie,
1523                                                         so, &sa);
1524                         }
1525                 }
1526
1527                 if (error == EJUSTRETURN)
1528                         error = 0;
1529
1530                 if (filtered) {
1531                         socket_lock(so, 0);
1532                         sflt_unuse(so);
1533                 }
1534         }
1535         socket_unlock(so, 1);
1536         if (error)
1537                 goto bad;
1538         if (sa == 0) {
1539                 len = 0;
1540                 goto gotnothing;
1541         }
1542         len = MIN(len, sa->sa_len);
1543 #if COMPAT_43_SOCKET
1544         if (compat)
1545                 ((struct osockaddr *)sa)->sa_family =
1546                     sa->sa_family;
1547 #endif
1548         error = copyout(sa, uap->asa, len);
1549         if (error)
1550                 goto bad;
1551 gotnothing:
1552         error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1553 bad:
1554         if (sa) FREE(sa, M_SONAME);
1555 out:
1556         file_drop(uap->fdes);
1557         return (error);
1558 }
1559
1560 int
1561 getpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
1562 {
1563
1564         return (getpeername1(p, uap, retval, 0));
1565 }
1566
1567 #if COMPAT_43_SOCKET
1568 int
1569 ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
1570 {
1571
1572         return (getpeername1(p, uap, retval, 1));
1573 }
1574 #endif /* COMPAT_43_SOCKET */
1575
1576 int
1577 sockargs(mp, data, buflen, type)
1578         struct mbuf **mp;
1579         user_addr_t data;
1580         int buflen, type;
1581 {
1582         register struct sockaddr *sa;
1583         register struct mbuf *m;
1584         int error;
1585
1586         if ((u_int)buflen > MLEN) {
1587 #if COMPAT_43_SOCKET
1588                 if (type == MT_SONAME && (u_int)buflen <= 112)
1589                         buflen = MLEN;          /* unix domain compat. hack */
1590                 else
1591 #endif
1592                 if ((u_int)buflen > MCLBYTES)
1593                         return (EINVAL);
1594         }
1595         m = m_get(M_WAIT, type);
1596         if (m == NULL)
1597                 return (ENOBUFS);
1598         if ((u_int)buflen > MLEN) {
1599                 MCLGET(m, M_WAIT);
1600                 if ((m->m_flags & M_EXT) == 0) {
1601                         m_free(m);
1602                         return ENOBUFS;
1603                 }
1604         }
1605         m->m_len = buflen;
1606         error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1607         if (error)
1608                 (void) m_free(m);
1609         else {
1610                 *mp = m;
1611                 if (type == MT_SONAME) {
1612                         sa = mtod(m, struct sockaddr *);
1613
1614 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1615                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1616                                 sa->sa_family = sa->sa_len;
1617 #endif
1618                         sa->sa_len = buflen;
1619                 }
1620         }
1621         return (error);
1622 }
1623
1624 /*
1625  * Given a user_addr_t of length len, allocate and fill out a *sa.
1626  */
1627 int
1628 getsockaddr(struct sockaddr **namp, user_addr_t uaddr, size_t len)
1629 {
1630         struct sockaddr *sa;
1631         int error;
1632
1633         if (len > SOCK_MAXADDRLEN)
1634                 return ENAMETOOLONG;
1635
1636         if (len == 0)
1637              return EINVAL;
1638
1639         MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1640         if (sa == NULL) {
1641                 return ENOMEM;
1642         }
1643         error = copyin(uaddr, (caddr_t)sa, len);
1644         if (error) {
1645                 FREE(sa, M_SONAME);
1646         } else {
1647 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1648                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1649                         sa->sa_family = sa->sa_len;
1650 #endif
1651                 sa->sa_len = len;
1652                 *namp = sa;
1653         }
1654         return error;
1655 }
1656
1657
1658 #if SENDFILE
1659 /*
1660  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1661  * XXX - The sf_buf functions are currently private to sendfile(2), so have
1662  * been made static, but may be useful in the future for doing zero-copy in
1663  * other parts of the networking code.
1664  */
1665 static void
1666 sf_buf_init(void *arg)
1667 {
1668         int i;
1669
1670         SLIST_INIT(&sf_freelist);
1671         kmem_alloc_pageable(kernel_map, &sf_base, nsfbufs * PAGE_SIZE);
1672         MALLOC(sf_bufs, struct sf_buf *, nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT|M_ZERO);
1673         if (sf_bufs == NULL)
1674                 return;         /* XXX silently fail leaving sf_bufs NULL */
1675
1676         for (i = 0; i < nsfbufs; i++) {
1677                 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1678                 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1679         }
1680 }
1681
1682 /*
1683  * Get an sf_buf from the freelist. Will block if none are available.
1684  */
1685 static struct sf_buf *
1686 sf_buf_alloc()
1687 {
1688         struct sf_buf *sf;
1689
1690         while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1691                 sf_buf_alloc_want = 1;
1692                 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1693         }
1694         SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1695         sf->refcnt = 1;
1696         return (sf);
1697 }
1698
1699 #define dtosf(x)        (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1700 static void
1701 sf_buf_ref(caddr_t addr, u_int size)
1702 {
1703         struct sf_buf *sf;
1704
1705         sf = dtosf(addr);
1706         if (sf->refcnt == 0)
1707                 panic("sf_buf_ref: referencing a free sf_buf");
1708         sf->refcnt++;
1709 }
1710
1711 /*
1712  * Lose a reference to an sf_buf. When none left, detach mapped page
1713  * and release resources back to the system.
1714  *
1715  * Must be called at splimp.
1716  */
1717 static void
1718 sf_buf_free(caddr_t addr, u_int size)
1719 {
1720         struct sf_buf *sf;
1721         struct vm_page *m;
1722
1723         sf = dtosf(addr);
1724         if (sf->refcnt == 0)
1725                 panic("sf_buf_free: freeing free sf_buf");
1726         sf->refcnt--;
1727         if (sf->refcnt == 0) {
1728                 pmap_qremove((vm_offset_t)addr, 1);
1729                 m = sf->m;
1730                 vm_page_unwire(m, 0);
1731                 /*
1732                  * Check for the object going away on us. This can
1733                  * happen since we don't hold a reference to it.
1734                  * If so, we're responsible for freeing the page.
1735                  */
1736                 if (m->wire_count == 0 && m->object == NULL)
1737                         vm_page_lock_queues();
1738                         vm_page_free(m);
1739                         vm_page_unlock_queues();
1740                 sf->m = NULL;
1741                 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1742                 if (sf_buf_alloc_want) {
1743                         sf_buf_alloc_want = 0;
1744                         wakeup(&sf_freelist);
1745                 }
1746         }
1747 }
1748
1749 /*
1750  * sendfile(2).
1751  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1752  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1753  *
1754  * Send a file specified by 'fd' and starting at 'offset' to a socket
1755  * specified by 's'. Send only 'nbytes' of the file or until EOF if
1756  * nbytes == 0. Optionally add a header and/or trailer to the socket
1757  * output. If specified, write the total number of bytes sent into *sbytes.
1758  */
1759 int
1760 sendfile(struct proc *p, struct sendfile_args *uap)
1761 {
1762         struct fileproc *fp;
1763         struct vnode *vp;
1764         struct vm_object *obj;
1765         struct socket *so;
1766         struct mbuf *m;
1767         struct sf_buf *sf;
1768         struct vm_page *pg;
1769         struct writev_args nuap;
1770         struct sf_hdtr hdtr;
1771         off_t off, xfsize, sbytes = 0;
1772         int error = 0, s;
1773
1774         if (sf_bufs == NULL) {
1775                 /* Fail if initialization failed */
1776                 return ENOSYS;
1777         }
1778
1779         /*
1780          * Do argument checking. Must be a regular file in, stream
1781          * type and connected socket out, positive offset.
1782          */
1783         if (error = fp_getfvp(p, uap->fd, &fp, &vp))
1784                 goto done;
1785         if (fp->f_flag & FREAD) == 0) {
1786                 error = EBADF;
1787                 goto done1;
1788         }
1789         obj = vp->v_object;
1790         if (vp->v_type != VREG || obj == NULL) {
1791                 error = EINVAL;
1792                 goto done1;
1793         }
1794         error = file_socket(uap->s, &so);
1795         if (error)
1796                 goto done1;
1797         if (so == NULL) {
1798                 error = EBADF;
1799                 goto done2;
1800         }
1801
1802         socket_lock(so, 1);
1803
1804         if (so->so_type != SOCK_STREAM) {
1805                 error = EINVAL;
1806                 goto done3;
1807         }
1808         if ((so->so_state & SS_ISCONNECTED) == 0) {
1809                 error = ENOTCONN;
1810                 goto done3;
1811         }
1812         if (uap->offset < 0) {
1813                 error = EINVAL;
1814                 goto done3;
1815         }
1816
1817         /*
1818          * If specified, get the pointer to the sf_hdtr struct for
1819          * any headers/trailers.
1820          */
1821         if (uap->hdtr != NULL) {
1822                 error = copyin(CAST_USER_ADDR_T(uap->hdtr), &hdtr, sizeof(hdtr));
1823                 if (error)
1824                         goto done3;
1825                 /*
1826                  * Send any headers. Wimp out and use writev(2).
1827                  */
1828                 if (hdtr.headers != NULL) {
1829                         nuap.fd = uap->s;
1830                         nuap.iovp = hdtr.headers;
1831                         nuap.iovcnt = hdtr.hdr_cnt;
1832                         error = writev(p, &nuap);
1833                         if (error)
1834                                 goto done3;
1835                         sbytes += p->p_retval[0];
1836                 }
1837         }
1838
1839         /*
1840          * Protect against multiple writers to the socket.
1841          */
1842         (void) sblock(&so->so_snd, M_WAIT);
1843
1844         /*
1845          * Loop through the pages in the file, starting with the requested
1846          * offset. Get a file page (do I/O if necessary), map the file page
1847          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1848          * it on the socket.
1849          */
1850         for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1851                 vm_object_offset_t pindex;
1852                 vm_object_offset_t pgoff;
1853
1854                 pindex = OFF_TO_IDX(off);
1855 retry_lookup:
1856                 /*
1857                  * Calculate the amount to transfer. Not to exceed a page,
1858                  * the EOF, or the passed in nbytes.
1859                  */
1860                 xfsize = obj->un_pager.vnp.vnp_size - off;
1861                 if (xfsize > PAGE_SIZE_64)
1862                         xfsize = PAGE_SIZE;
1863                 pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
1864                 if (PAGE_SIZE - pgoff < xfsize)
1865                         xfsize = PAGE_SIZE_64 - pgoff;
1866                 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1867                         xfsize = uap->nbytes - sbytes;
1868                 if (xfsize <= 0)
1869                         break;
1870                 /*
1871                  * Optimize the non-blocking case by looking at the socket space
1872                  * before going to the extra work of constituting the sf_buf.
1873                  */
1874                 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1875                         if (so->so_state & SS_CANTSENDMORE)
1876                                 error = EPIPE;
1877                         else
1878                                 error = EAGAIN;
1879                         sbunlock(&so->so_snd, 0); /* will release lock */
1880                         goto done2;
1881                 }
1882                 /*
1883                  * Attempt to look up the page. If the page doesn't exist or the
1884                  * part we're interested in isn't valid, then read it from disk.
1885                  * If some other part of the kernel has this page (i.e. it's busy),
1886                  * then disk I/O may be occuring on it, so wait and retry.
1887                  */
1888                 pg = vm_page_lookup(obj, pindex);
1889                 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1890                     !vm_page_is_valid(pg, pgoff, xfsize))) {
1891                         struct uio auio;
1892                         struct iovec aiov;
1893                         int bsize;
1894
1895                         if (pg == NULL) {
1896                                 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1897                                 if (pg == NULL) {
1898                                         VM_WAIT;
1899                                         goto retry_lookup;
1900                                 }
1901                                 /*
1902                                  * don't just clear PG_BUSY manually -
1903                                  * vm_page_alloc() should be considered opaque,
1904                                  * use the VM routine provided to clear
1905                                  * PG_BUSY.
1906                                  */
1907                                 vm_page_wakeup(pg);
1908
1909                         }
1910                         /*
1911                          * Ensure that our page is still around when the I/O completes.
1912                          */
1913                         vm_page_io_start(pg);
1914                         vm_page_wire(pg);
1915                         /*
1916                          * Get the page from backing store.
1917                          */
1918                         bsize = vp->v_mount->mnt_vfsstat.f_iosize;
1919                         auio.uio_iov = &aiov;
1920                         auio.uio_iovcnt = 1;
1921                         aiov.iov_base = 0;
1922                         aiov.iov_len = MAXBSIZE;
1923                         auio.uio_offset = trunc_page(off);
1924                         auio.uio_segflg = UIO_NOCOPY;
1925                         auio.uio_rw = UIO_READ;
1926                         uio_setresid(&auio, MAXBSIZE);
1927                         error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1928                                 p->p_ucred);
1929                         vm_page_flag_clear(pg, PG_ZERO);
1930                         vm_page_io_finish(pg);
1931                         if (error) {
1932                                 vm_page_unwire(pg, 0);
1933                                 /*
1934                                  * See if anyone else might know about this page.
1935                                  * If not and it is not valid, then free it.
1936                                  */
1937                                 if (pg->wire_count == 0 && pg->valid == 0 &&
1938                                     pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1939                                     pg->hold_count == 0)
1940                                         vm_page_lock_queues();
1941                                         vm_page_free(pg);
1942                                         vm_page_unlock_queues();
1943                                 sbunlock(&so->so_snd, 0); /* will release socket lock */
1944                                 goto done2;
1945                         }
1946                 } else {
1947                         if ((pg->flags & PG_BUSY) || pg->busy)  {
1948                                 s = splvm();
1949                                 if ((pg->flags & PG_BUSY) || pg->busy) {
1950                                         /*
1951                                          * Page is busy. Wait and retry.
1952                                          */
1953                                         vm_page_flag_set(pg, PG_WANTED);
1954                                         tsleep(pg, PVM, "sfpbsy", 0);
1955                                         goto retry_lookup;
1956                                 }
1957                         }
1958                         /*
1959                          * Protect from having the page ripped out from beneath us.
1960                          */
1961                         vm_page_wire(pg);
1962                 }
1963                 /*
1964                  * Allocate a kernel virtual page and insert the physical page
1965                  * into it.
1966                  */
1967                 sf = sf_buf_alloc();
1968                 sf->m = pg;
1969                 pmap_qenter(sf->kva, &pg, 1);
1970                 /*
1971                  * Get an mbuf header and set it up as having external storage.
1972                  */
1973                 MGETHDR(m, M_WAIT, MT_DATA);
1974                 if (m == NULL) {
1975                         error = ENOBUFS;
1976                         sbunlock(&so->so_snd, 0); /* will release socket lock */
1977                         goto done2;
1978                 }
1979                 m->m_ext.ext_free = sf_buf_free;
1980                 m->m_ext.ext_ref = sf_buf_ref;
1981                 m->m_ext.ext_buf = (void *)sf->kva;
1982                 m->m_ext.ext_size = PAGE_SIZE;
1983                 m->m_data = (char *) sf->kva + pgoff;
1984                 m->m_flags |= M_EXT;
1985                 m->m_pkthdr.len = m->m_len = xfsize;
1986                 /*
1987                  * Add the buffer to the socket buffer chain.
1988                  */
1989 retry_space:
1990                 /*
1991                  * Make sure that the socket is still able to take more data.
1992                  * CANTSENDMORE being true usually means that the connection
1993                  * was closed. so_error is true when an error was sensed after
1994                  * a previous send.
1995                  * The state is checked after the page mapping and buffer
1996                  * allocation above since those operations may block and make
1997                  * any socket checks stale. From this point forward, nothing
1998                  * blocks before the pru_send (or more accurately, any blocking
1999                  * results in a loop back to here to re-check).
2000                  */
2001                 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2002                         if (so->so_state & SS_CANTSENDMORE) {
2003                                 error = EPIPE;
2004                         } else {
2005                                 error = so->so_error;
2006                                 so->so_error = 0;
2007                         }
2008                         m_freem(m);
2009                         sbunlock(&so->so_snd, 0); /* will release socket lock */
2010                         goto done2;
2011                 }
2012                 /*
2013                  * Wait for socket space to become available. We do this just
2014                  * after checking the connection state above in order to avoid
2015                  * a race condition with sbwait().
2016                  */
2017                 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2018                         if (so->so_state & SS_NBIO) {
2019                                 m_freem(m);
2020                                 sbunlock(&so->so_snd, 0); /* will release socket lock */
2021                                 error = EAGAIN;
2022                                 goto done2;
2023                         }
2024                         error = sbwait(&so->so_snd);
2025                         /*
2026                          * An error from sbwait usually indicates that we've
2027                          * been interrupted by a signal. If we've sent anything
2028                          * then return bytes sent, otherwise return the error.
2029                          */
2030                         if (error) {
2031                                 m_freem(m);
2032                                 sbunlock(&so->so_snd, 0);
2033                                 goto done2;
2034                         }
2035                         goto retry_space;
2036                 }
2037                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
2038                 splx(s);
2039                 if (error) {
2040                         sbunlock(&so->so_snd, 0); /* will release socket lock */
2041                         goto done2;
2042                 }
2043         }
2044         sbunlock(&so->so_snd, 0); /* will release socket lock */
2045
2046         /*
2047          * Send trailers. Wimp out and use writev(2).
2048          */
2049         if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2050                         nuap.fd = uap->s;
2051                         nuap.iovp = hdtr.trailers;
2052                         nuap.iovcnt = hdtr.trl_cnt;
2053                         error = writev(p, &nuap);
2054                         if (error)
2055                                 goto done2;
2056                         sbytes += p->p_retval[0];
2057         }
2058 done2:
2059         file_drop(uap->s);
2060 done1:
2061         file_drop(uap->fd);
2062 done:
2063         if (uap->sbytes != NULL) {
2064                 /* XXX this appears bogus for some early failure conditions */
2065                 copyout(&sbytes, CAST_USER_ADDR_T(uap->sbytes), sizeof(off_t));
2066         }
2067         return (error);
2068 done3:
2069         socket_unlock(so, 1);
2070         goto done2;
2071 }
2072
2073 #endif