1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106 #include <machine/limits.h>
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
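/*
 * Illustrative call sequence (a simplified sketch; the sockaddr, uio and
 * flags setup shown here is assumed, and error handling is omitted):
 *
 *	struct socket *so;
 *	socreate(PF_INET, &so, SOCK_STREAM, 0);		// allocate + pru_attach
 *	sobind(so, (struct sockaddr *)&sin);		// optional local bind
 *	soconnect(so, (struct sockaddr *)&sin);		// or solisten(so, backlog)
 *	sosend(so, NULL, uio, NULL, NULL, 0);		// write
 *	soreceive(so, NULL, uio, NULL, NULL, &flags);	// read
 *	soclose(so);					// drop the file reference
 */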
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
186
187 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
188
189 /*
190 * allocate the lock attribute for socket cache mutex
191 */
192 so_cache_mtx_attr = lck_attr_alloc_init();
193 lck_attr_setdefault(so_cache_mtx_attr);
194
195 so_cache_init_done = 1;
196
197 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
198
199 if (so_cache_mtx == NULL)
200 return; /* we're hosed... */
201
202 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
203 get_inpcb_str_size() + 4 +
204 get_tcp_str_size());
205 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
206 #if TEMPDEBUG
207 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
208 #endif
209 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
210
211 so_cache_zone_element_size = str_size;
212
213 sflt_init();
214
215 }
216
217 void cached_sock_alloc(so, waitok)
218 struct socket **so;
219 int waitok;
220
221 {
222 caddr_t temp;
223 register u_long offset;
224
225
226 lck_mtx_lock(so_cache_mtx);
227
228 if (cached_sock_count) {
229 cached_sock_count--;
230 *so = socket_cache_head;
231 if (*so == 0)
232 panic("cached_sock_alloc: cached sock is null");
233
234 socket_cache_head = socket_cache_head->cache_next;
235 if (socket_cache_head)
236 socket_cache_head->cache_prev = 0;
237 else
238 socket_cache_tail = 0;
239
240 lck_mtx_unlock(so_cache_mtx);
241
242 temp = (*so)->so_saved_pcb;
243 bzero((caddr_t)*so, sizeof(struct socket));
244 #if TEMPDEBUG
245 kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so,
246 cached_sock_count);
247 #endif
248 (*so)->so_saved_pcb = temp;
249 (*so)->cached_in_sock_layer = 1;
250
251 }
252 else {
253 #if TEMPDEBUG
254 kprintf("Allocating cached sock %x from memory\n", *so);
255 #endif
256
257 lck_mtx_unlock(so_cache_mtx);
258
259 if (waitok)
260 *so = (struct socket *) zalloc(so_cache_zone);
261 else
262 *so = (struct socket *) zalloc_noblock(so_cache_zone);
263
264 if (*so == 0)
265 return;
266
267 bzero((caddr_t)*so, sizeof(struct socket));
268
269 /*
270 * Define offsets for extra structures into our single block of
271 * memory. Align extra structures on longword boundaries.
272 */
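		/*
		 * Example of the rounding that follows, assuming a misaligned
		 * offset: if offset ends in ...5 (offset & 0x3 != 0), then
		 * offset += 4 yields ...9 and offset &= 0xfffffffc yields ...8,
		 * the next longword boundary -- the same result as
		 * (offset + 3) & ~0x3 for misaligned values.  The single zone
		 * element is thus carved into
		 * [struct socket][pad][inpcb storage][pad][tcpcb storage],
		 * matching the element size computed in socketinit().
		 */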
273
274
275 offset = (u_long) *so;
276 offset += sizeof(struct socket);
277 if (offset & 0x3) {
278 offset += 4;
279 offset &= 0xfffffffc;
280 }
281 (*so)->so_saved_pcb = (caddr_t) offset;
282 offset += get_inpcb_str_size();
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287
288 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
289 #if TEMPDEBUG
290 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
291 (*so)->so_saved_pcb,
292 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
293 #endif
294 }
295
296 (*so)->cached_in_sock_layer = 1;
297 }
298
299
300 void cached_sock_free(so)
301 struct socket *so;
302 {
303
304 lck_mtx_lock(so_cache_mtx);
305
306 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
307 --cached_sock_count;
308 lck_mtx_unlock(so_cache_mtx);
309 #if TEMPDEBUG
310 kprintf("Freeing overflowed cached socket %x\n", so);
311 #endif
312 zfree(so_cache_zone, so);
313 }
314 else {
315 #if TEMPDEBUG
316 kprintf("Freeing socket %x into cache\n", so);
317 #endif
318 if (so_cache_hw < cached_sock_count)
319 so_cache_hw = cached_sock_count;
320
321 so->cache_next = socket_cache_head;
322 so->cache_prev = 0;
323 if (socket_cache_head)
324 socket_cache_head->cache_prev = so;
325 else
326 socket_cache_tail = so;
327
328 so->cache_timestamp = so_cache_time;
329 socket_cache_head = so;
330 lck_mtx_unlock(so_cache_mtx);
331 }
332
333 #if TEMPDEBUG
334 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
335 #endif
336
337
338 }
339
340
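/*
 * so_cache_timer() ages the cached-socket free list: entries that have sat
 * in the cache for SO_CACHE_TIME_LIMIT or more timer passes are returned to
 * the zone, at most SO_CACHE_MAX_FREE_BATCH per pass, and the timeout then
 * re-arms itself to run again SO_CACHE_FLUSH_INTERVAL seconds later.
 */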
341 void so_cache_timer()
342 {
343 register struct socket *p;
344 register int n_freed = 0;
345
346
347 lck_mtx_lock(so_cache_mtx);
348
349 ++so_cache_time;
350
351 while ( (p = socket_cache_tail) )
352 {
353 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
354 break;
355
356 so_cache_timeouts++;
357
358 if ( (socket_cache_tail = p->cache_prev) )
359 p->cache_prev->cache_next = 0;
360 if (--cached_sock_count == 0)
361 socket_cache_head = 0;
362
363
364 zfree(so_cache_zone, p);
365
366 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
367 {
368 so_cache_max_freed++;
369 break;
370 }
371 }
372 lck_mtx_unlock(so_cache_mtx);
373
374 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
375
376
377 }
378 #endif /* __APPLE__ */
379
380 /*
381 * Get a socket structure from our zone, and initialize it.
382 * We don't implement `waitok' yet (see comments in uipc_domain.c).
383 * Note that it would probably be better to allocate socket
384 * and PCB at the same time, but I'm not convinced that all
385 * the protocols can be easily modified to do this.
386 */
387 struct socket *
388 soalloc(waitok, dom, type)
389 int waitok;
390 int dom;
391 int type;
392 {
393 struct socket *so;
394
395 if ((dom == PF_INET) && (type == SOCK_STREAM))
396 cached_sock_alloc(&so, waitok);
397 else
398 {
399 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
400 if (so)
401 bzero(so, sizeof *so);
402 }
403 /* XXX race condition for reentrant kernel */
404 //###LD Atomic add for so_gencnt
405 if (so) {
406 so->so_gencnt = ++so_gencnt;
407 so->so_zone = socket_zone;
408 }
409
410 return so;
411 }
412
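/*
 * socreate() resolves the protocol switch entry (by explicit protocol number
 * via pffindproto(), or by socket type via pffindtype()), allocates the
 * socket with soalloc(), and then calls the protocol's pru_attach handler,
 * which allocates the PCB and, where needed, the per-PCB lock.
 */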
413 int
414 socreate(dom, aso, type, proto)
415 int dom;
416 struct socket **aso;
417 register int type;
418 int proto;
419 {
420 struct proc *p = current_proc();
421 register struct protosw *prp;
422 register struct socket *so;
423 register int error = 0;
424 #if TCPDEBUG
425 extern int tcpconsdebug;
426 #endif
427 if (proto)
428 prp = pffindproto(dom, proto, type);
429 else
430 prp = pffindtype(dom, type);
431
432 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
433 return (EPROTONOSUPPORT);
434 #ifndef __APPLE__
435
436 if (p->p_prison && jail_socket_unixiproute_only &&
437 prp->pr_domain->dom_family != PF_LOCAL &&
438 prp->pr_domain->dom_family != PF_INET &&
439 prp->pr_domain->dom_family != PF_ROUTE) {
440 return (EPROTONOSUPPORT);
441 }
442
443 #endif
444 if (prp->pr_type != type)
445 return (EPROTOTYPE);
446 so = soalloc(p != 0, dom, type);
447 if (so == 0)
448 return (ENOBUFS);
449
450 TAILQ_INIT(&so->so_incomp);
451 TAILQ_INIT(&so->so_comp);
452 so->so_type = type;
453
454 #ifdef __APPLE__
455 if (p != 0) {
456 so->so_uid = kauth_cred_getuid(kauth_cred_get());
457 if (!suser(kauth_cred_get(),NULL))
458 so->so_state = SS_PRIV;
459 }
460 #else
461 so->so_cred = kauth_cred_get_with_ref();
462 #endif
463 so->so_proto = prp;
464 #ifdef __APPLE__
465 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
466 so->so_rcv.sb_so = so->so_snd.sb_so = so;
467 #endif
468
469         //### Attachment will create the per-PCB lock if necessary and increase the refcount
470
471 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
472 if (error) {
473 /*
474 * Warning:
475 * If so_pcb is not zero, the socket will be leaked,
476                  * so the protocol attachment handler must be coded carefully
477 */
478 so->so_state |= SS_NOFDREF;
479 sofreelastref(so, 1);
480 return (error);
481 }
482 so->so_usecount++;
483 #ifdef __APPLE__
484 prp->pr_domain->dom_refs++;
485 TAILQ_INIT(&so->so_evlist);
486
487 /* Attach socket filters for this protocol */
488 sflt_initsock(so);
489 #if TCPDEBUG
490 if (tcpconsdebug == 2)
491 so->so_options |= SO_DEBUG;
492 #endif
493 #endif
494
495 *aso = so;
496 return (0);
497 }
498
499 int
500 sobind(so, nam)
501 struct socket *so;
502 struct sockaddr *nam;
503
504 {
505 struct proc *p = current_proc();
506 int error = 0;
507 struct socket_filter_entry *filter;
508 int filtered = 0;
509
510 socket_lock(so, 1);
511
512 /* Socket filter */
513 error = 0;
514 for (filter = so->so_filt; filter && (error == 0);
515 filter = filter->sfe_next_onsocket) {
516 if (filter->sfe_filter->sf_filter.sf_bind) {
517 if (filtered == 0) {
518 filtered = 1;
519 sflt_use(so);
520 socket_unlock(so, 0);
521 }
522 error = filter->sfe_filter->sf_filter.sf_bind(
523 filter->sfe_cookie, so, nam);
524 }
525 }
526 if (filtered != 0) {
527 socket_lock(so, 0);
528 sflt_unuse(so);
529 }
530 /* End socket filter */
531
532 if (error == 0)
533 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
534
535 socket_unlock(so, 1);
536
537 if (error == EJUSTRETURN)
538 error = 0;
539
540 return (error);
541 }
542
543 void
544 sodealloc(so)
545 struct socket *so;
546 {
547 so->so_gencnt = ++so_gencnt;
548
549 #ifndef __APPLE__
550 if (so->so_rcv.sb_hiwat)
551 (void)chgsbsize(so->so_cred->cr_uidinfo,
552 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
553 if (so->so_snd.sb_hiwat)
554 (void)chgsbsize(so->so_cred->cr_uidinfo,
555 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
556 #ifdef INET
557 if (so->so_accf != NULL) {
558 if (so->so_accf->so_accept_filter != NULL &&
559 so->so_accf->so_accept_filter->accf_destroy != NULL) {
560 so->so_accf->so_accept_filter->accf_destroy(so);
561 }
562 if (so->so_accf->so_accept_filter_str != NULL)
563 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
564 FREE(so->so_accf, M_ACCF);
565 }
566 #endif /* INET */
567 kauth_cred_rele(so->so_cred);
568 zfreei(so->so_zone, so);
569 #else
570 if (so->cached_in_sock_layer == 1)
571 cached_sock_free(so);
572 else {
573 if (so->cached_in_sock_layer == -1)
574 panic("sodealloc: double dealloc: so=%x\n", so);
575 so->cached_in_sock_layer = -1;
576 FREE_ZONE(so, sizeof(*so), so->so_zone);
577 }
578 #endif /* __APPLE__ */
579 }
580
581 int
582 solisten(so, backlog)
583 register struct socket *so;
584 int backlog;
585
586 {
587 struct proc *p = current_proc();
588 int error;
589
590 socket_lock(so, 1);
591
592 {
593 struct socket_filter_entry *filter;
594 int filtered = 0;
595 error = 0;
596 for (filter = so->so_filt; filter && (error == 0);
597 filter = filter->sfe_next_onsocket) {
598 if (filter->sfe_filter->sf_filter.sf_listen) {
599 if (filtered == 0) {
600 filtered = 1;
601 sflt_use(so);
602 socket_unlock(so, 0);
603 }
604 error = filter->sfe_filter->sf_filter.sf_listen(
605 filter->sfe_cookie, so);
606 }
607 }
608 if (filtered != 0) {
609 socket_lock(so, 0);
610 sflt_unuse(so);
611 }
612 }
613
614 if (error == 0) {
615 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
616 }
617
618 if (error) {
619 socket_unlock(so, 1);
620 if (error == EJUSTRETURN)
621 error = 0;
622 return (error);
623 }
624
625 if (TAILQ_EMPTY(&so->so_comp))
626 so->so_options |= SO_ACCEPTCONN;
627 if (backlog < 0 || backlog > somaxconn)
628 backlog = somaxconn;
629 so->so_qlimit = backlog;
630
631 socket_unlock(so, 1);
632 return (0);
633 }
634
635 void
636 sofreelastref(so, dealloc)
637 register struct socket *so;
638 int dealloc;
639 {
640 int error;
641 struct socket *head = so->so_head;
642
643 /*### Assume socket is locked */
644
645 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
646 #ifdef __APPLE__
647 selthreadclear(&so->so_snd.sb_sel);
648 selthreadclear(&so->so_rcv.sb_sel);
649 #endif
650 return;
651 }
652 if (head != NULL) {
653 socket_lock(head, 1);
654 if (so->so_state & SS_INCOMP) {
655 TAILQ_REMOVE(&head->so_incomp, so, so_list);
656 head->so_incqlen--;
657 } else if (so->so_state & SS_COMP) {
658 /*
659 * We must not decommission a socket that's
660 * on the accept(2) queue. If we do, then
661 * accept(2) may hang after select(2) indicated
662 * that the listening socket was ready.
663 */
664 #ifdef __APPLE__
665 selthreadclear(&so->so_snd.sb_sel);
666 selthreadclear(&so->so_rcv.sb_sel);
667 #endif
668 socket_unlock(head, 1);
669 return;
670 } else {
671 panic("sofree: not queued");
672 }
673 head->so_qlen--;
674 so->so_state &= ~SS_INCOMP;
675 so->so_head = NULL;
676 socket_unlock(head, 1);
677 }
678 #ifdef __APPLE__
679 selthreadclear(&so->so_snd.sb_sel);
680 sbrelease(&so->so_snd);
681 #endif
682 sorflush(so);
683
684 /* 3932268: disable upcall */
685 so->so_rcv.sb_flags &= ~SB_UPCALL;
686 so->so_snd.sb_flags &= ~SB_UPCALL;
687
688 if (dealloc)
689 sodealloc(so);
690 }
691
692 /*
693 * Close a socket on last file table reference removal.
694 * Initiate disconnect if connected.
695 * Free socket when disconnect complete.
696 */
697 int
698 soclose_locked(so)
699 register struct socket *so;
700 {
701 int error = 0;
702 lck_mtx_t * mutex_held;
703 struct timespec ts;
704
705 if (so->so_usecount == 0) {
706 panic("soclose: so=%x refcount=0\n", so);
707 }
708
709 sflt_notify(so, sock_evt_closing, NULL);
710
711 if ((so->so_options & SO_ACCEPTCONN)) {
712 struct socket *sp;
713
714                 /* We do not want new connections to be added to the connection queues */
715 so->so_options &= ~SO_ACCEPTCONN;
716
717 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
718 /* A bit tricky here. We need to keep
719 * a lock if it's a protocol global lock
720 * but we want the head, not the socket locked
721 * in the case of per-socket lock...
722 */
723 if (so->so_proto->pr_getlock != NULL)
724 socket_lock(sp, 1);
725 if (so->so_proto->pr_getlock != NULL)
726 socket_unlock(so, 0);
727 (void) soabort(sp);
728 if (so->so_proto->pr_getlock != NULL)
729 socket_lock(so, 0);
730 if (so->so_proto->pr_getlock != NULL)
731 socket_unlock(sp, 1);
732 }
733
734 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
735 if (so->so_proto->pr_getlock != NULL)
736 socket_lock(sp, 1);
737
738 /* Dequeue from so_comp since sofree() won't do it */
739 TAILQ_REMOVE(&so->so_comp, sp, so_list);
740 so->so_qlen--;
741 sp->so_state &= ~SS_COMP;
742 sp->so_head = NULL;
743
744 if (so->so_proto->pr_getlock != NULL)
745 socket_unlock(so, 0);
746 (void) soabort(sp);
747 if (so->so_proto->pr_getlock != NULL)
748 socket_lock(so, 0);
749 if (so->so_proto->pr_getlock != NULL)
750 socket_unlock(sp, 1);
751 }
752 }
753 if (so->so_pcb == 0) {
754 /* 3915887: mark the socket as ready for dealloc */
755 so->so_flags |= SOF_PCBCLEARING;
756 goto discard;
757 }
758 if (so->so_state & SS_ISCONNECTED) {
759 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
760 error = sodisconnectlocked(so);
761 if (error)
762 goto drop;
763 }
764 if (so->so_options & SO_LINGER) {
765 if ((so->so_state & SS_ISDISCONNECTING) &&
766 (so->so_state & SS_NBIO))
767 goto drop;
768 if (so->so_proto->pr_getlock != NULL)
769 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
770 else
771 mutex_held = so->so_proto->pr_domain->dom_mtx;
772 while (so->so_state & SS_ISCONNECTED) {
773 ts.tv_sec = (so->so_linger/100);
774 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
775 error = msleep((caddr_t)&so->so_timeo, mutex_held,
776 PSOCK | PCATCH, "soclos", &ts);
777 if (error) {
778                                         /* It's OK when the timer fires, don't report an error */
779 if (error == EWOULDBLOCK)
780 error = 0;
781 break;
782 }
783 }
784 }
785 }
786 drop:
787 if (so->so_usecount == 0)
788 panic("soclose: usecount is zero so=%x\n", so);
789 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
790 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
791 if (error == 0)
792 error = error2;
793 }
794 if (so->so_usecount <= 0)
795 panic("soclose: usecount is zero so=%x\n", so);
796 discard:
797 if (so->so_pcb && so->so_state & SS_NOFDREF)
798 panic("soclose: NOFDREF");
799 so->so_state |= SS_NOFDREF;
800 #ifdef __APPLE__
801 so->so_proto->pr_domain->dom_refs--;
802 evsofree(so);
803 #endif
804 so->so_usecount--;
805 sofree(so);
806 return (error);
807 }
808
809 int
810 soclose(so)
811 register struct socket *so;
812 {
813 int error = 0;
814 socket_lock(so, 1);
815 if (so->so_retaincnt == 0)
816 error = soclose_locked(so);
817         else { /* the FD is going away, but the socket is retained in the kernel; just drop its reference */
818 so->so_usecount--;
819 if (so->so_usecount < 2)
820 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount);
821 }
822 socket_unlock(so, 1);
823 return (error);
824 }
825
826
827 /*
828 * Must be called at splnet...
829 */
830 //#### Should already be locked
831 int
832 soabort(so)
833 struct socket *so;
834 {
835 int error;
836
837 #ifdef MORE_LOCKING_DEBUG
838 lck_mtx_t * mutex_held;
839
840 if (so->so_proto->pr_getlock != NULL)
841 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
842 else
843 mutex_held = so->so_proto->pr_domain->dom_mtx;
844 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
845 #endif
846
847 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
848 if (error) {
849 sofree(so);
850 return error;
851 }
852 return (0);
853 }
854
855 int
856 soacceptlock(so, nam, dolock)
857 register struct socket *so;
858 struct sockaddr **nam;
859 int dolock;
860 {
861 int error;
862
863 if (dolock) socket_lock(so, 1);
864
865 if ((so->so_state & SS_NOFDREF) == 0)
866 panic("soaccept: !NOFDREF");
867 so->so_state &= ~SS_NOFDREF;
868 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
869
870 if (dolock) socket_unlock(so, 1);
871 return (error);
872 }
873 int
874 soaccept(so, nam)
875 register struct socket *so;
876 struct sockaddr **nam;
877 {
878 return (soacceptlock(so, nam, 1));
879 }
880
881 int
882 soconnectlock(so, nam, dolock)
883 register struct socket *so;
884 struct sockaddr *nam;
885 int dolock;
886
887 {
888 int s;
889 int error;
890 struct proc *p = current_proc();
891
892 if (dolock) socket_lock(so, 1);
893
894 if (so->so_options & SO_ACCEPTCONN) {
895 if (dolock) socket_unlock(so, 1);
896 return (EOPNOTSUPP);
897 }
898 /*
899 * If protocol is connection-based, can only connect once.
900 * Otherwise, if connected, try to disconnect first.
901 * This allows user to disconnect by connecting to, e.g.,
902 * a null address.
903 */
904 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
905 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
906 (error = sodisconnectlocked(so))))
907 error = EISCONN;
908 else {
909 /*
910 * Run connect filter before calling protocol:
911 * - non-blocking connect returns before completion;
912 */
913 {
914 struct socket_filter_entry *filter;
915 int filtered = 0;
916 error = 0;
917 for (filter = so->so_filt; filter && (error == 0);
918 filter = filter->sfe_next_onsocket) {
919 if (filter->sfe_filter->sf_filter.sf_connect_out) {
920 if (filtered == 0) {
921 filtered = 1;
922 sflt_use(so);
923 socket_unlock(so, 0);
924 }
925 error = filter->sfe_filter->sf_filter.sf_connect_out(
926 filter->sfe_cookie, so, nam);
927 }
928 }
929 if (filtered != 0) {
930 socket_lock(so, 0);
931 sflt_unuse(so);
932 }
933 }
934 if (error) {
935 if (error == EJUSTRETURN)
936 error = 0;
937 if (dolock) socket_unlock(so, 1);
938 return error;
939 }
940
941 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
942 }
943 if (dolock) socket_unlock(so, 1);
944 return (error);
945 }
946
947 int
948 soconnect(so, nam)
949 register struct socket *so;
950 struct sockaddr *nam;
951 {
952 return (soconnectlock(so, nam, 1));
953 }
954
955 int
956 soconnect2(so1, so2)
957 register struct socket *so1;
958 struct socket *so2;
959 {
960 int error;
961         //####### Assumes so1 is already locked
962
963 socket_lock(so2, 1);
964
965 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
966
967 socket_unlock(so2, 1);
968 return (error);
969 }
970
971
972 int
973 sodisconnectlocked(so)
974 register struct socket *so;
975 {
976 int error;
977
978 if ((so->so_state & SS_ISCONNECTED) == 0) {
979 error = ENOTCONN;
980 goto bad;
981 }
982 if (so->so_state & SS_ISDISCONNECTING) {
983 error = EALREADY;
984 goto bad;
985 }
986
987 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
988
989 if (error == 0) {
990 sflt_notify(so, sock_evt_disconnected, NULL);
991 }
992
993 bad:
994 return (error);
995 }
996 //### Locking version
997 int
998 sodisconnect(so)
999 register struct socket *so;
1000 {
1001 int error;
1002
1003 socket_lock(so, 1);
1004 error = sodisconnectlocked(so);
1005 socket_unlock(so, 1);
1006 return(error);
1007 }
1008
1009 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1010
1011 /*
1012 * sosendcheck will lock the socket buffer if it isn't locked and
1013 * verify that there is space for the data being inserted.
1014 */
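/*
 * Returns 0 once space is available (sleeping in sbwait() and retrying if
 * necessary), EPIPE if the send side has been shut down, the pending
 * so_error if one is set, ENOTCONN or EDESTADDRREQ for unconnected sockets,
 * EMSGSIZE for oversized atomic sends, and EWOULDBLOCK when a non-blocking
 * socket would have to wait.
 */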
1015
1016 static int
1017 sosendcheck(
1018 struct socket *so,
1019 struct sockaddr *addr,
1020 long resid,
1021 long clen,
1022 long atomic,
1023 int flags,
1024 int *sblocked)
1025 {
1026 int error = 0;
1027 long space;
1028
1029 restart:
1030 if (*sblocked == 0) {
1031 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1032 if (error)
1033 return error;
1034 *sblocked = 1;
1035 }
1036
1037 if (so->so_state & SS_CANTSENDMORE)
1038 return EPIPE;
1039
1040 if (so->so_error) {
1041 error = so->so_error;
1042 so->so_error = 0;
1043 return error;
1044 }
1045
1046 if ((so->so_state & SS_ISCONNECTED) == 0) {
1047 /*
1048                  * `sendto' and `sendmsg' are allowed on a connection-
1049 * based socket if it supports implied connect.
1050 * Return ENOTCONN if not connected and no address is
1051 * supplied.
1052 */
1053 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1054 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1055 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1056 !(resid == 0 && clen != 0))
1057 return ENOTCONN;
1058 } else if (addr == 0 && !(flags&MSG_HOLD))
1059 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1060 }
1061 space = sbspace(&so->so_snd);
1062 if (flags & MSG_OOB)
1063 space += 1024;
1064 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1065 clen > so->so_snd.sb_hiwat)
1066 return EMSGSIZE;
1067 if (space < resid + clen &&
1068 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1069 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO))
1070 return EWOULDBLOCK;
1071 sbunlock(&so->so_snd, 1);
1072 error = sbwait(&so->so_snd);
1073 if (error) {
1074 return error;
1075 }
1076 goto restart;
1077 }
1078
1079 return 0;
1080 }
1081
1082 /*
1083 * Send on a socket.
1084 * If send must go all at once and message is larger than
1085 * send buffering, then hard error.
1086 * Lock against other senders.
1087 * If must go all at once and not enough room now, then
1088 * inform user that this would block and do nothing.
1089 * Otherwise, if nonblocking, send as much as possible.
1090 * The data to be sent is described by "uio" if nonzero,
1091 * otherwise by the mbuf chain "top" (which must be null
1092 * if uio is not). Data provided in mbuf chain must be small
1093 * enough to send all at once.
1094 *
1095 * Returns nonzero on error, timeout or signal; callers
1096 * must check for short counts if EINTR/ERESTART are returned.
1097 * Data and control buffers are freed on return.
1098 * Experiment:
1099 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1100 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1101 * point at the mbuf chain being constructed and go from there.
1102 */
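/*
 * Minimal usage sketch (simplified; the uio, mbuf chain and destination
 * address shown here are assumed, not taken from a real caller):
 *
 *	// data described by a uio, with an explicit destination address
 *	error = sosend(so, (struct sockaddr *)&to, uio, NULL, NULL, 0);
 *
 *	// data already packaged as an mbuf chain, no uio
 *	error = sosend(so, NULL, NULL, top, NULL, 0);
 *
 * sosend() takes and drops the socket lock itself, and the data and control
 * mbufs are always consumed or freed by the time it returns.
 */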
1103 int
1104 sosend(so, addr, uio, top, control, flags)
1105 register struct socket *so;
1106 struct sockaddr *addr;
1107 struct uio *uio;
1108 struct mbuf *top;
1109 struct mbuf *control;
1110 int flags;
1111
1112 {
1113 struct mbuf **mp;
1114 register struct mbuf *m, *freelist = NULL;
1115 register long space, len, resid;
1116 int clen = 0, error, dontroute, mlen, sendflags;
1117 int atomic = sosendallatonce(so) || top;
1118 int sblocked = 0;
1119 struct proc *p = current_proc();
1120
1121 if (uio)
1122 // LP64todo - fix this!
1123 resid = uio_resid(uio);
1124 else
1125 resid = top->m_pkthdr.len;
1126
1127 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1128 so,
1129 resid,
1130 so->so_snd.sb_cc,
1131 so->so_snd.sb_lowat,
1132 so->so_snd.sb_hiwat);
1133
1134 socket_lock(so, 1);
1135
1136 /*
1137 * In theory resid should be unsigned.
1138 * However, space must be signed, as it might be less than 0
1139 * if we over-committed, and we must use a signed comparison
1140 * of space and resid. On the other hand, a negative resid
1141 * causes us to loop sending 0-length segments to the protocol.
1142 *
1143 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1144 * type sockets since that's an error.
1145 */
1146 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1147 error = EINVAL;
1148 socket_unlock(so, 1);
1149 goto out;
1150 }
1151
1152 dontroute =
1153 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1154 (so->so_proto->pr_flags & PR_ATOMIC);
1155 if (p)
1156 p->p_stats->p_ru.ru_msgsnd++;
1157 if (control)
1158 clen = control->m_len;
1159
1160 do {
1161 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1162 if (error) {
1163 if (sblocked)
1164 goto release;
1165 else {
1166 socket_unlock(so, 1);
1167 goto out;
1168 }
1169 }
1170 mp = &top;
1171 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1172
1173 do {
1174
1175 if (uio == NULL) {
1176 /*
1177 * Data is prepackaged in "top".
1178 */
1179 resid = 0;
1180 if (flags & MSG_EOR)
1181 top->m_flags |= M_EOR;
1182 } else {
1183 int chainlength;
1184 int bytes_to_copy;
1185
1186 bytes_to_copy = min(resid, space);
1187
1188 if (sosendminchain > 0) {
1189 chainlength = 0;
1190 } else
1191 chainlength = sosendmaxchain;
1192
1193 socket_unlock(so, 0);
1194
1195 do {
1196 int num_needed;
1197 int hdrs_needed = (top == 0) ? 1 : 0;
1198
1199 /*
1200 * try to maintain a local cache of mbuf clusters needed to complete this write
1201 * the list is further limited to the number that are currently needed to fill the socket
1202 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1203                              * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs;
1204                              * if we fail early (or miscalculate the number needed) make sure to release any clusters
1205 * we haven't yet consumed.
1206 */
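			    /*
			     * Worked example (sizes are illustrative -- NBPG is the page
			     * size and MCLBYTES the cluster size): with NBPG == 4096 and
			     * bytes_to_copy == 20000, num_needed starts at 20000/4096 == 4,
			     * and the 3616-byte remainder is >= MINCLSIZE, so 5 page-sized
			     * packets are requested in one m_getpackets_internal() call.
			     */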
1207 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1208 num_needed = bytes_to_copy / NBPG;
1209
1210 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1211 num_needed++;
1212
1213 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1214 /* Fall back to cluster size if allocation failed */
1215 }
1216
1217 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1218 num_needed = bytes_to_copy / MCLBYTES;
1219
1220 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1221 num_needed++;
1222
1223 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1224 /* Fall back to a single mbuf if allocation failed */
1225 }
1226
1227 if (freelist == NULL) {
1228 if (top == 0)
1229 MGETHDR(freelist, M_WAIT, MT_DATA);
1230 else
1231 MGET(freelist, M_WAIT, MT_DATA);
1232
1233 if (freelist == NULL) {
1234 error = ENOBUFS;
1235 socket_lock(so, 0);
1236 if (sblocked) {
1237 goto release;
1238 } else {
1239 socket_unlock(so, 1);
1240 goto out;
1241 }
1242 }
1243 /*
1244 * For datagram protocols, leave room
1245 * for protocol headers in first mbuf.
1246 */
1247 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1248 MH_ALIGN(freelist, bytes_to_copy);
1249 }
1250 m = freelist;
1251 freelist = m->m_next;
1252 m->m_next = NULL;
1253
1254 if ((m->m_flags & M_EXT))
1255 mlen = m->m_ext.ext_size;
1256 else if ((m->m_flags & M_PKTHDR))
1257 mlen = MHLEN - m_leadingspace(m);
1258 else
1259 mlen = MLEN;
1260 len = min(mlen, bytes_to_copy);
1261
1262 chainlength += len;
1263
1264 space -= len;
1265
1266 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1267
1268 // LP64todo - fix this!
1269 resid = uio_resid(uio);
1270
1271 m->m_len = len;
1272 *mp = m;
1273 top->m_pkthdr.len += len;
1274 if (error)
1275 break;
1276 mp = &m->m_next;
1277 if (resid <= 0) {
1278 if (flags & MSG_EOR)
1279 top->m_flags |= M_EOR;
1280 break;
1281 }
1282 bytes_to_copy = min(resid, space);
1283
1284 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1285
1286 socket_lock(so, 0);
1287
1288 if (error)
1289 goto release;
1290 }
1291
1292 if (flags & (MSG_HOLD|MSG_SEND))
1293 { /* Enqueue for later, go away if HOLD */
1294 register struct mbuf *mb1;
1295 if (so->so_temp && (flags & MSG_FLUSH))
1296 { m_freem(so->so_temp);
1297 so->so_temp = NULL;
1298 }
1299 if (so->so_temp)
1300 so->so_tail->m_next = top;
1301 else
1302 so->so_temp = top;
1303 mb1 = top;
1304 while (mb1->m_next)
1305 mb1 = mb1->m_next;
1306 so->so_tail = mb1;
1307 if (flags&MSG_HOLD)
1308 { top = NULL;
1309 goto release;
1310 }
1311 top = so->so_temp;
1312 }
1313 if (dontroute)
1314 so->so_options |= SO_DONTROUTE;
1315 /* Compute flags here, for pru_send and NKEs */
1316 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1317 /*
1318 * If the user set MSG_EOF, the protocol
1319 * understands this flag and nothing left to
1320 * send then use PRU_SEND_EOF instead of PRU_SEND.
1321 */
1322 ((flags & MSG_EOF) &&
1323 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1324 (resid <= 0)) ?
1325 PRUS_EOF :
1326 /* If there is more to send set PRUS_MORETOCOME */
1327 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1328
1329 /*
1330 * Socket filter processing
1331 */
1332 {
1333 struct socket_filter_entry *filter;
1334 int filtered;
1335
1336 filtered = 0;
1337 error = 0;
1338 for (filter = so->so_filt; filter && (error == 0);
1339 filter = filter->sfe_next_onsocket) {
1340 if (filter->sfe_filter->sf_filter.sf_data_out) {
1341 int so_flags = 0;
1342 if (filtered == 0) {
1343 filtered = 1;
1344 /*
1345 * We don't let sbunlock unlock the socket because
1346 * we don't want it to decrement the usecount.
1347 */
1348 sbunlock(&so->so_snd, 1);
1349 sblocked = 0;
1350 socket_unlock(so, 0);
1351 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1352 }
1353 error = filter->sfe_filter->sf_filter.sf_data_out(
1354 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1355 }
1356 }
1357
1358 if (filtered) {
1359 /*
1360 * At this point, we've run at least one filter.
1361 * The socket is unlocked as is the socket buffer.
1362 */
1363 socket_lock(so, 0);
1364 if (error == EJUSTRETURN) {
1365 error = 0;
1366 clen = 0;
1367 control = 0;
1368 top = 0;
1369 socket_unlock(so, 1);
1370 goto out;
1371 }
1372 else if (error) {
1373 socket_unlock(so, 1);
1374 goto out;
1375 }
1376
1377
1378 /* Verify our state again, this will lock the socket buffer */
1379 error = sosendcheck(so, addr, top->m_pkthdr.len,
1380 control ? control->m_pkthdr.len : 0,
1381 atomic, flags, &sblocked);
1382 if (error) {
1383 if (sblocked) {
1384 /* sbunlock at release will unlock the socket */
1385 goto release;
1386 }
1387 else {
1388 socket_unlock(so, 1);
1389 goto out;
1390 }
1391 }
1392 }
1393 }
1394 /*
1395 * End Socket filter processing
1396 */
1397
1398 if (error == EJUSTRETURN) {
1399 /* A socket filter handled this data */
1400 error = 0;
1401 }
1402 else {
1403 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1404 sendflags, top, addr, control, p);
1405 }
1406 #ifdef __APPLE__
1407 if (flags & MSG_SEND)
1408 so->so_temp = NULL;
1409 #endif
1410 if (dontroute)
1411 so->so_options &= ~SO_DONTROUTE;
1412 clen = 0;
1413 control = 0;
1414 top = 0;
1415 mp = &top;
1416 if (error)
1417 goto release;
1418 } while (resid && space > 0);
1419 } while (resid);
1420
1421 release:
1422 sbunlock(&so->so_snd, 0); /* will unlock socket */
1423 out:
1424 if (top)
1425 m_freem(top);
1426 if (control)
1427 m_freem(control);
1428 if (freelist)
1429 m_freem_list(freelist);
1430
1431 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1432 so,
1433 resid,
1434 so->so_snd.sb_cc,
1435 space,
1436 error);
1437
1438 return (error);
1439 }
1440
1441 /*
1442 * Implement receive operations on a socket.
1443 * We depend on the way that records are added to the sockbuf
1444 * by sbappend*. In particular, each record (mbufs linked through m_next)
1445 * must begin with an address if the protocol so specifies,
1446 * followed by an optional mbuf or mbufs containing ancillary data,
1447 * and then zero or more mbufs of data.
1448 * In order to avoid blocking network interrupts for the entire time here,
1449 * we splx() while doing the actual copy to user space.
1450 * Although the sockbuf is locked, new data may still be appended,
1451 * and thus we must maintain consistency of the sockbuf during that time.
1452 *
1453 * The caller may receive the data as a single mbuf chain by supplying
1454 * an mbuf **mp0 for use in returning the chain. The uio is then used
1455 * only for the count in uio_resid.
1456 */
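/*
 * Minimal usage sketch (simplified; the uio, address and mbuf pointers shown
 * here are assumed, not taken from a real caller):
 *
 *	// copy the data straight into the caller's uio
 *	error = soreceive(so, NULL, uio, NULL, NULL, &flags);
 *
 *	// return the record as an mbuf chain instead, with the sender address
 *	error = soreceive(so, &from, uio, &m0, NULL, &flags);
 *
 * When mp0 is supplied, uio only provides the byte count (uio_resid) and the
 * data mbufs are handed back through *mp0.
 */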
1457 int
1458 soreceive(so, psa, uio, mp0, controlp, flagsp)
1459 register struct socket *so;
1460 struct sockaddr **psa;
1461 struct uio *uio;
1462 struct mbuf **mp0;
1463 struct mbuf **controlp;
1464 int *flagsp;
1465 {
1466 register struct mbuf *m, **mp, *ml = NULL;
1467 register int flags, len, error, offset;
1468 struct protosw *pr = so->so_proto;
1469 struct mbuf *nextrecord;
1470 int moff, type = 0;
1471 // LP64todo - fix this!
1472 int orig_resid = uio_resid(uio);
1473 volatile struct mbuf *free_list;
1474 volatile int delayed_copy_len;
1475 int can_delay;
1476 int need_event;
1477 struct proc *p = current_proc();
1478
1479
1480 // LP64todo - fix this!
1481 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1482 so,
1483 uio_resid(uio),
1484 so->so_rcv.sb_cc,
1485 so->so_rcv.sb_lowat,
1486 so->so_rcv.sb_hiwat);
1487
1488 socket_lock(so, 1);
1489
1490 #ifdef MORE_LOCKING_DEBUG
1491 if (so->so_usecount == 1)
1492 panic("soreceive: so=%x no other reference on socket\n", so);
1493 #endif
1494 mp = mp0;
1495 if (psa)
1496 *psa = 0;
1497 if (controlp)
1498 *controlp = 0;
1499 if (flagsp)
1500 flags = *flagsp &~ MSG_EOR;
1501 else
1502 flags = 0;
1503 /*
1504 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1505          * regardless of the flags argument. Here is the case where
1506 * out-of-band data is not inline.
1507 */
1508 if ((flags & MSG_OOB) ||
1509 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1510 (so->so_options & SO_OOBINLINE) == 0 &&
1511 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1512 m = m_get(M_WAIT, MT_DATA);
1513 if (m == NULL) {
1514 socket_unlock(so, 1);
1515 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1516 return (ENOBUFS);
1517 }
1518 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1519 if (error)
1520 goto bad;
1521 socket_unlock(so, 0);
1522 do {
1523 // LP64todo - fix this!
1524 error = uiomove(mtod(m, caddr_t),
1525 (int) min(uio_resid(uio), m->m_len), uio);
1526 m = m_free(m);
1527 } while (uio_resid(uio) && error == 0 && m);
1528 socket_lock(so, 0);
1529 bad:
1530 if (m)
1531 m_freem(m);
1532 #ifdef __APPLE__
1533 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1534 if (error == EWOULDBLOCK || error == EINVAL) {
1535 /*
1536 * Let's try to get normal data:
1537                          * EWOULDBLOCK: out-of-band data not received yet;
1538 * EINVAL: out-of-band data already read.
1539 */
1540 error = 0;
1541 goto nooob;
1542 } else if (error == 0 && flagsp)
1543 *flagsp |= MSG_OOB;
1544 }
1545 socket_unlock(so, 1);
1546 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1547 #endif
1548 return (error);
1549 }
1550 nooob:
1551 if (mp)
1552 *mp = (struct mbuf *)0;
1553 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1554 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1555
1556
1557 free_list = (struct mbuf *)0;
1558 delayed_copy_len = 0;
1559 restart:
1560 #ifdef MORE_LOCKING_DEBUG
1561 if (so->so_usecount <= 1)
1562 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1563 #endif
1564 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1565 if (error) {
1566 socket_unlock(so, 1);
1567 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1568 return (error);
1569 }
1570
1571 m = so->so_rcv.sb_mb;
1572 /*
1573 * If we have less data than requested, block awaiting more
1574 * (subject to any timeout) if:
1575 * 1. the current count is less than the low water mark, or
1576 * 2. MSG_WAITALL is set, and it is possible to do the entire
1577 * receive operation at once if we block (resid <= hiwat).
1578 * 3. MSG_DONTWAIT is not set
1579 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1580 * we have to do the receive in sections, and thus risk returning
1581 * a short count if a timeout or signal occurs after we start.
1582 */
1583 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1584 so->so_rcv.sb_cc < uio_resid(uio)) &&
1585 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1586 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1587 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1588
1589 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1590 if (so->so_error) {
1591 if (m)
1592 goto dontblock;
1593 error = so->so_error;
1594 if ((flags & MSG_PEEK) == 0)
1595 so->so_error = 0;
1596 goto release;
1597 }
1598 if (so->so_state & SS_CANTRCVMORE) {
1599 if (m)
1600 goto dontblock;
1601 else
1602 goto release;
1603 }
1604 for (; m; m = m->m_next)
1605 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1606 m = so->so_rcv.sb_mb;
1607 goto dontblock;
1608 }
1609 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1610 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1611 error = ENOTCONN;
1612 goto release;
1613 }
1614 if (uio_resid(uio) == 0)
1615 goto release;
1616 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1617 error = EWOULDBLOCK;
1618 goto release;
1619 }
1620 sbunlock(&so->so_rcv, 1);
1621 #ifdef EVEN_MORE_LOCKING_DEBUG
1622 if (socket_debug)
1623 printf("Waiting for socket data\n");
1624 #endif
1625
1626 error = sbwait(&so->so_rcv);
1627 #ifdef EVEN_MORE_LOCKING_DEBUG
1628 if (socket_debug)
1629 printf("SORECEIVE - sbwait returned %d\n", error);
1630 #endif
1631 if (so->so_usecount < 1)
1632 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1633 if (error) {
1634 socket_unlock(so, 1);
1635 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1636 return (error);
1637 }
1638 goto restart;
1639 }
1640 dontblock:
1641 #ifndef __APPLE__
1642 if (uio->uio_procp)
1643 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1644 #else /* __APPLE__ */
1645 /*
1646 * 2207985
1647          * This should be uio->uio_procp; however, some callers of this
1648 * function use auto variables with stack garbage, and fail to
1649 * fill out the uio structure properly.
1650 */
1651 if (p)
1652 p->p_stats->p_ru.ru_msgrcv++;
1653 #endif /* __APPLE__ */
1654 nextrecord = m->m_nextpkt;
1655 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1656 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1657 orig_resid = 0;
1658 if (psa) {
1659 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1660 mp0 == 0);
1661 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1662 error = EWOULDBLOCK;
1663 goto release;
1664 }
1665 }
1666 if (flags & MSG_PEEK) {
1667 m = m->m_next;
1668 } else {
1669 sbfree(&so->so_rcv, m);
1670 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1671 panic("soreceive: about to create invalid socketbuf");
1672 MFREE(m, so->so_rcv.sb_mb);
1673 m = so->so_rcv.sb_mb;
1674 }
1675 }
1676 while (m && m->m_type == MT_CONTROL && error == 0) {
1677 if (flags & MSG_PEEK) {
1678 if (controlp)
1679 *controlp = m_copy(m, 0, m->m_len);
1680 m = m->m_next;
1681 } else {
1682 sbfree(&so->so_rcv, m);
1683 if (controlp) {
1684 if (pr->pr_domain->dom_externalize &&
1685 mtod(m, struct cmsghdr *)->cmsg_type ==
1686 SCM_RIGHTS) {
1687 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1688 error = (*pr->pr_domain->dom_externalize)(m);
1689 socket_lock(so, 0);
1690 }
1691 *controlp = m;
1692 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1693 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1694 so->so_rcv.sb_mb = m->m_next;
1695 m->m_next = 0;
1696 m = so->so_rcv.sb_mb;
1697 } else {
1698 MFREE(m, so->so_rcv.sb_mb);
1699 m = so->so_rcv.sb_mb;
1700 }
1701 }
1702 if (controlp) {
1703 orig_resid = 0;
1704 controlp = &(*controlp)->m_next;
1705 }
1706 }
1707 if (m) {
1708 if ((flags & MSG_PEEK) == 0)
1709 m->m_nextpkt = nextrecord;
1710 type = m->m_type;
1711 if (type == MT_OOBDATA)
1712 flags |= MSG_OOB;
1713 }
1714 moff = 0;
1715 offset = 0;
1716
1717 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1718 can_delay = 1;
1719 else
1720 can_delay = 0;
1721
1722 need_event = 0;
1723
1724 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1725 if (m->m_type == MT_OOBDATA) {
1726 if (type != MT_OOBDATA)
1727 break;
1728 } else if (type == MT_OOBDATA)
1729 break;
1730 #ifndef __APPLE__
1731 /*
1732                  * This assertion needs rework. The trouble is that AppleTalk uses many
1733 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1734 * For now just remove the assertion... CSM 9/98
1735 */
1736 else
1737 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1738 ("receive 3"));
1739 #else
1740 /*
1741                  * Make sure to always set the MSG_OOB flag when getting
1742 * out of band data inline.
1743 */
1744 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1745 (so->so_options & SO_OOBINLINE) != 0 &&
1746 (so->so_state & SS_RCVATMARK) != 0) {
1747 flags |= MSG_OOB;
1748 }
1749 #endif
1750 so->so_state &= ~SS_RCVATMARK;
1751 // LP64todo - fix this!
1752 len = uio_resid(uio) - delayed_copy_len;
1753 if (so->so_oobmark && len > so->so_oobmark - offset)
1754 len = so->so_oobmark - offset;
1755 if (len > m->m_len - moff)
1756 len = m->m_len - moff;
1757 /*
1758 * If mp is set, just pass back the mbufs.
1759 * Otherwise copy them out via the uio, then free.
1760 * Sockbuf must be consistent here (points to current mbuf,
1761 * it points to next record) when we drop priority;
1762 * we must note any additions to the sockbuf when we
1763 * block interrupts again.
1764 */
1765 if (mp == 0) {
1766 if (can_delay && len == m->m_len) {
1767 /*
1768 * only delay the copy if we're consuming the
1769 * mbuf and we're NOT in MSG_PEEK mode
1770                                  * and we have enough data to make it worthwhile
1771                                  * to drop and retake the funnel... can_delay
1772                                  * reflects the state of the latter two constraints;
1773                                  * moff should always be zero in these cases
1774 */
1775 delayed_copy_len += len;
1776 } else {
1777
1778 if (delayed_copy_len) {
1779 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1780
1781 if (error) {
1782 goto release;
1783 }
1784 if (m != so->so_rcv.sb_mb) {
1785 /*
1786 * can only get here if MSG_PEEK is not set
1787 * therefore, m should point at the head of the rcv queue...
1788 * if it doesn't, it means something drastically changed
1789 * while we were out from behind the funnel in sodelayed_copy...
1790 * perhaps a RST on the stream... in any event, the stream has
1791 * been interrupted... it's probably best just to return
1792 * whatever data we've moved and let the caller sort it out...
1793 */
1794 break;
1795 }
1796 }
1797 socket_unlock(so, 0);
1798 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1799 socket_lock(so, 0);
1800
1801 if (error)
1802 goto release;
1803 }
1804 } else
1805 uio_setresid(uio, (uio_resid(uio) - len));
1806
1807 if (len == m->m_len - moff) {
1808 if (m->m_flags & M_EOR)
1809 flags |= MSG_EOR;
1810 if (flags & MSG_PEEK) {
1811 m = m->m_next;
1812 moff = 0;
1813 } else {
1814 nextrecord = m->m_nextpkt;
1815 sbfree(&so->so_rcv, m);
1816 m->m_nextpkt = NULL;
1817
1818 if (mp) {
1819 *mp = m;
1820 mp = &m->m_next;
1821 so->so_rcv.sb_mb = m = m->m_next;
1822 *mp = (struct mbuf *)0;
1823 } else {
1824 if (free_list == NULL)
1825 free_list = m;
1826 else
1827 ml->m_next = m;
1828 ml = m;
1829 so->so_rcv.sb_mb = m = m->m_next;
1830 ml->m_next = 0;
1831 }
1832 if (m)
1833 m->m_nextpkt = nextrecord;
1834 }
1835 } else {
1836 if (flags & MSG_PEEK)
1837 moff += len;
1838 else {
1839 if (mp)
1840 *mp = m_copym(m, 0, len, M_WAIT);
1841 m->m_data += len;
1842 m->m_len -= len;
1843 so->so_rcv.sb_cc -= len;
1844 }
1845 }
1846 if (so->so_oobmark) {
1847 if ((flags & MSG_PEEK) == 0) {
1848 so->so_oobmark -= len;
1849 if (so->so_oobmark == 0) {
1850 so->so_state |= SS_RCVATMARK;
1851 /*
1852 * delay posting the actual event until after
1853 * any delayed copy processing has finished
1854 */
1855 need_event = 1;
1856 break;
1857 }
1858 } else {
1859 offset += len;
1860 if (offset == so->so_oobmark)
1861 break;
1862 }
1863 }
1864 if (flags & MSG_EOR)
1865 break;
1866 /*
1867 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1868 * we must not quit until "uio->uio_resid == 0" or an error
1869 * termination. If a signal/timeout occurs, return
1870 * with a short count but without error.
1871 * Keep sockbuf locked against other readers.
1872 */
1873 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1874 !sosendallatonce(so) && !nextrecord) {
1875 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1876 goto release;
1877
1878 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1879 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1880 if (sbwait(&so->so_rcv)) {
1881 error = 0;
1882 goto release;
1883 }
1884 /*
1885 * have to wait until after we get back from the sbwait to do the copy because
1886 * we will drop the funnel if we have enough data that has been delayed... by dropping
1887 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1888 * and to change the state of this socket... we're issuing the sbwait because
1889 * the socket is empty and we're expecting the netisr thread to wake us up when more
1890 * packets arrive... if we allow that processing to happen and then sbwait, we
1891 * could stall forever with packets sitting in the socket if no further packets
1892 * arrive from the remote side.
1893 *
1894 * we want to copy before we've collected all the data to satisfy this request to
1895 * allow the copy to overlap the incoming packet processing on an MP system
1896 */
1897 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1898
1899 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1900
1901 if (error)
1902 goto release;
1903 }
1904 m = so->so_rcv.sb_mb;
1905 if (m) {
1906 nextrecord = m->m_nextpkt;
1907 }
1908 }
1909 }
1910 #ifdef MORE_LOCKING_DEBUG
1911 if (so->so_usecount <= 1)
1912 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1913 #endif
1914
1915 if (m && pr->pr_flags & PR_ATOMIC) {
1916 #ifdef __APPLE__
1917 if (so->so_options & SO_DONTTRUNC)
1918 flags |= MSG_RCVMORE;
1919 else {
1920 #endif
1921 flags |= MSG_TRUNC;
1922 if ((flags & MSG_PEEK) == 0)
1923 (void) sbdroprecord(&so->so_rcv);
1924 #ifdef __APPLE__
1925 }
1926 #endif
1927 }
1928 if ((flags & MSG_PEEK) == 0) {
1929 if (m == 0)
1930 so->so_rcv.sb_mb = nextrecord;
1931 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1932 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1933 }
1934 #ifdef __APPLE__
1935 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1936 flags |= MSG_HAVEMORE;
1937
1938 if (delayed_copy_len) {
1939 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1940
1941 if (error)
1942 goto release;
1943 }
1944 if (free_list) {
1945 m_freem_list((struct mbuf *)free_list);
1946 free_list = (struct mbuf *)0;
1947 }
1948 if (need_event)
1949 postevent(so, 0, EV_OOB);
1950 #endif
1951 if (orig_resid == uio_resid(uio) && orig_resid &&
1952 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1953 sbunlock(&so->so_rcv, 1);
1954 goto restart;
1955 }
1956
1957 if (flagsp)
1958 *flagsp |= flags;
1959 release:
1960 #ifdef MORE_LOCKING_DEBUG
1961 if (so->so_usecount <= 1)
1962 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1963 #endif
1964 if (delayed_copy_len) {
1965 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1966 }
1967 if (free_list) {
1968 m_freem_list((struct mbuf *)free_list);
1969 }
1970 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1971
1972 // LP64todo - fix this!
1973 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1974 so,
1975 uio_resid(uio),
1976 so->so_rcv.sb_cc,
1977 0,
1978 error);
1979
1980 return (error);
1981 }
1982
1983
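/*
 * sodelayed_copy() drains the list of mbufs whose copy-out was deferred by
 * soreceive(): it drops the socket lock, uiomove()s each mbuf's data to user
 * space, frees the whole chain, clears the caller's list and residual count,
 * and retakes the socket lock before returning.
 */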
1984 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1985 {
1986 int error = 0;
1987 struct mbuf *m;
1988
1989 m = *free_list;
1990
1991 socket_unlock(so, 0);
1992
1993 while (m && error == 0) {
1994
1995 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
1996
1997 m = m->m_next;
1998 }
1999 m_freem_list(*free_list);
2000
2001 *free_list = (struct mbuf *)NULL;
2002 *resid = 0;
2003
2004 socket_lock(so, 0);
2005
2006 return (error);
2007 }
2008
2009
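/*
 * Shut down one or both halves of a connection: anything other than SHUT_WR
 * flushes the receive side via sorflush(), and anything other than SHUT_RD
 * hands the request to the protocol's pru_shutdown.  Socket filters are
 * notified first via sflt_notify().
 */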
2010 int
2011 soshutdown(so, how)
2012 register struct socket *so;
2013 register int how;
2014 {
2015 register struct protosw *pr = so->so_proto;
2016 int ret;
2017
2018 socket_lock(so, 1);
2019
2020 sflt_notify(so, sock_evt_shutdown, &how);
2021
2022 if (how != SHUT_WR) {
2023 sorflush(so);
2024 postevent(so, 0, EV_RCLOSED);
2025 }
2026 if (how != SHUT_RD) {
2027 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2028 postevent(so, 0, EV_WCLOSED);
2029 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2030 socket_unlock(so, 1);
2031 return(ret);
2032 }
2033
2034 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2035 socket_unlock(so, 1);
2036 return (0);
2037 }
2038
2039 void
2040 sorflush(so)
2041 register struct socket *so;
2042 {
2043 register struct sockbuf *sb = &so->so_rcv;
2044 register struct protosw *pr = so->so_proto;
2045 struct sockbuf asb;
2046
2047 #ifdef MORE_LOCKING_DEBUG
2048 lck_mtx_t * mutex_held;
2049
2050 if (so->so_proto->pr_getlock != NULL)
2051 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2052 else
2053 mutex_held = so->so_proto->pr_domain->dom_mtx;
2054 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2055 #endif
2056
2057 sflt_notify(so, sock_evt_flush_read, NULL);
2058
2059 sb->sb_flags |= SB_NOINTR;
2060 (void) sblock(sb, M_WAIT);
2061 socantrcvmore(so);
2062 sbunlock(sb, 1);
2063 #ifdef __APPLE__
2064 selthreadclear(&sb->sb_sel);
2065 #endif
2066 asb = *sb;
2067 bzero((caddr_t)sb, sizeof (*sb));
2068 sb->sb_so = so; /* reestablish link to socket */
2069 if (asb.sb_flags & SB_KNOTE) {
2070 sb->sb_sel.si_note = asb.sb_sel.si_note;
2071 sb->sb_flags = SB_KNOTE;
2072 }
2073 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2074 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2075 sbrelease(&asb);
2076 }
2077
2078 /*
2079 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2080 * an additional variant to handle the case where the option value needs
2081 * to be some kind of integer, but not a specific size.
2082 * In addition to their use here, these functions are also called by the
2083 * protocol-level pr_ctloutput() routines.
2084 */
2085 int
2086 sooptcopyin(sopt, buf, len, minlen)
2087 struct sockopt *sopt;
2088 void *buf;
2089 size_t len;
2090 size_t minlen;
2091 {
2092 size_t valsize;
2093
2094 /*
2095 * If the user gives us more than we wanted, we ignore it,
2096 * but if we don't get the minimum length the caller
2097 * wants, we return EINVAL. On success, sopt->sopt_valsize
2098 * is set to however much we actually retrieved.
2099 */
2100 if ((valsize = sopt->sopt_valsize) < minlen)
2101 return EINVAL;
2102 if (valsize > len)
2103 sopt->sopt_valsize = valsize = len;
2104
2105 if (sopt->sopt_p != 0)
2106 return (copyin(sopt->sopt_val, buf, valsize));
2107
2108 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2109 return 0;
2110 }
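/*
 * As a rough sketch (names illustrative, not taken from this file), a
 * protocol pr_ctloutput() handler fetching a fixed-size integer option
 * would typically use sooptcopyin() like this:
 *
 *	int optval, error;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 *	... update protocol state from optval ...
 */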
2111
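/*
 * setsockopt(2) support at the socket layer: run the request past any
 * attached socket filters, pass non-SOL_SOCKET levels down to the
 * protocol's pr_ctloutput, handle the generic SOL_SOCKET options here,
 * and finally let the protocol observe SOL_SOCKET options that were set
 * successfully.
 */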
2112 int
2113 sosetopt(so, sopt)
2114 struct socket *so;
2115 struct sockopt *sopt;
2116 {
2117 int error, optval;
2118 struct linger l;
2119 struct timeval tv;
2120 short val;
2121
2122 socket_lock(so, 1);
2123
2124 if (sopt->sopt_dir != SOPT_SET) {
2125 sopt->sopt_dir = SOPT_SET;
2126 }
2127
2128 {
2129 struct socket_filter_entry *filter;
2130 int filtered = 0;
2131 error = 0;
2132 for (filter = so->so_filt; filter && (error == 0);
2133 filter = filter->sfe_next_onsocket) {
2134 if (filter->sfe_filter->sf_filter.sf_setoption) {
2135 if (filtered == 0) {
2136 filtered = 1;
2137 sflt_use(so);
2138 socket_unlock(so, 0);
2139 }
2140 error = filter->sfe_filter->sf_filter.sf_setoption(
2141 filter->sfe_cookie, so, sopt);
2142 }
2143 }
2144
2145 if (filtered != 0) {
2146 socket_lock(so, 0);
2147 sflt_unuse(so);
2148
2149 if (error) {
2150 if (error == EJUSTRETURN)
2151 error = 0;
2152 goto bad;
2153 }
2154 }
2155 }
2156
2157 error = 0;
2158 if (sopt->sopt_level != SOL_SOCKET) {
2159 if (so->so_proto && so->so_proto->pr_ctloutput) {
2160 error = (*so->so_proto->pr_ctloutput)
2161 (so, sopt);
2162 socket_unlock(so, 1);
2163 return (error);
2164 }
2165 error = ENOPROTOOPT;
2166 } else {
2167 switch (sopt->sopt_name) {
2168 case SO_LINGER:
2169 case SO_LINGER_SEC:
2170 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2171 if (error)
2172 goto bad;
2173
2174 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2175 if (l.l_onoff)
2176 so->so_options |= SO_LINGER;
2177 else
2178 so->so_options &= ~SO_LINGER;
2179 break;
2180
2181 case SO_DEBUG:
2182 case SO_KEEPALIVE:
2183 case SO_DONTROUTE:
2184 case SO_USELOOPBACK:
2185 case SO_BROADCAST:
2186 case SO_REUSEADDR:
2187 case SO_REUSEPORT:
2188 case SO_OOBINLINE:
2189 case SO_TIMESTAMP:
2190 #ifdef __APPLE__
2191 case SO_DONTTRUNC:
2192 case SO_WANTMORE:
2193 case SO_WANTOOBFLAG:
2194 #endif
2195 error = sooptcopyin(sopt, &optval, sizeof optval,
2196 sizeof optval);
2197 if (error)
2198 goto bad;
2199 if (optval)
2200 so->so_options |= sopt->sopt_name;
2201 else
2202 so->so_options &= ~sopt->sopt_name;
2203 break;
2204
2205 case SO_SNDBUF:
2206 case SO_RCVBUF:
2207 case SO_SNDLOWAT:
2208 case SO_RCVLOWAT:
2209 error = sooptcopyin(sopt, &optval, sizeof optval,
2210 sizeof optval);
2211 if (error)
2212 goto bad;
2213
2214 /*
2215 * Values < 1 make no sense for any of these
2216 * options, so disallow them.
2217 */
2218 if (optval < 1) {
2219 error = EINVAL;
2220 goto bad;
2221 }
2222
2223 switch (sopt->sopt_name) {
2224 case SO_SNDBUF:
2225 case SO_RCVBUF:
2226 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2227 &so->so_snd : &so->so_rcv,
2228 (u_long) optval) == 0) {
2229 error = ENOBUFS;
2230 goto bad;
2231 }
2232 break;
2233
2234 /*
2235 * Make sure the low-water is never greater than
2236 * the high-water.
2237 */
2238 case SO_SNDLOWAT:
2239 so->so_snd.sb_lowat =
2240 (optval > so->so_snd.sb_hiwat) ?
2241 so->so_snd.sb_hiwat : optval;
2242 break;
2243 case SO_RCVLOWAT:
2244 so->so_rcv.sb_lowat =
2245 (optval > so->so_rcv.sb_hiwat) ?
2246 so->so_rcv.sb_hiwat : optval;
2247 break;
2248 }
2249 break;
2250
2251 case SO_SNDTIMEO:
2252 case SO_RCVTIMEO:
2253 error = sooptcopyin(sopt, &tv, sizeof tv,
2254 sizeof tv);
2255 if (error)
2256 goto bad;
2257
2258 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2259 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2260 error = EDOM;
2261 goto bad;
2262 }
2263
2264 switch (sopt->sopt_name) {
2265 case SO_SNDTIMEO:
2266 so->so_snd.sb_timeo = tv;
2267 break;
2268 case SO_RCVTIMEO:
2269 so->so_rcv.sb_timeo = tv;
2270 break;
2271 }
2272 break;
2273
2274 case SO_NKE:
2275 {
2276 struct so_nke nke;
2277
2278 error = sooptcopyin(sopt, &nke,
2279 sizeof nke, sizeof nke);
2280 if (error)
2281 goto bad;
2282
2283 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2284 break;
2285 }
2286
2287 case SO_NOSIGPIPE:
2288 error = sooptcopyin(sopt, &optval, sizeof optval,
2289 sizeof optval);
2290 if (error)
2291 goto bad;
2292 if (optval)
2293 so->so_flags |= SOF_NOSIGPIPE;
2294 else
2295 so->so_flags &= ~SOF_NOSIGPIPE;
2296
2297 break;
2298
2299 case SO_NOADDRERR:
2300 error = sooptcopyin(sopt, &optval, sizeof optval,
2301 sizeof optval);
2302 if (error)
2303 goto bad;
2304 if (optval)
2305 so->so_flags |= SOF_NOADDRAVAIL;
2306 else
2307 so->so_flags &= ~SOF_NOADDRAVAIL;
2308
2309 break;
2310
2311 default:
2312 error = ENOPROTOOPT;
2313 break;
2314 }
2315 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2316 (void) ((*so->so_proto->pr_ctloutput)
2317 (so, sopt));
2318 }
2319 }
2320 bad:
2321 socket_unlock(so, 1);
2322 return (error);
2323 }
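/*
 * For reference, a minimal user-level sketch (error checking omitted) of
 * how such options reach sosetopt() through setsockopt(2), declared in
 * <sys/socket.h>; "s" is assumed to be an open socket descriptor:
 *
 *	struct linger l = { 1, 5 };	(l_onoff = 1, l_linger = 5)
 *	int on = 1;
 *
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof (l));
 *	setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof (on));
 */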
2324
2325 /* Helper routine for getsockopt */
2326 int
2327 sooptcopyout(sopt, buf, len)
2328 struct sockopt *sopt;
2329 void *buf;
2330 size_t len;
2331 {
2332 int error;
2333 size_t valsize;
2334
2335 error = 0;
2336
2337 /*
2338 * Documented get behavior is that we always return a value,
2339 * possibly truncated to fit in the user's buffer.
2340 * Traditional behavior is that we always tell the user
2341 * precisely how much we copied, rather than something useful
2342 * like the total amount we had available for her.
2343 * Note that this interface is not idempotent; the entire answer must
2344 * be generated ahead of time.
2345 */
2346 valsize = min(len, sopt->sopt_valsize);
2347 sopt->sopt_valsize = valsize;
2348 if (sopt->sopt_val != USER_ADDR_NULL) {
2349 if (sopt->sopt_p != 0)
2350 error = copyout(buf, sopt->sopt_val, valsize);
2351 else
2352 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2353 }
2354 return error;
2355 }
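/*
 * The matching get-side pattern, again only a sketch with illustrative
 * names: compute the current value and return it through sooptcopyout(),
 * which truncates to the size of the user's buffer as described above:
 *
 *	int optval;
 *
 *	optval = current_value_of_the_option;
 *	return (sooptcopyout(sopt, &optval, sizeof optval));
 */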
2356
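/*
 * getsockopt(2) counterpart: run the request past any attached socket
 * filters, hand non-SOL_SOCKET levels to the protocol's pr_ctloutput,
 * and answer the generic SOL_SOCKET options directly.
 */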
2357 int
2358 sogetopt(so, sopt)
2359 struct socket *so;
2360 struct sockopt *sopt;
2361 {
2362 int error, optval;
2363 struct linger l;
2364 struct timeval tv;
2365
2366 if (sopt->sopt_dir != SOPT_GET) {
2367 sopt->sopt_dir = SOPT_GET;
2368 }
2369
2370 socket_lock(so, 1);
2371
2372 {
2373 struct socket_filter_entry *filter;
2374 int filtered = 0;
2375 error = 0;
2376 for (filter = so->so_filt; filter && (error == 0);
2377 filter = filter->sfe_next_onsocket) {
2378 if (filter->sfe_filter->sf_filter.sf_getoption) {
2379 if (filtered == 0) {
2380 filtered = 1;
2381 sflt_use(so);
2382 socket_unlock(so, 0);
2383 }
2384 error = filter->sfe_filter->sf_filter.sf_getoption(
2385 filter->sfe_cookie, so, sopt);
2386 }
2387 }
2388 if (filtered != 0) {
2389 socket_lock(so, 0);
2390 sflt_unuse(so);
2391
2392 if (error) {
2393 if (error == EJUSTRETURN)
2394 error = 0;
2395 socket_unlock(so, 1);
2396 return error;
2397 }
2398 }
2399 }
2400
2401 error = 0;
2402 if (sopt->sopt_level != SOL_SOCKET) {
2403 if (so->so_proto && so->so_proto->pr_ctloutput) {
2404 error = (*so->so_proto->pr_ctloutput)
2405 (so, sopt);
2406 socket_unlock(so, 1);
2407 return (error);
2408 } else {
2409 socket_unlock(so, 1);
2410 return (ENOPROTOOPT);
2411 }
2412 } else {
2413 switch (sopt->sopt_name) {
2414 case SO_LINGER:
2415 case SO_LINGER_SEC:
2416 l.l_onoff = so->so_options & SO_LINGER;
2417 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2418 so->so_linger / hz;
2419 error = sooptcopyout(sopt, &l, sizeof l);
2420 break;
2421
2422 case SO_USELOOPBACK:
2423 case SO_DONTROUTE:
2424 case SO_DEBUG:
2425 case SO_KEEPALIVE:
2426 case SO_REUSEADDR:
2427 case SO_REUSEPORT:
2428 case SO_BROADCAST:
2429 case SO_OOBINLINE:
2430 case SO_TIMESTAMP:
2431 #ifdef __APPLE__
2432 case SO_DONTTRUNC:
2433 case SO_WANTMORE:
2434 case SO_WANTOOBFLAG:
2435 #endif
2436 optval = so->so_options & sopt->sopt_name;
2437 integer:
2438 error = sooptcopyout(sopt, &optval, sizeof optval);
2439 break;
2440
2441 case SO_TYPE:
2442 optval = so->so_type;
2443 goto integer;
2444
2445 #ifdef __APPLE__
2446 case SO_NREAD:
2447 {
2448 int pkt_total;
2449 struct mbuf *m1;
2450
2451 pkt_total = 0;
2452 m1 = so->so_rcv.sb_mb;
2453 if (so->so_proto->pr_flags & PR_ATOMIC)
2454 {
2455 while (m1) {
2456 if (m1->m_type == MT_DATA)
2457 pkt_total += m1->m_len;
2458 m1 = m1->m_next;
2459 }
2460 optval = pkt_total;
2461 } else
2462 optval = so->so_rcv.sb_cc;
2463 goto integer;
2464 }
2465 case SO_NWRITE:
2466 optval = so->so_snd.sb_cc;
2467 goto integer;
2468 #endif
2469 case SO_ERROR:
2470 optval = so->so_error;
2471 so->so_error = 0;
2472 goto integer;
2473
2474 case SO_SNDBUF:
2475 optval = so->so_snd.sb_hiwat;
2476 goto integer;
2477
2478 case SO_RCVBUF:
2479 optval = so->so_rcv.sb_hiwat;
2480 goto integer;
2481
2482 case SO_SNDLOWAT:
2483 optval = so->so_snd.sb_lowat;
2484 goto integer;
2485
2486 case SO_RCVLOWAT:
2487 optval = so->so_rcv.sb_lowat;
2488 goto integer;
2489
2490 case SO_SNDTIMEO:
2491 case SO_RCVTIMEO:
2492 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2493 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2494
2495 error = sooptcopyout(sopt, &tv, sizeof tv);
2496 break;
2497
2498 case SO_NOSIGPIPE:
2499 optval = (so->so_flags & SOF_NOSIGPIPE);
2500 goto integer;
2501
2502 case SO_NOADDRERR:
2503 optval = (so->so_flags & SOF_NOADDRAVAIL);
2504 goto integer;
2505
2506 default:
2507 error = ENOPROTOOPT;
2508 break;
2509 }
2510 socket_unlock(so, 1);
2511 return (error);
2512 }
2513 }
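/*
 * Likewise, a user-level sketch of the get side; SO_NREAD (handled above)
 * reports how many bytes are waiting in the next datagram, or in the
 * stream, without consuming them ("s" is again an open socket descriptor
 * and error checking is omitted):
 *
 *	int nread = 0;
 *	socklen_t len = sizeof (nread);
 *
 *	getsockopt(s, SOL_SOCKET, SO_NREAD, &nread, &len);
 */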
2514
2515 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2516 int
2517 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2518 {
2519 struct mbuf *m, *m_prev;
2520 int sopt_size = sopt->sopt_valsize;
2521
2522 if (sopt_size > MAX_SOOPTGETM_SIZE)
2523 return EMSGSIZE;
2524
2525 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2526 if (m == 0)
2527 return ENOBUFS;
2528 if (sopt_size > MLEN) {
2529 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2530 if ((m->m_flags & M_EXT) == 0) {
2531 m_free(m);
2532 return ENOBUFS;
2533 }
2534 m->m_len = min(MCLBYTES, sopt_size);
2535 } else {
2536 m->m_len = min(MLEN, sopt_size);
2537 }
2538 sopt_size -= m->m_len;
2539 *mp = m;
2540 m_prev = m;
2541
2542 while (sopt_size) {
2543 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2544 if (m == 0) {
2545 m_freem(*mp);
2546 return ENOBUFS;
2547 }
2548 if (sopt_size > MLEN) {
2549 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2550 if ((m->m_flags & M_EXT) == 0) {
2551 m_freem(*mp);
2552 return ENOBUFS;
2553 }
2554 m->m_len = min(MCLBYTES, sopt_size);
2555 } else {
2556 m->m_len = min(MLEN, sopt_size);
2557 }
2558 sopt_size -= m->m_len;
2559 m_prev->m_next = m;
2560 m_prev = m;
2561 }
2562 return 0;
2563 }
2564
2565 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2566 int
2567 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2568 {
2569 struct mbuf *m0 = m;
2570
2571 if (sopt->sopt_val == USER_ADDR_NULL)
2572 return 0;
2573 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2574 if (sopt->sopt_p != NULL) {
2575 int error;
2576
2577 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2578 if (error != 0) {
2579 m_freem(m0);
2580 return(error);
2581 }
2582 } else
2583 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2584 sopt->sopt_valsize -= m->m_len;
2585 sopt->sopt_val += m->m_len;
2586 m = m->m_next;
2587 }
2588 if (m != NULL) /* the chain should have been allocated large enough at ip6_sooptmcopyin() */
2589 panic("soopt_mcopyin");
2590 return 0;
2591 }
2592
2593 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2594 int
2595 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2596 {
2597 struct mbuf *m0 = m;
2598 size_t valsize = 0;
2599
2600 if (sopt->sopt_val == USER_ADDR_NULL)
2601 return 0;
2602 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2603 if (sopt->sopt_p != NULL) {
2604 int error;
2605
2606 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2607 if (error != 0) {
2608 m_freem(m0);
2609 return(error);
2610 }
2611 } else
2612 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2613 sopt->sopt_valsize -= m->m_len;
2614 sopt->sopt_val += m->m_len;
2615 valsize += m->m_len;
2616 m = m->m_next;
2617 }
2618 if (m != NULL) {
2619 /* the caller should have supplied a large enough option buffer */
2620 m_freem(m0);
2621 return(EINVAL);
2622 }
2623 sopt->sopt_valsize = valsize;
2624 return 0;
2625 }
2626
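/*
 * Called by protocols when out-of-band data arrives: deliver SIGURG to
 * the owning process or process group and wake up any threads selecting
 * on the receive buffer.
 */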
2627 void
2628 sohasoutofband(so)
2629 register struct socket *so;
2630 {
2631 struct proc *p;
2632
2633 if (so->so_pgid < 0)
2634 gsignal(-so->so_pgid, SIGURG);
2635 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2636 psignal(p, SIGURG);
2637 selwakeup(&so->so_rcv.sb_sel);
2638 }
2639
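/*
 * select(2)/poll(2) back end for sockets: report which of the requested
 * events are currently true and, if none are, record the calling thread
 * on the appropriate socket buffer(s) so it is woken when the state
 * changes.
 */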
2640 int
2641 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2642 {
2643 struct proc *p = current_proc();
2644 int revents = 0;
2645
2646 socket_lock(so, 1);
2647
2648 if (events & (POLLIN | POLLRDNORM))
2649 if (soreadable(so))
2650 revents |= events & (POLLIN | POLLRDNORM);
2651
2652 if (events & (POLLOUT | POLLWRNORM))
2653 if (sowriteable(so))
2654 revents |= events & (POLLOUT | POLLWRNORM);
2655
2656 if (events & (POLLPRI | POLLRDBAND))
2657 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2658 revents |= events & (POLLPRI | POLLRDBAND);
2659
2660 if (revents == 0) {
2661 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2662 /* Darwin sets the flag first, BSD calls selrecord first */
2663 so->so_rcv.sb_flags |= SB_SEL;
2664 selrecord(p, &so->so_rcv.sb_sel, wql);
2665 }
2666
2667 if (events & (POLLOUT | POLLWRNORM)) {
2668 /* Darwin sets the flag first, BSD calls selrecord first */
2669 so->so_snd.sb_flags |= SB_SEL;
2670 selrecord(p, &so->so_snd.sb_sel, wql);
2671 }
2672 }
2673
2674 socket_unlock(so, 1);
2675 return (revents);
2676 }
2677
2678 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2679
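/*
 * kqueue attach routine for sockets: choose the listen, read or write
 * filter ops based on the requested filter and the socket state, then
 * attach the knote to the corresponding socket buffer.
 */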
2680 int
2681 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2682 {
2683 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2684 struct sockbuf *sb;
2685 socket_lock(so, 1);
2686
2687 switch (kn->kn_filter) {
2688 case EVFILT_READ:
2689 if (so->so_options & SO_ACCEPTCONN)
2690 kn->kn_fop = &solisten_filtops;
2691 else
2692 kn->kn_fop = &soread_filtops;
2693 sb = &so->so_rcv;
2694 break;
2695 case EVFILT_WRITE:
2696 kn->kn_fop = &sowrite_filtops;
2697 sb = &so->so_snd;
2698 break;
2699 default:
2700 socket_unlock(so, 1);
2701 return (1);
2702 }
2703
2704 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2705 sb->sb_flags |= SB_KNOTE;
2706 socket_unlock(so, 1);
2707 return (0);
2708 }
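/*
 * A user-level sketch of what drives this attach path: registering a read
 * event for a socket descriptor "s" through kqueue(2)/kevent(2), declared
 * in <sys/event.h>, ends up here via soo_kqfilter() (error checking
 * omitted):
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *
 *	EV_SET(&ev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 */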
2709
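/*
 * Detach a read knote from the receive buffer, clearing SB_KNOTE once the
 * last knote has been removed.
 */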
2710 static void
2711 filt_sordetach(struct knote *kn)
2712 {
2713 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2714
2715 socket_lock(so, 1);
2716 if (so->so_rcv.sb_flags & SB_KNOTE)
2717 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2718 so->so_rcv.sb_flags &= ~SB_KNOTE;
2719 socket_unlock(so, 1);
2720 }
2721
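/*
 * Read event filter: kn_data is the number of readable bytes (or, when
 * out-of-band data is pending, the distance to the OOB mark); the filter
 * fires on EOF, on pending OOB data, on a pending socket error, or when
 * enough data is buffered to satisfy the low-water mark (or the
 * NOTE_LOWAT threshold, if one was requested).
 */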
2722 /*ARGSUSED*/
2723 static int
2724 filt_soread(struct knote *kn, long hint)
2725 {
2726 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2727
2728 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2729 socket_lock(so, 1);
2730
2731 if (so->so_oobmark) {
2732 if (kn->kn_flags & EV_OOBAND) {
2733 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2734 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2735 socket_unlock(so, 1);
2736 return (1);
2737 }
2738 kn->kn_data = so->so_oobmark;
2739 kn->kn_flags |= EV_OOBAND;
2740 } else {
2741 kn->kn_data = so->so_rcv.sb_cc;
2742 if (so->so_state & SS_CANTRCVMORE) {
2743 kn->kn_flags |= EV_EOF;
2744 kn->kn_fflags = so->so_error;
2745 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2746 socket_unlock(so, 1);
2747 return (1);
2748 }
2749 }
2750
2751 if (so->so_state & SS_RCVATMARK) {
2752 if (kn->kn_flags & EV_OOBAND) {
2753 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2754 socket_unlock(so, 1);
2755 return (1);
2756 }
2757 kn->kn_flags |= EV_OOBAND;
2758 } else if (kn->kn_flags & EV_OOBAND) {
2759 kn->kn_data = 0;
2760 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2761 socket_unlock(so, 1);
2762 return (0);
2763 }
2764
2765 if (so->so_error) { /* temporary udp error */
2766 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2767 socket_unlock(so, 1);
2768 return (1);
2769 }
2770
2771 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2772 socket_unlock(so, 1);
2773
2774 return( kn->kn_flags & EV_OOBAND ||
2775 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2776 kn->kn_sdata : so->so_rcv.sb_lowat));
2777 }
2778
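/*
 * Detach a write knote from the send buffer, clearing SB_KNOTE once the
 * last knote has been removed.
 */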
2779 static void
2780 filt_sowdetach(struct knote *kn)
2781 {
2782 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2783 socket_lock(so, 1);
2784
2785 if (so->so_snd.sb_flags & SB_KNOTE)
2786 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2787 so->so_snd.sb_flags &= ~SB_KNOTE;
2788 socket_unlock(so, 1);
2789 }
2790
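/*
 * Write event filter: kn_data is the free space in the send buffer; the
 * filter fires on send-side EOF, on a pending socket error, or when the
 * space reaches the low-water mark (or the NOTE_LOWAT threshold), but not
 * while a connection-oriented socket is still unconnected.
 */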
2791 /*ARGSUSED*/
2792 static int
2793 filt_sowrite(struct knote *kn, long hint)
2794 {
2795 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2796
2797 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2798 socket_lock(so, 1);
2799
2800 kn->kn_data = sbspace(&so->so_snd);
2801 if (so->so_state & SS_CANTSENDMORE) {
2802 kn->kn_flags |= EV_EOF;
2803 kn->kn_fflags = so->so_error;
2804 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2805 socket_unlock(so, 1);
2806 return (1);
2807 }
2808 if (so->so_error) { /* temporary udp error */
2809 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2810 socket_unlock(so, 1);
2811 return (1);
2812 }
2813 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2814 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2815 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2816 socket_unlock(so, 1);
2817 return (0);
2818 }
2819 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2820 socket_unlock(so, 1);
2821 if (kn->kn_sfflags & NOTE_LOWAT)
2822 return (kn->kn_data >= kn->kn_sdata);
2823 return (kn->kn_data >= so->so_snd.sb_lowat);
2824 }
2825
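/*
 * Listen event filter: kn_data is the length of the listen backlog; the
 * filter fires when a completed connection is waiting to be accepted.
 */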
2826 /*ARGSUSED*/
2827 static int
2828 filt_solisten(struct knote *kn, long hint)
2829 {
2830 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2831 int have_connection;
2832
2833 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2834 socket_lock(so, 1);
2835 kn->kn_data = so->so_qlen;
2836 have_connection = !TAILQ_EMPTY(&so->so_comp);
2837 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2838 socket_unlock(so, 1);
2839 return (have_connection);
2840 }
2841
2842
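/*
 * Take the socket's lock, using the protocol's pr_lock routine when one
 * is provided and the domain mutex otherwise; when "refcount" is set,
 * also take a use-count reference on the socket. The caller's return
 * address may be recorded for lock debugging.
 */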
2843 int
2844 socket_lock(so, refcount)
2845 struct socket *so;
2846 int refcount;
2847 {
2848 int error = 0, lr = 0, lr_saved = 0; /* initialized for non-__ppc__ builds */
2849 #ifdef __ppc__
2850 __asm__ volatile("mflr %0" : "=r" (lr));
2851 lr_saved = lr;
2852 #endif
2853
2854 if (so->so_proto->pr_lock) {
2855 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2856 }
2857 else {
2858 #ifdef MORE_LOCKING_DEBUG
2859 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2860 #endif
2861 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2862 if (refcount)
2863 so->so_usecount++;
2864 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2865 }
2866
2867 return(error);
2868
2869 }
2870
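/*
 * Release the socket's lock via the protocol's pr_unlock when one is
 * provided; otherwise drop the use count if requested, free the socket on
 * the last reference, and unlock the domain mutex.
 */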
2871 int
2872 socket_unlock(so, refcount)
2873 struct socket *so;
2874 int refcount;
2875 {
2876 int error = 0, lr = 0, lr_saved = 0; /* initialized for non-__ppc__ builds */
2877 lck_mtx_t * mutex_held;
2878
2879 #ifdef __ppc__
2880 __asm__ volatile("mflr %0" : "=r" (lr));
2881 lr_saved = lr;
2882 #endif
2883
2884
2885
2886 if (so->so_proto == NULL)
2887 panic("socket_unlock null so_proto so=%x\n", so);
2888
2889 if (so && so->so_proto->pr_unlock)
2890 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2891 else {
2892 mutex_held = so->so_proto->pr_domain->dom_mtx;
2893 #ifdef MORE_LOCKING_DEBUG
2894 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2895 #endif
2896 if (refcount) {
2897 if (so->so_usecount <= 0)
2898 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2899 so->so_usecount--;
2900 if (so->so_usecount == 0) {
2901 sofreelastref(so, 1);
2902 }
2903 else
2904 so->reserved4 = (void*)lr_saved; /* save caller */
2905 }
2906 lck_mtx_unlock(mutex_held);
2907 }
2908
2909 return(error);
2910 }
2911 //### Called with socket locked, will unlock socket
2912 void
2913 sofree(so)
2914 struct socket *so;
2915 {
2916
2917 int lr, lr_saved;
2918 lck_mtx_t * mutex_held;
2919 #ifdef __ppc__
2920 __asm__ volatile("mflr %0" : "=r" (lr));
2921 lr_saved = lr;
2922 #endif
2923 if (so->so_proto->pr_getlock != NULL)
2924 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2925 else
2926 mutex_held = so->so_proto->pr_domain->dom_mtx;
2927 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2928
2929 /* Remove the filters */
2930 sflt_termsock(so);
2931
2932 sofreelastref(so, 0);
2933 }
2934
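/*
 * Take an additional use-count reference on an already-allocated socket.
 */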
2935 void
2936 soreference(so)
2937 struct socket *so;
2938 {
2939 socket_lock(so, 1); /* lock the socket and take one reference */
2940 socket_unlock(so, 0); /* unlock only; keep the reference */
2941 }
2942
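/*
 * Drop a use-count reference taken with soreference(), freeing the socket
 * if this was the last one.
 */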
2943 void
2944 sodereference(so)
2945 struct socket *so;
2946 {
2947 socket_lock(so, 0);
2948 socket_unlock(so, 1);
2949 }