/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>
#define DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)
/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
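/*
 * A minimal user-level sketch of the two-queue handoff described above,
 * using the same <sys/queue.h> macros the kernel uses.  The struct and
 * queue names here are invented for illustration; in the kernel the
 * queues are so_incomp/so_comp and the entry field is so_list.
 */
#if 0
#include <sys/queue.h>

struct conn {
	TAILQ_ENTRY(conn) c_list;		/* plays the role of so_list */
};
static TAILQ_HEAD(connq, conn) incomp = TAILQ_HEAD_INITIALIZER(incomp);
static struct connq comp = TAILQ_HEAD_INITIALIZER(comp);

/* roughly what soisconnected() does for a queued passive connection */
static void
example_promote(struct conn *c)
{
	TAILQ_REMOVE(&incomp, c, c_list);	/* leave the in-progress queue */
	TAILQ_INSERT_TAIL(&comp, c, c_list);	/* now visible to accept() */
}
#endif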
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;

		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				socket_lock(so, 1);
				sonext = TAILQ_NEXT(so, so_list);
//				in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
				socket_unlock(so, 1);
				so = sonext;
//			}
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
		return (so);
}
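/*
 * A standalone sketch of the fixed-point random pick used above: rnd is
 * a 16-bit linear congruential generator, and ((qlen + 1) * rnd) >> 16
 * maps it to a roughly uniform index in [0, qlen].  About one pick in
 * qlen + 1 walks past the last entry and yields a null, which is the
 * "no droppable requests" case described in the comment.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned int rnd = 0, qlen = 10, i;

	for (i = 0; i < 5; i++) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		printf("pick index %u of 0..%u\n",
		    ((qlen + 1) * rnd) >> 16, qlen);
	}
	return 0;
}
#endif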
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock with pcb list
	 */
	socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sodealloc(so);
		socket_lock(head, 0);
		return ((struct socket *)0);
	}
	socket_lock(head, 0);

	so->so_proto->pr_domain->dom_refs++;

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);

	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}
struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
				filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}
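/*
 * From user space, shutdown(2) with SHUT_WR is the usual way to reach
 * the socantsendmore() path described above; data already queued from
 * the peer can still be read afterwards.  A minimal sketch:
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

static void
example_half_close(int fd)
{
	char buf[512];

	shutdown(fd, SHUT_WR);			/* no more data will be sent */
	while (read(fd, buf, sizeof(buf)) > 0)
		;				/* ...but we may keep reading */
	close(fd);
}
#endif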
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();

	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
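/*
 * A sketch of walking the record structure described above: records are
 * chained through m_nextpkt, and the mbufs making up one record are
 * chained through m_next, so tallying the buffered data is a double loop.
 */
#if 0
static u_long
example_count_bytes(struct sockbuf *sb)
{
	struct mbuf *record, *m;
	u_long len = 0;

	for (record = sb->sb_mb; record; record = record->m_nextpkt)
		for (m = record; m; m = m->m_next)
			len += m->m_len;	/* name, rights, and data mbufs alike */
	return (len);
}
#endif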
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
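/*
 * Worked numbers for the limits above, assuming the common values
 * MSIZE = 256, MCLBYTES = 2048, and sb_max = 262144: the cap allows
 * cc up to 262144 * 2048 / 2304 (about 233016 bytes), and a 64 KB
 * reservation gets sb_mbmax = min(65536 * 8, 262144) = 262144.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned long long sb_max = 262144, msize = 256, mclbytes = 2048;
	unsigned long long cc = 65536, sb_efficiency = 8;
	unsigned long long mbmax = cc * sb_efficiency;

	printf("max cc: %llu\n", sb_max * mclbytes / (msize + mclbytes));
	printf("sb_mbmax: %llu\n", mbmax < sb_max ? mbmax : sb_max);
	return 0;
}
#endif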
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and the data is then removed from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
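/*
 * A sketch of the send-buffer convention described above, as a
 * hypothetical reliable protocol might apply it.  proto_output() is an
 * invented stand-in for a protocol's real output routine; the sbspace()
 * check, m_copy() for transmission, and sbdrop() on acknowledgement are
 * the conventions this comment documents.
 */
#if 0
static int
example_send(struct socket *so, struct mbuf *m, int len)
{
	if (sbspace(&so->so_snd) < len)
		return (ENOBUFS);		/* caller must wait and retry */
	sbappend(&so->so_snd, m);		/* hold for retransmission */
	proto_output(so, m_copy(so->so_snd.sb_mb, 0, len));	/* hypothetical */
	return (0);
}

static void
example_acked(struct socket *so, int acked)
{
	sbdrop(&so->so_snd, acked);		/* release acknowledged bytes */
	sowwakeup(so);				/* blocked writers may proceed */
}
#endif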
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

	if (m == 0)
		return 0;

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we ran any filters, the socket lock was dropped.  n and
		 * sb_first cached data from the socket buffer.  That cache is
		 * not valid since we dropped the lock, so we must start over.
		 * Since filtered is set we won't run through the filters a
		 * second time; we just set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever.  Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}
/*
 * Drop data from (the front of) a sockbuf.
 * use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next
 * the socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * Temporarily replacing this panic with printf
				 * because it occurs occasionally when closing
				 * a socket and there is no harm in ignoring it.
				 * This problem will be investigated further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
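/*
 * A sketch of the usual calling pattern: a protocol input path builds a
 * control mbuf and appends it with the payload.  The TTL delivery shown
 * here is invented for illustration; the cmsg level/type pair would be
 * whatever the protocol actually advertises.
 */
#if 0
static void
example_deliver(struct socket *so, struct mbuf *payload, u_char ttl)
{
	struct mbuf *control;

	control = sbcreatecontrol((caddr_t)&ttl, sizeof (ttl),
	    IP_RECVTTL, IPPROTO_IP);
	if (control != NULL)
		sbappendcontrol(&so->so_rcv, payload, control, NULL);
}
#endif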
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr,
	   struct uio *uio, struct mbuf *top,
	   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so,
	      struct sockaddr **paddr,
	      struct uio *uio, struct mbuf **mp0,
	      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}
/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
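/*
 * Worked example of the "negative space" case the comment above warns
 * about: if sb_cc temporarily exceeds sb_hiwat (say cc = 3000 against
 * hiwat = 2048), the unsigned difference would be enormous, but the
 * cast to int recovers -952, so sbspace() reports no room rather than
 * a bogus large value.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned long hiwat = 2048, cc = 3000;

	printf("unsigned: %lu  signed: %d\n", hiwat - cc, (int)(hiwat - cc));
	return 0;
}
#endif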
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}
/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}
/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	    (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}
/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	if (sb->sb_flags & SB_LOCK)
		return ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK);
	sb->sb_flags |= SB_LOCK;
	return (0);
}
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr, lr_saved;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n",
			    sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x lr=%x sb_flags=%x\n",
			    so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}
void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");