/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)
/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
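#if 0
/*
 * Illustrative sketch (not part of the original source): the active-side
 * sequence described above, as a hypothetical connection-oriented protocol
 * might drive it.  The my_proto_* names are invented; only soisconnecting()
 * and soisconnected() refer to routines defined below.
 */
static int
my_proto_connect(struct socket *so, struct sockaddr *nam)
{
	/* start the protocol handshake, then mark the socket connecting */
	soisconnecting(so);
	return (0);	/* connect() sleeps until soisconnected() runs */
}

static void
my_proto_handshake_complete_active(struct socket *so)
{
	/* handshake done: wakes the thread blocked in connect() */
	soisconnected(so);
}
#endif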
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
	}
}
void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
}
void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
}
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no dropable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;

		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				socket_lock(so, 1);
				sonext = TAILQ_NEXT(so, so_list);
//				in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
				socket_unlock(so, 1);
				so = sonext;
//			}
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
		return (so);
}
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid  = head->so_pgid;
	so->so_uid = head->so_uid;

	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Attach the protocol to the new socket.
	 * Must be done with head unlocked to avoid deadlock for protocol with per socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);

	so->so_proto->pr_domain->dom_refs++;

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
	}
	so->so_head = head;

	/* Attach socket filters for this protocol */
	sflt_initsock(so);

	if (connstatus) {
		so->so_state |= connstatus;
		wakeup((caddr_t)&head->so_timeo);
	}

	return (so);
}
struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;

	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			socket_unlock(head, 0);
			error = filter->sfe_filter->sf_filter.sf_connect_in(
			    filter->sfe_cookie, head, from);
			socket_lock(head, 0);
		}
	}

	if (error)
		return (NULL);

	return sonewconn_internal(head, connstatus);
}
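#if 0
/*
 * Illustrative sketch (not part of the original source): the passive side
 * described above.  A hypothetical protocol's input routine creates an
 * embryonic socket on the listener's so_incomp queue with sonewconn() and
 * later promotes it with soisconnected(); names other than
 * sonewconn()/soisconnected() are invented.
 */
static void
my_proto_input_connreq(struct socket *head, const struct sockaddr *from)
{
	struct socket *so;

	/* connstatus == 0: queue on so_incomp until the handshake finishes */
	so = sonewconn(head, 0, from);
	if (so == NULL)
		return;		/* listen queue full or a filter rejected it */
	/* ... set up protocol-specific handshake state here ... */
}

static void
my_proto_handshake_complete_passive(struct socket *so)
{
	/* moves so from so_incomp to so_comp and wakes accept() */
	soisconnected(so);
}
#endif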
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}
void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
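#if 0
/*
 * Illustrative sketch (not part of the original source): the usual way a
 * receive path blocks until the socket buffer has data, using sbwait()
 * above.  Error handling is abbreviated and the helper name is invented.
 */
static int
wait_for_rcv_data(struct socket *so)
{
	int error;

	while (so->so_rcv.sb_cc == 0) {
		if (so->so_state & SS_CANTRCVMORE)
			return (0);		/* EOF: peer will send no more */
		if (so->so_error)
			return (so->so_error);
		error = sbwait(&so->so_rcv);	/* sets SB_WAIT, sleeps on &sb_cc */
		if (error)
			return (error);		/* e.g. EINTR or EBADF */
	}
	return (0);
}
#endif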
/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t * mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();

	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
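/*
 * Illustrative note (not part of the original source): for a datagram
 * protocol that prepends the sender's name, the receive-buffer record
 * conventions above give a layout like this (sbappendaddr() below builds
 * exactly this shape):
 *
 *	sb_mb -> [MT_SONAME mbuf: sender address]		one record
 *	             m_next -> [MT_DATA mbuf(s): payload]
 *	         m_nextpkt -> [next record ...]
 *
 * soreceive() consumes the MT_SONAME mbuf first, then the data mbufs,
 * then moves to the next record through m_nextpkt.
 */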
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{
	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
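#if 0
/*
 * Illustrative sketch (not part of the original source): a protocol's
 * attach routine typically sizes its socket buffers with soreserve()
 * above.  The space constants and function name are invented.
 */
#define MY_PROTO_SENDSPACE	(8 * 1024)
#define MY_PROTO_RECVSPACE	(8 * 1024)

static int
my_proto_attach(struct socket *so, int proto, struct proc *p)
{
	int error;

	error = soreserve(so, MY_PROTO_SENDSPACE, MY_PROTO_RECVSPACE);
	if (error)
		return (error);	/* ENOBUFS if either reservation is refused */
	/* ... allocate the protocol control block here ... */
	return (0);
}
#endif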
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
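#if 0
/*
 * Illustrative sketch (not part of the original source): the send-buffer
 * usage described above for a reliable protocol.  Data stays in so_snd
 * until acknowledged; output copies it with m_copy() and acknowledgement
 * releases it with sbdrop().  Names other than m_copy()/sbdrop()/
 * sowwakeup() are invented.
 */
static void
my_proto_output_segment(struct socket *so, int off, int len)
{
	struct mbuf *m;

	/* copy without removing: the data may need to be retransmitted */
	m = m_copy(so->so_snd.sb_mb, off, len);
	if (m == NULL)
		return;
	/* ... hand m to the network layer ... */
}

static void
my_proto_input_ack(struct socket *so, int acked)
{
	/* the peer acknowledged `acked' bytes: drop them from so_snd */
	sbdrop(&so->so_snd, acked);
	sowwakeup(so);	/* blocked senders may now find room via sbspace() */
}
#endif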
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

	if (m == 0)
		return 0;

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we ran any filters, the socket lock was dropped.  n and
		 * sb_first cached data from the socket buffer.  This cache is
		 * not valid since we dropped the lock.  We must start over.
		 * Since filtered is set we won't run through the filters a
		 * second time.  We just set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	if ((n = sb->sb_mb) != NULL) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (!result) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	if ((n = sb->sb_mb) != NULL) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (!result) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever.  Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}
/*
 * Drop data from (the front of) a sockbuf.
 * Use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next.
 * The socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/* temporarily replacing this panic with printf because
				 * it occurs occasionally when closing a socket when there
				 * is no harm in ignoring it.  This problem will be
				 * investigated further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
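#if 0
/*
 * Illustrative sketch (not part of the original source): building a
 * control mbuf with sbcreatecontrol() above and queueing it for
 * soreceive().  A timestamp is used as the example payload; the
 * surrounding function name is invented.
 */
static void
my_proto_deliver(struct socket *so, struct mbuf *data)
{
	struct timeval tv;
	struct mbuf *control;
	int error;

	microtime(&tv);
	control = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
	    SCM_TIMESTAMP, SOL_SOCKET);		/* may be NULL */

	if (sbappendcontrol(&so->so_rcv, data, control, &error))
		sorwakeup(so);
	/* on failure sbappendcontrol() has already freed data and control */
}
#endif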
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int pru_abort_notsupp(struct socket *so)
{ return EOPNOTSUPP; }

int pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{ return EOPNOTSUPP; }

int pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{ return EOPNOTSUPP; }

int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{ return EOPNOTSUPP; }

int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{ return EOPNOTSUPP; }

int pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{ return EOPNOTSUPP; }

int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
	struct ifnet *ifp, struct proc *p)
{ return EOPNOTSUPP; }

int pru_detach_notsupp(struct socket *so)
{ return EOPNOTSUPP; }

int pru_disconnect_notsupp(struct socket *so)
{ return EOPNOTSUPP; }

int pru_listen_notsupp(struct socket *so, struct proc *p)
{ return EOPNOTSUPP; }

int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{ return EOPNOTSUPP; }

int pru_rcvd_notsupp(struct socket *so, int flags)
{ return EOPNOTSUPP; }

int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{ return EOPNOTSUPP; }

int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
	struct sockaddr *addr, struct mbuf *control, struct proc *p)
{ return EOPNOTSUPP; }
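#if 0
/*
 * Illustrative sketch (not part of the original source): the pru_*_notsupp
 * stubs above are intended to fill the slots of a protocol's pr_usrreqs
 * table that the protocol does not implement.  The my_proto_* entries are
 * invented; the pru_* field names are those of struct pr_usrreqs.
 */
static struct pr_usrreqs my_proto_usrreqs;

static void
my_proto_init_usrreqs(void)
{
	/* entries the protocol actually implements */
	my_proto_usrreqs.pru_attach = my_proto_attach;
	my_proto_usrreqs.pru_detach = my_proto_detach;
	my_proto_usrreqs.pru_send = my_proto_send;
	my_proto_usrreqs.pru_sense = pru_sense_null;
	/* everything else points at a stub that returns EOPNOTSUPP */
	my_proto_usrreqs.pru_accept = pru_accept_notsupp;
	my_proto_usrreqs.pru_connect2 = pru_connect2_notsupp;
	my_proto_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	my_proto_usrreqs.pru_listen = pru_listen_notsupp;
	my_proto_usrreqs.pru_sopoll = pru_sopoll_notsupp;
}
#endif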
/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}
int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
	struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{ return EOPNOTSUPP; }

int pru_soreceive_notsupp(struct socket *so,
	struct sockaddr **paddr,
	struct uio *uio, struct mbuf **mp0,
	struct mbuf **controlp, int *flagsp)
{ return EOPNOTSUPP; }

int pru_shutdown_notsupp(struct socket *so)
{ return EOPNOTSUPP; }

int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{ return EOPNOTSUPP; }
int pru_sosend(struct socket *so, struct sockaddr *addr,
	struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{ return EOPNOTSUPP; }

int pru_soreceive(struct socket *so,
	struct sockaddr **paddr,
	struct uio *uio, struct mbuf **mp0,
	struct mbuf **controlp, int *flagsp)
{ return EOPNOTSUPP; }

int pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
	__unused kauth_cred_t cred, __unused void *wql)
{ return EOPNOTSUPP; }
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}
/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}
/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}
/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	     (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}
/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	return (sb->sb_flags & SB_LOCK ?
	    ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
	    (sb->sb_flags |= SB_LOCK), 0);
}
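#if 0
/*
 * Illustrative sketch (not part of the original source): the usual pairing
 * of sblock()/sbunlock() around work that must see a consistent socket
 * buffer, in the same way sbflush() above uses them.  The helper name is
 * invented; sblock(sb, M_WAIT) may sleep in sb_lock().
 */
static int
with_locked_rcvbuf(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	int error;

	error = sblock(sb, M_WAIT);
	if (error)
		return (error);
	/* ... examine or drain sb->sb_mb here ... */
	sbunlock(sb, 1);	/* 1: keep the socket mutex held */
	return (0);
}
#endif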
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr, lr_saved;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}
void
sorwakeup(struct socket * so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket * so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");