/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>
#define DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)
/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
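/*
 * Illustrative sketch (not part of the original source): the call sequence a
 * hypothetical connection-oriented protocol would make against the routines
 * below.  The example_* names are made up; only the soisconnect*() and
 * soisdisconnect*() calls are real entry points in this file.
 */
#if 0	/* example only, not compiled */
static void
example_pr_connect_done(struct socket *so)
{
	soisconnecting(so);	/* connect() is in progress */
	/* ... the protocol's handshake completes asynchronously ... */
	soisconnected(so);	/* moves so from so_incomp to so_comp on the listener */
}

static void
example_pr_disconnect_done(struct socket *so)
{
	soisdisconnecting(so);	/* disconnect() in progress, no further I/O */
	/* ... once the connection to the peer is fully torn down ... */
	soisdisconnected(so);	/* wakes anyone sleeping on so->so_timeo */
}
#endif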
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
	}
}
void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (so);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				socket_lock(so, 1);
				sonext = TAILQ_NEXT(so, so_list);
//				in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
				socket_unlock(so, 1);
				so = sonext;
//			}
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
		return (so);
}
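/*
 * Illustrative sketch (not part of the original source): the arithmetic
 * sodropablereq() uses above to pick a random victim.  rnd is stepped with a
 * 16-bit linear congruential generator and scaled into [0, qlen] with a
 * fixed point multiply; e.g. qlen = 9 and rnd = 0x8000 give
 * j = ((9 + 1) * 0x8000) >> 16 = 5, the sixth entry of so_incomp.
 */
#if 0	/* example only, not compiled */
static unsigned int
example_pick_index(unsigned int qlen, int *rndp)
{
	/* same arithmetic as sodropablereq() above */
	*rndp = (314159 * (*rndp) + 66329) & 0xffff;
	return (((qlen + 1) * (unsigned int)*rndp) >> 16);	/* 0 .. qlen */
}
#endif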
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
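/*
 * Illustrative sketch (not part of the original source): how a listening
 * protocol might call sonewconn() when a connection request arrives.  With
 * connstatus == 0 the new socket is queued on so_incomp; a later
 * soisconnected() call moves it to so_comp for accept().
 */
#if 0	/* example only, not compiled */
static void
example_input_connect_request(struct socket *head, const struct sockaddr *from)
{
	struct socket *so = sonewconn(head, 0, from);

	if (so == NULL)
		return;		/* queue limit reached or allocation failed: drop the request */
	/* ... protocol finishes its handshake, then calls soisconnected(so) ... */
}
#endif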
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		return ((struct socket *)0);
	}

	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid  = head->so_pgid;
	so->so_uid = head->so_uid;

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock for protocol with per socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);

	so->so_proto->pr_domain->dom_refs++;

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
	}

	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);

	if (connstatus) {
		so->so_state |= connstatus;
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}
struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	int filtered = 0;
	struct socket_filter_entry *filter;

	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
					filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0)
		socket_lock(head, 0);

	if (error)
		return NULL;

	return sonewconn_internal(head, connstatus);
}
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */
void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
		(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
		&ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t * mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
			(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();

	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
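/*
 * Illustrative sketch (not part of the original source): the reserve step
 * described above, as a hypothetical protocol attach routine would perform
 * it.  EXAMPLE_SNDSPACE/EXAMPLE_RCVSPACE are made-up constants.
 */
#if 0	/* example only, not compiled */
#define EXAMPLE_SNDSPACE	(8 * 1024)
#define EXAMPLE_RCVSPACE	(8 * 1024)

static int
example_pr_attach(struct socket *so)
{
	int error;

	/* commit send and receive buffer space before the socket is used */
	error = soreserve(so, EXAMPLE_SNDSPACE, EXAMPLE_RCVSPACE);
	if (error)
		return (error);		/* ENOBUFS if the limits were exceeded */
	return (0);
}
#endif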
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/*  WARNING needs to do selthreadclear() before calling this */
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
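/*
 * Illustrative sketch (not part of the original source): the receive-side
 * pattern described above for a datagram protocol.  "from" would be the
 * sender's address built by the protocol; error handling is elided.
 */
#if 0	/* example only, not compiled */
static void
example_deliver_datagram(struct socket *so, struct sockaddr *from, struct mbuf *m)
{
	if (sbappendaddr(&so->so_rcv, from, m, NULL, NULL))
		sorwakeup(so);	/* the new record is now visible to soreceive() */
}
#endif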
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

	if (m == 0)
		return 0;

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we ran any filters, the socket lock was dropped.  n and sb_first
		 * cached data from the socket buffer.  This cache is not valid
		 * since we dropped the lock.  We must start over.  Since filtered
		 * is set we won't run through the filters a second time.  We just
		 * set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
			sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	if ((n = sb->sb_mb) != NULL) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	if ((n = sb->sb_mb) != NULL) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic ("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}
/*
 * Drop data from (the front of) a sockbuf.
 * use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next
 * the socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/* temporarily replacing this panic with printf because
				 * it occurs occasionally when closing a socket when there
				 * is no harm in ignoring it.  This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
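/*
 * Illustrative sketch (not part of the original source): how a reliable
 * protocol would use sbdrop() above when an acknowledgement arrives, as
 * described in the comment block introducing these routines.  "acked" is a
 * hypothetical byte count taken from the protocol's own state.
 */
#if 0	/* example only, not compiled */
static void
example_input_ack(struct socket *so, int acked)
{
	sbdrop(&so->so_snd, acked);	/* release the acknowledged bytes */
	sowwakeup(so);			/* writers may now have room again */
}
#endif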
/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
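/*
 * Illustrative sketch (not part of the original source): building a control
 * mbuf carrying a timestamp with sbcreatecontrol() above and appending it
 * together with data, the way a protocol's receive path might.  Error
 * handling is elided.
 */
#if 0	/* example only, not compiled */
static void
example_deliver_with_timestamp(struct socket *so, struct mbuf *data)
{
	struct timeval tv;
	struct mbuf *control;

	microtime(&tv);
	control = sbcreatecontrol((caddr_t)&tv, sizeof (tv), SCM_TIMESTAMP, SOL_SOCKET);
	if (sbappendcontrol(&so->so_rcv, data, control, NULL))
		sorwakeup(so);
}
#endif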
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int	pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int	pru_soreceive_notsupp(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int	pru_sosend(struct socket *so, struct sockaddr *addr,
	   struct uio *uio, struct mbuf *top,
	   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int	pru_soreceive(struct socket *so,
	      struct sockaddr **paddr,
	      struct uio *uio, struct mbuf **mp0,
	      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */

int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}

/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	    (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	return(sb->sb_flags & SB_LOCK ?
		((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
		(sb->sb_flags |= SB_LOCK), 0);
}
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr, lr_saved;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}
void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
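/*
 * Illustrative sketch (not part of the original source): reading one of the
 * kern.ipc values declared below from user space.
 */
#if 0	/* example only, not compiled into the kernel */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int maxsockbuf;
	size_t len = sizeof (maxsockbuf);

	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsockbuf, &len, NULL, 0) == 0)
		printf("kern.ipc.maxsockbuf = %d\n", maxsockbuf);
	return (0);
}
#endif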
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");