/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>
#define	DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define	DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)

/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static u_long sb_efficiency = 8;	/* parameter for sbreserve() */
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
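
/*
 * Illustrative sketch (not part of the build): the state-flag sequence a
 * connection-oriented protocol drives on the active side, per the comment
 * above.  The function name is hypothetical.
 */
#if 0
static void
example_active_connect(struct socket *so)
{
	soisconnecting(so);	/* connect() in progress */
	/* ... protocol completes its handshake with the peer ... */
	soisconnected(so);	/* connection established */
	/* ... later, teardown begins ... */
	soisdisconnecting(so);	/* disconnect() in progress */
	soisdisconnected(so);	/* connection to the peer fully severed */
}
#endif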
void
soisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;

		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				socket_lock(so, 1);
				sonext = TAILQ_NEXT(so, so_list);
//				in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
				socket_unlock(so, 1);
				so = sonext;
//			}
		}

//		if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//			return (NULL);
	}

	return (so);
}
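
/*
 * Illustrative sketch (not built): how the linear-congruential step above
 * maps a 16-bit pseudo-random value onto a queue index in [0, qlen].  When
 * the index equals qlen the walk runs off the end of the list and the
 * caller sees NULL -- the "no droppable request" case described above.
 */
#if 0
static unsigned int
example_random_index(unsigned int qlen)
{
	static int rnd;

	rnd = (314159 * rnd + 66329) & 0xffff;	/* 16-bit LCG step */
	return (((qlen + 1) * rnd) >> 16);	/* scaled into [0, qlen] */
}
#endif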
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid  = head->so_pgid;
	so->so_uid = head->so_uid;

	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock
	 * for protocols with per socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);
	so->so_proto->pr_domain->dom_refs++;

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

	/* Attach socket filters for this protocol */
	sflt_initsock(so);

	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}
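
/*
 * Illustrative sketch (not built): the admission check at the top of
 * sonewconn_internal().  With listen(s, 5) the queue limit is 5, so new
 * connections are refused once the queue length exceeds 3 * 5 / 2 = 7.
 */
#if 0
static int
example_backlog_full(struct socket *head)
{
	return (head->so_qlen > 3 * head->so_qlimit / 2);
}
#endif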
struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
			    filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error)
		return (NULL);

	return sonewconn_internal(head, connstatus);
}
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}
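
/*
 * Illustrative sketch (not built): how a protocol's PRU_SHUTDOWN handler
 * would typically use socantsendmore(), and how end-of-data from the peer
 * maps to socantrcvmore().  The function names are hypothetical.
 */
#if 0
static int
example_pru_shutdown(struct socket *so)
{
	socantsendmore(so);	/* user called shutdown(s, SHUT_WR) */
	/* ... protocol emits its end-of-data marker (e.g. TCP FIN) ... */
	return (0);
}

static void
example_peer_done_sending(struct socket *so)
{
	/* peer will send no more; data already queued is still readable */
	socantrcvmore(so);
}
#endif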
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
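
/*
 * Illustrative sketch (not built): the timeval-to-timespec conversion used
 * above when arming the msleep() timeout, assuming the usual
 * microseconds-to-nanoseconds scaling.
 */
#if 0
static struct timespec
example_sb_timeout(struct timeval tv)
{
	struct timespec ts;

	ts.tv_sec = tv.tv_sec;
	ts.tv_nsec = tv.tv_usec * 1000;	/* microseconds -> nanoseconds */
	return (ts);
}
#endif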
/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();

	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
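
/*
 * Illustrative sketch (not built): the record layout soreceive() expects
 * for a datagram with a sender name -- an MT_SONAME mbuf chained to the
 * data via m_next, with successive records linked through m_nextpkt.
 */
#if 0
static void
example_record_layout(struct mbuf *name, struct mbuf *data,
    struct mbuf *prev_record)
{
	name->m_type = MT_SONAME;	/* record begins with the sender's name */
	name->m_next = data;		/* data follows within the same record */
	data->m_next = NULL;
	name->m_nextpkt = NULL;
	if (prev_record)
		prev_record->m_nextpkt = name;	/* records chain via m_nextpkt */
}
#endif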
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{
	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
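
/*
 * Illustrative sketch (not built): the limit arithmetic in sbreserve().
 * Assuming the historical values MSIZE = 256, MCLBYTES = 2048 and
 * sb_max = 256 * 1024, the largest acceptable reservation is
 * 262144 * 2048 / 2304, about 233,000 bytes, and sb_mbmax is cc scaled
 * by sb_efficiency (8) but clamped at sb_max.  The real constants come
 * from the platform headers.
 */
#if 0
static int
example_sbreserve_math(u_long cc)
{
	u_long mbmax;

	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);			/* request too large */
	mbmax = min(cc * sb_efficiency, sb_max);/* mbuf accounting ceiling */
	return (mbmax != 0);
}
#endif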
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{
	sbflush(sb);
	sb->sb_hiwat = 0;
	sb->sb_mbmax = 0;
}
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and the
 * data is then removed from the socket buffer with sbdrop() or
 * sbdroprecord() when it is acknowledged by the peer.
 */
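
/*
 * Illustrative sketch (not built): the usual check-then-append pattern a
 * protocol input path follows before queueing data for the reader,
 * assuming m carries a packet header.
 */
#if 0
static int
example_queue_input(struct socket *so, struct mbuf *m)
{
	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
		m_freem(m);		/* no room: drop, let the peer retry */
		return (ENOBUFS);
	}
	if (sbappend(&so->so_rcv, m))	/* append to the last record */
		sorwakeup(so);		/* wake readers only if data landed */
	return (0);
}
#endif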
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * with the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

	if (m == 0)
		return 0;

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we called any filters, the socket lock was dropped.
		 * n and sb_first cached data from the socket buffer.  This
		 * cache is not valid since we dropped the lock.  We must
		 * start over.  Since filtered is set we won't run through
		 * the filters a second time.  We just set n and sb_first
		 * again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
	    again:
		m = *mp;
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;

		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;

		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= M_EOR;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}
/*
 * Drop data from (the front of) a sockbuf.
 * Use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next.
 * The socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * Temporarily replacing this panic with a
				 * printf: it occurs occasionally when closing
				 * a socket, and there is no harm in ignoring
				 * it.  This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/*
				 * zero the counts. if we have no mbufs,
				 * we have no data (PR-2986815)
				 */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
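
/*
 * Illustrative sketch (not built): wrapping a payload in a control mbuf.
 * The SOL_SOCKET/SCM_TIMESTAMP pairing is just one example of a level/type
 * combination a protocol might present to soreceive().
 */
#if 0
static struct mbuf *
example_make_timestamp_control(struct timeval *tv)
{
	/* Returns NULL if CMSG_SPACE(sizeof (*tv)) exceeds MLEN. */
	return (sbcreatecontrol((caddr_t)tv, sizeof (*tv),
	    SCM_TIMESTAMP, SOL_SOCKET));
}
#endif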
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int	pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int	pru_soreceive_notsupp(struct socket *so,
			 struct sockaddr **paddr,
			 struct uio *uio, struct mbuf **mp0,
			 struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int	pru_sosend(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int	pru_soreceive(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long)imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
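
/*
 * Illustrative sketch (not built): sbspace() is the minimum of two
 * headrooms -- bytes (hiwat - cc) and mbuf accounting (mbmax - mbcnt).
 * For example, hiwat 8192 with 5000 queued and mbmax 65536 with 4096
 * charged yields min(3192, 61440) = 3192 bytes of space.
 */
#if 0
static long
example_space(u_long hiwat, u_long cc, u_long mbmax, u_long mbcnt)
{
	return ((long)imin((int)(hiwat - cc), (int)(mbmax - mbcnt)));
}
#endif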
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	    (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	if (sb->sb_flags & SB_LOCK)
		return ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK);
	sb->sb_flags |= SB_LOCK;
	return (0);
}
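
/*
 * Illustrative sketch (not built): the sblock()/sbunlock() pairing a
 * caller uses to serialize access to a socket buffer, as sbflush() does
 * above.
 */
#if 0
static int
example_locked_drain(struct sockbuf *sb)
{
	int error;

	if ((error = sblock(sb, M_WAIT)) != 0)
		return (error);		/* interrupted before taking the lock */
	sbdrop(sb, (int)sb->sb_cc);	/* work while holding SB_LOCK */
	sbunlock(sb, 1);		/* 1: keep the socket mutex held */
	return (0);
}
#endif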
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr, lr_saved;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n",
			    sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n",
			    so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}
void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
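
/*
 * Illustrative sketch (not built): the timeval-to-ticks conversion above.
 * Assuming hz = 100 (tick = 10000 us), a timeout of 0 s / 5000 us computes
 * to 0 ticks and is rounded up to 1 so a sub-tick timeout is not reported
 * as "no timeout".  The hz/tick values are platform configuration.
 */
#if 0
static u_long
example_timeo_ticks(struct timeval tv)
{
	u_long ticks = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;

	if (ticks == 0 && tv.tv_usec != 0)
		ticks = 1;	/* round sub-tick timeouts up to one tick */
	return (ticks);
}
#endif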
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");