/*
 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>
#define DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)

/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
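
/*
 * Illustrative sketch (not part of the original file): how a
 * connection-oriented protocol is expected to drive the routines
 * described above for a passive open.  The function name is
 * hypothetical; sonewconn() and soisconnected() are the real
 * entry points defined below.
 */
#if 0
static void
example_input_syn(struct socket *listener)
{
	/* queue an embryonic socket on listener->so_incomp */
	struct socket *so = sonewconn(listener, 0, NULL);

	if (so == NULL)
		return;			/* queue full or filter rejected */
	/* ... protocol handshake runs; when it completes: ... */
	soisconnected(so);		/* move so to so_comp, wake accept() */
}
#endif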
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microuptime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;

		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				socket_lock(so, 1);
				sonext = TAILQ_NEXT(so, so_list);
//				in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
				socket_unlock(so, 1);
				so = sonext;
//			}
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
		return (so);
}
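
/*
 * Illustrative sketch (not part of the original file): how a protocol's
 * listen-queue overflow path might use sodropablereq().  The caller and
 * drop routine shown are hypothetical; sodropablereq() is real.
 */
#if 0
static void
example_listen_overflow(struct socket *head)
{
	struct socket *so = sodropablereq(head);

	if (so != NULL) {
		/* evict the randomly chosen embryonic connection */
		example_protocol_drop(so);
	} else {
		/* one-in-qlen case: drop the new request instead */
	}
}
#endif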
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;

	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sflt_termsock(so);
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock for protocol with per socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sflt_termsock(so);
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);

	so->so_proto->pr_domain->dom_refs++;

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

	/* Attach socket filters for this protocol */
	sflt_initsock(so);

	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}
struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
				filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
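
/*
 * Illustrative sketch (not part of the original file): the canonical
 * sbwait() pattern used by soreceive()-style code.  The wrapper is
 * hypothetical; sbwait() itself just sets SB_WAIT and sleeps on
 * sb->sb_cc until sowakeup() (or a timeout/signal) ends the sleep.
 */
#if 0
static int
example_wait_for_data(struct socket *so)
{
	int error = 0;

	while (so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
	    (so->so_state & SS_CANTRCVMORE) == 0 && so->so_error == 0) {
		error = sbwait(&so->so_rcv);
		if (error)
			break;		/* EINTR, EWOULDBLOCK (timeout), ... */
	}
	return (error);
}
#endif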
/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();

	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
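
/*
 * Illustrative sketch (not part of the original file): how a datagram
 * protocol builds a receive-buffer record following the conventions
 * above.  sbappendaddr() creates the MT_SONAME mbuf itself; the
 * function name is hypothetical.
 */
#if 0
static void
example_deliver_datagram(struct socket *so, struct sockaddr *from,
    struct mbuf *data)
{
	int error;

	/* one record: [MT_SONAME sender][optional control][data] */
	if (sbappendaddr(&so->so_rcv, from, data, NULL, &error))
		sorwakeup(so);		/* notify readers only on success */
}
#endif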
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
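
/*
 * Worked example (not part of the original file): with the default
 * sb_efficiency of 8, reserving cc = 32768 bytes of data space sets
 * sb_mbmax = min(32768 * 8, sb_max), i.e. 256 KB of mbuf accounting
 * space when sb_max permits, so fragmentation across many small mbufs
 * does not become the limiting factor before sb_hiwat does.  The cc
 * bound itself caps reservations at sb_max * MCLBYTES / (MSIZE +
 * MCLBYTES): sb_max scaled by the best-case data-to-overhead ratio of
 * a cluster mbuf.
 */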
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{
	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and then
 * removed from the socket buffer with sbdrop() or sbdroprecord() when
 * the data is acknowledged by the peer.
 */
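
/*
 * Illustrative sketch (not part of the original file): which append
 * routine a protocol picks.  A stream protocol extends the last record;
 * record-oriented and datagram protocols start new ones.  These are
 * fragments, not a complete function; the sb* routines are the ones
 * defined below.
 */
#if 0
	/* stream (TCP-like): grow the single record in so_rcv */
	sbappend(&so->so_rcv, m);

	/* record-oriented: m begins a new record */
	sbappendrecord(&so->so_rcv, m);

	/* datagram with sender name and optional control: new record,
	 * and space is checked on behalf of the caller */
	sbappendaddr(&so->so_rcv, from, m, control, &error);
#endif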
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * with the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	if (m == 0)
		return 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we had any filters, the socket lock was dropped.  n and
		 * sb_first cached data from the socket buffer.  This cache is
		 * not valid since we dropped the lock.  We must start over.
		 * Since filtered is set we won't run through the filters a
		 * second time.  We just set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
			sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	if ((n = sb->sb_mb) != NULL) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}
int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	if ((n = sb->sb_mb) != NULL) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}
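
/*
 * Worked example (not part of the original file): appending a 10-byte
 * mbuf to a record whose last mbuf n has 10 bytes used and ample
 * trailing space copies the 10 bytes into n (n->m_len becomes 20) and
 * frees the source mbuf, instead of linking a second mbuf that would
 * cost another MSIZE of accounted space.  The copy is skipped for
 * chunks larger than MCLBYTES / 4, or when the mbuf types differ.
 */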
/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}
/*
 * Drop data from (the front of) a sockbuf.
 * use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next
 * the socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/* temporarily replacing this panic with printf because
				 * it occurs occasionally when closing a socket when there
				 * is no harm in ignoring it. This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
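
/*
 * Illustrative sketch (not part of the original file): the classic
 * sbdrop() caller is a reliable protocol releasing acknowledged data
 * from its send buffer.  The function name is hypothetical.
 */
#if 0
static void
example_input_ack(struct socket *so, int acked)
{
	sbdrop(&so->so_snd, acked);	/* free acked bytes in one pass */
	sowwakeup(so);			/* writers may have space again */
}
#endif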
/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
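
/*
 * Illustrative sketch (not part of the original file): a protocol
 * delivering a received TTL to the application as ancillary data might
 * build the control mbuf like this.  These are fragments, not a
 * complete function, and the option names are only examples.
 */
#if 0
	u_char ttl = ip->ip_ttl;
	struct mbuf *control;

	control = sbcreatecontrol((caddr_t)&ttl, sizeof(ttl),
	    IP_RECVTTL, IPPROTO_IP);
	if (control != NULL)
		sbappendaddr(&so->so_rcv, from, m, control, NULL);
#endif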
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
	struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
	struct sockaddr *addr, struct mbuf *control,
	struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int	pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		struct uio *uio, struct mbuf *top,
		struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int	pru_soreceive_notsupp(struct socket *so,
		struct sockaddr **paddr,
		struct uio *uio, struct mbuf **mp0,
		struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int	pru_sosend(struct socket *so, struct sockaddr *addr,
		struct uio *uio, struct mbuf *top,
		struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int	pru_soreceive(struct socket *so,
		struct sockaddr **paddr,
		struct uio *uio, struct mbuf **mp0,
		struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
	__unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	      (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	/*
	 * Restructured from the original one-line conditional, whose
	 * trailing ", 0" made the expression always return 0 and
	 * swallow the error from sb_lock()/EWOULDBLOCK.
	 */
	if (sb->sb_flags & SB_LOCK)
		return ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK);
	sb->sb_flags |= SB_LOCK;
	return (0);
}
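
/*
 * Illustrative sketch (not part of the original file): sblock() and
 * sbunlock() bracket multi-step sockbuf manipulation so concurrent
 * readers cannot interleave; sbflush() above uses exactly this pattern.
 * These are fragments, not a complete function.
 */
#if 0
	if ((error = sblock(&so->so_rcv, M_WAIT)) != 0)
		return (error);
	/* ... pull records off so->so_rcv ... */
	sbunlock(&so->so_rcv, 1);	/* 1: keep the socket mutex held */
#endif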
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr, lr_saved;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}
void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
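
/*
 * Worked example (not part of the original file): with hz = 100
 * (tick = 10000 us), a timeout of 2.5 s converts to
 * 2 * 100 + 500000 / 10000 = 250 ticks.  The final check rounds a
 * nonzero sub-tick timeout up to 1 so that a very small timeout never
 * silently becomes "no timeout" (which is what 0 means).
 */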
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");