2 * Copyright (c) 1998-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/domain.h>
73 #include <sys/kernel.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/malloc.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/syslog.h>
86 #include <sys/unpcb.h>
88 #include <kern/locks.h>
89 #include <net/route.h>
90 #include <net/content_filter.h>
91 #include <netinet/in.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/tcp_var.h>
94 #include <sys/kdebug.h>
95 #include <libkern/OSAtomic.h>
98 #include <security/mac_framework.h>
101 #include <mach/vm_param.h>
104 #include <netinet/mptcp_var.h>
107 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
108 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
110 SYSCTL_DECL(_kern_ipc
);
112 __private_extern__ u_int32_t net_io_policy_throttle_best_effort
= 0;
113 SYSCTL_INT(_kern_ipc
, OID_AUTO
, throttle_best_effort
,
114 CTLFLAG_RW
| CTLFLAG_LOCKED
, &net_io_policy_throttle_best_effort
, 0, "");
116 static inline void sbcompress(struct sockbuf
*, struct mbuf
*, struct mbuf
*);
117 static struct socket
*sonewconn_internal(struct socket
*, int);
118 static int sbappendcontrol_internal(struct sockbuf
*, struct mbuf
*,
120 static void soevent_ifdenied(struct socket
*);
123 * Primitive routines for operating on sockets and socket buffers
125 static int soqlimitcompat
= 1;
126 static int soqlencomp
= 0;
129 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
130 * get scaled up or down to suit that memory configuration. high_sb_max is a
131 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
134 u_int32_t sb_max
= SB_MAX
; /* XXX should be static */
135 u_int32_t high_sb_max
= SB_MAX
;
137 static u_int32_t sb_efficiency
= 8; /* parameter for sbreserve() */
138 int32_t total_sbmb_cnt
__attribute__((aligned(8))) = 0;
139 int32_t total_sbmb_cnt_floor
__attribute__((aligned(8))) = 0;
140 int32_t total_sbmb_cnt_peak
__attribute__((aligned(8))) = 0;
141 int64_t sbmb_limreached
__attribute__((aligned(8))) = 0;
143 u_int32_t net_io_policy_log
= 0; /* log socket policy changes */
144 #if CONFIG_PROC_UUID_POLICY
145 u_int32_t net_io_policy_uuid
= 1; /* enable UUID socket policy */
146 #endif /* CONFIG_PROC_UUID_POLICY */
149 * Procedures to manipulate state flags of socket
150 * and do appropriate wakeups. Normal sequence from the
151 * active (originating) side is that soisconnecting() is
152 * called during processing of connect() call,
153 * resulting in an eventual call to soisconnected() if/when the
154 * connection is established. When the connection is torn down
155 * soisdisconnecting() is called during processing of disconnect() call,
156 * and soisdisconnected() is called when the connection to the peer
157 * is totally severed. The semantics of these routines are such that
158 * connectionless protocols can call soisconnected() and soisdisconnected()
159 * only, bypassing the in-progress calls when setting up a ``connection''
162 * From the passive side, a socket is created with
163 * two queues of sockets: so_incomp for connections in progress
164 * and so_comp for connections already made and awaiting user acceptance.
165 * As a protocol is preparing incoming connections, it creates a socket
166 * structure queued on so_incomp by calling sonewconn(). When the connection
167 * is established, soisconnected() is called, and transfers the
168 * socket structure to so_comp, making it available to accept().
170 * If a socket is closed with sockets on either
171 * so_incomp or so_comp, these sockets are dropped.
173 * If higher level protocols are implemented in
174 * the kernel, the wakeups done here will sometimes
175 * cause software-interrupt process scheduling.
178 soisconnecting(struct socket
*so
)
180 so
->so_state
&= ~(SS_ISCONNECTED
| SS_ISDISCONNECTING
);
181 so
->so_state
|= SS_ISCONNECTING
;
183 sflt_notify(so
, sock_evt_connecting
, NULL
);
187 soisconnected(struct socket
*so
)
190 * If socket is subject to filter and is pending initial verdict,
191 * delay marking socket as connected and do not present the connected
192 * socket to user just yet.
194 if (cfil_sock_connected_pending_verdict(so
)) {
198 so
->so_state
&= ~(SS_ISCONNECTING
| SS_ISDISCONNECTING
| SS_ISCONFIRMING
);
199 so
->so_state
|= SS_ISCONNECTED
;
201 soreserve_preconnect(so
, 0);
203 sflt_notify(so
, sock_evt_connected
, NULL
);
205 if (so
->so_head
!= NULL
&& (so
->so_state
& SS_INCOMP
)) {
206 struct socket
*head
= so
->so_head
;
210 * Enforce lock order when the protocol has per socket locks
212 if (head
->so_proto
->pr_getlock
!= NULL
) {
213 socket_lock(head
, 1);
214 so_acquire_accept_list(head
, so
);
217 if (so
->so_head
== head
&& (so
->so_state
& SS_INCOMP
)) {
218 so
->so_state
&= ~SS_INCOMP
;
219 so
->so_state
|= SS_COMP
;
220 TAILQ_REMOVE(&head
->so_incomp
, so
, so_list
);
221 TAILQ_INSERT_TAIL(&head
->so_comp
, so
, so_list
);
225 * We have to release the accept list in
226 * case a socket callback calls sock_accept()
229 so_release_accept_list(head
);
230 socket_unlock(so
, 0);
232 postevent(head
, 0, EV_RCONN
);
234 wakeup_one((caddr_t
)&head
->so_timeo
);
237 socket_unlock(head
, 1);
240 } else if (locked
!= 0) {
241 so_release_accept_list(head
);
242 socket_unlock(head
, 1);
245 postevent(so
, 0, EV_WCONN
);
246 wakeup((caddr_t
)&so
->so_timeo
);
249 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNECTED
|
250 SO_FILT_HINT_CONNINFO_UPDATED
);
255 socanwrite(struct socket
*so
)
257 return (so
->so_state
& SS_ISCONNECTED
) ||
258 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) ||
259 (so
->so_flags1
& SOF1_PRECONNECT_DATA
);
263 soisdisconnecting(struct socket
*so
)
265 so
->so_state
&= ~SS_ISCONNECTING
;
266 so
->so_state
|= (SS_ISDISCONNECTING
| SS_CANTRCVMORE
| SS_CANTSENDMORE
);
267 soevent(so
, SO_FILT_HINT_LOCKED
);
268 sflt_notify(so
, sock_evt_disconnecting
, NULL
);
269 wakeup((caddr_t
)&so
->so_timeo
);
275 soisdisconnected(struct socket
*so
)
277 so
->so_state
&= ~(SS_ISCONNECTING
| SS_ISCONNECTED
| SS_ISDISCONNECTING
);
278 so
->so_state
|= (SS_CANTRCVMORE
| SS_CANTSENDMORE
| SS_ISDISCONNECTED
);
279 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_DISCONNECTED
|
280 SO_FILT_HINT_CONNINFO_UPDATED
);
281 sflt_notify(so
, sock_evt_disconnected
, NULL
);
282 wakeup((caddr_t
)&so
->so_timeo
);
287 /* Notify content filters as soon as we cannot send/receive data */
288 cfil_sock_notify_shutdown(so
, SHUT_RDWR
);
289 #endif /* CONTENT_FILTER */
293 * This function will issue a wakeup like soisdisconnected but it will not
294 * notify the socket filters. This will avoid unlocking the socket
295 * in the midst of closing it.
298 sodisconnectwakeup(struct socket
*so
)
300 so
->so_state
&= ~(SS_ISCONNECTING
| SS_ISCONNECTED
| SS_ISDISCONNECTING
);
301 so
->so_state
|= (SS_CANTRCVMORE
| SS_CANTSENDMORE
| SS_ISDISCONNECTED
);
302 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_DISCONNECTED
|
303 SO_FILT_HINT_CONNINFO_UPDATED
);
304 wakeup((caddr_t
)&so
->so_timeo
);
309 /* Notify content filters as soon as we cannot send/receive data */
310 cfil_sock_notify_shutdown(so
, SHUT_RDWR
);
311 #endif /* CONTENT_FILTER */
315 * When an attempt at a new connection is noted on a socket
316 * which accepts connections, sonewconn is called. If the
317 * connection is possible (subject to space constraints, etc.)
318 * then we allocate a new structure, propoerly linked into the
319 * data structure of the original socket, and return this.
320 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
322 static struct socket
*
323 sonewconn_internal(struct socket
*head
, int connstatus
)
325 int so_qlen
, error
= 0;
327 lck_mtx_t
*mutex_held
;
329 if (head
->so_proto
->pr_getlock
!= NULL
) {
330 mutex_held
= (*head
->so_proto
->pr_getlock
)(head
, 0);
332 mutex_held
= head
->so_proto
->pr_domain
->dom_mtx
;
334 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
338 * This is the default case; so_qlen represents the
339 * sum of both incomplete and completed queues.
341 so_qlen
= head
->so_qlen
;
344 * When kern.ipc.soqlencomp is set to 1, so_qlen
345 * represents only the completed queue. Since we
346 * cannot let the incomplete queue goes unbounded
347 * (in case of SYN flood), we cap the incomplete
348 * queue length to at most somaxconn, and use that
349 * as so_qlen so that we fail immediately below.
351 so_qlen
= head
->so_qlen
- head
->so_incqlen
;
352 if (head
->so_incqlen
> somaxconn
) {
358 (soqlimitcompat
? head
->so_qlimit
: (3 * head
->so_qlimit
/ 2))) {
359 return (struct socket
*)0;
361 so
= soalloc(1, SOCK_DOM(head
), head
->so_type
);
363 return (struct socket
*)0;
365 /* check if head was closed during the soalloc */
366 if (head
->so_proto
== NULL
) {
368 return (struct socket
*)0;
371 so
->so_type
= head
->so_type
;
372 so
->so_options
= head
->so_options
& ~SO_ACCEPTCONN
;
373 so
->so_linger
= head
->so_linger
;
374 so
->so_state
= head
->so_state
| SS_NOFDREF
;
375 so
->so_proto
= head
->so_proto
;
376 so
->so_timeo
= head
->so_timeo
;
377 so
->so_pgid
= head
->so_pgid
;
378 kauth_cred_ref(head
->so_cred
);
379 so
->so_cred
= head
->so_cred
;
380 so
->last_pid
= head
->last_pid
;
381 so
->last_upid
= head
->last_upid
;
382 memcpy(so
->last_uuid
, head
->last_uuid
, sizeof(so
->last_uuid
));
383 if (head
->so_flags
& SOF_DELEGATED
) {
384 so
->e_pid
= head
->e_pid
;
385 so
->e_upid
= head
->e_upid
;
386 memcpy(so
->e_uuid
, head
->e_uuid
, sizeof(so
->e_uuid
));
388 /* inherit socket options stored in so_flags */
389 so
->so_flags
= head
->so_flags
&
390 (SOF_NOSIGPIPE
| SOF_NOADDRAVAIL
| SOF_REUSESHAREUID
|
391 SOF_NOTIFYCONFLICT
| SOF_BINDRANDOMPORT
| SOF_NPX_SETOPTSHUT
|
392 SOF_NODEFUNCT
| SOF_PRIVILEGED_TRAFFIC_CLASS
| SOF_NOTSENT_LOWAT
|
393 SOF_USELRO
| SOF_DELEGATED
);
394 so
->so_flags1
|= SOF1_INBOUND
;
396 so
->next_lock_lr
= 0;
397 so
->next_unlock_lr
= 0;
399 so
->so_rcv
.sb_flags
|= SB_RECV
; /* XXX */
400 so
->so_rcv
.sb_so
= so
->so_snd
.sb_so
= so
;
401 TAILQ_INIT(&so
->so_evlist
);
403 #if CONFIG_MACF_SOCKET
404 mac_socket_label_associate_accept(head
, so
);
407 /* inherit traffic management properties of listener */
409 head
->so_flags1
& (SOF1_TRAFFIC_MGT_SO_BACKGROUND
| SOF1_TC_NET_SERV_TYPE
|
410 SOF1_QOSMARKING_ALLOWED
| SOF1_QOSMARKING_POLICY_OVERRIDE
);
411 so
->so_background_thread
= head
->so_background_thread
;
412 so
->so_traffic_class
= head
->so_traffic_class
;
413 so
->so_netsvctype
= head
->so_netsvctype
;
415 if (soreserve(so
, head
->so_snd
.sb_hiwat
, head
->so_rcv
.sb_hiwat
)) {
417 return (struct socket
*)0;
419 so
->so_rcv
.sb_flags
|= (head
->so_rcv
.sb_flags
& SB_USRSIZE
);
420 so
->so_snd
.sb_flags
|= (head
->so_snd
.sb_flags
& SB_USRSIZE
);
423 * Must be done with head unlocked to avoid deadlock
424 * for protocol with per socket mutexes.
426 if (head
->so_proto
->pr_unlock
) {
427 socket_unlock(head
, 0);
429 if (((*so
->so_proto
->pr_usrreqs
->pru_attach
)(so
, 0, NULL
) != 0) ||
432 if (head
->so_proto
->pr_unlock
) {
433 socket_lock(head
, 0);
435 return (struct socket
*)0;
437 if (head
->so_proto
->pr_unlock
) {
438 socket_lock(head
, 0);
440 * Radar 7385998 Recheck that the head is still accepting
441 * to avoid race condition when head is getting closed.
443 if ((head
->so_options
& SO_ACCEPTCONN
) == 0) {
444 so
->so_state
&= ~SS_NOFDREF
;
446 return (struct socket
*)0;
450 if (so
->so_proto
->pr_copy_last_owner
!= NULL
) {
451 (*so
->so_proto
->pr_copy_last_owner
)(so
, head
);
453 atomic_add_32(&so
->so_proto
->pr_domain
->dom_refs
, 1);
455 /* Insert in head appropriate lists */
456 so_acquire_accept_list(head
, NULL
);
461 * Since this socket is going to be inserted into the incomp
462 * queue, it can be picked up by another thread in
463 * tcp_dropdropablreq to get dropped before it is setup..
464 * To prevent this race, set in-progress flag which can be
467 so
->so_flags
|= SOF_INCOMP_INPROGRESS
;
470 TAILQ_INSERT_TAIL(&head
->so_comp
, so
, so_list
);
471 so
->so_state
|= SS_COMP
;
473 TAILQ_INSERT_TAIL(&head
->so_incomp
, so
, so_list
);
474 so
->so_state
|= SS_INCOMP
;
479 so_release_accept_list(head
);
481 /* Attach socket filters for this protocol */
485 so
->so_state
|= connstatus
;
487 wakeup((caddr_t
)&head
->so_timeo
);
494 sonewconn(struct socket
*head
, int connstatus
, const struct sockaddr
*from
)
496 int error
= sflt_connectin(head
, from
);
501 return sonewconn_internal(head
, connstatus
);
505 * Socantsendmore indicates that no more data will be sent on the
506 * socket; it would normally be applied to a socket when the user
507 * informs the system that no more data is to be sent, by the protocol
508 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
509 * will be received, and will normally be applied to the socket by a
510 * protocol when it detects that the peer will send no more data.
511 * Data queued for reading in the socket may yet be read.
515 socantsendmore(struct socket
*so
)
517 so
->so_state
|= SS_CANTSENDMORE
;
518 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CANTSENDMORE
);
519 sflt_notify(so
, sock_evt_cantsendmore
, NULL
);
524 socantrcvmore(struct socket
*so
)
526 so
->so_state
|= SS_CANTRCVMORE
;
527 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CANTRCVMORE
);
528 sflt_notify(so
, sock_evt_cantrecvmore
, NULL
);
533 * Wait for data to arrive at/drain from a socket buffer.
536 sbwait(struct sockbuf
*sb
)
538 boolean_t nointr
= (sb
->sb_flags
& SB_NOINTR
);
539 void *lr_saved
= __builtin_return_address(0);
540 struct socket
*so
= sb
->sb_so
;
541 lck_mtx_t
*mutex_held
;
546 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
547 __func__
, sb
, sb
->sb_flags
, lr_saved
);
549 } else if (so
->so_usecount
< 1) {
550 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
551 "lrh= %s\n", __func__
, sb
, sb
->sb_flags
, so
,
552 so
->so_usecount
, lr_saved
, solockhistory_nr(so
));
556 if ((so
->so_state
& SS_DRAINING
) || (so
->so_flags
& SOF_DEFUNCT
)) {
558 if (so
->so_flags
& SOF_DEFUNCT
) {
559 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
560 "(%d)\n", __func__
, proc_selfpid(),
561 proc_best_name(current_proc()),
562 (uint64_t)VM_KERNEL_ADDRPERM(so
),
563 SOCK_DOM(so
), SOCK_TYPE(so
), error
);
568 if (so
->so_proto
->pr_getlock
!= NULL
) {
569 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
571 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
574 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
576 ts
.tv_sec
= sb
->sb_timeo
.tv_sec
;
577 ts
.tv_nsec
= sb
->sb_timeo
.tv_usec
* 1000;
580 VERIFY(sb
->sb_waiters
!= 0);
582 error
= msleep((caddr_t
)&sb
->sb_cc
, mutex_held
,
583 nointr
? PSOCK
: PSOCK
| PCATCH
,
584 nointr
? "sbwait_nointr" : "sbwait", &ts
);
586 VERIFY(sb
->sb_waiters
!= 0);
589 if (so
->so_usecount
< 1) {
590 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
591 "lrh= %s\n", __func__
, sb
, sb
->sb_flags
, so
,
592 so
->so_usecount
, lr_saved
, solockhistory_nr(so
));
596 if ((so
->so_state
& SS_DRAINING
) || (so
->so_flags
& SOF_DEFUNCT
)) {
598 if (so
->so_flags
& SOF_DEFUNCT
) {
599 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
600 "(%d)\n", __func__
, proc_selfpid(),
601 proc_best_name(current_proc()),
602 (uint64_t)VM_KERNEL_ADDRPERM(so
),
603 SOCK_DOM(so
), SOCK_TYPE(so
), error
);
611 sbwakeup(struct sockbuf
*sb
)
613 if (sb
->sb_waiters
> 0) {
614 wakeup((caddr_t
)&sb
->sb_cc
);
619 * Wakeup processes waiting on a socket buffer.
620 * Do asynchronous notification via SIGIO
621 * if the socket has the SS_ASYNC flag set.
624 sowakeup(struct socket
*so
, struct sockbuf
*sb
, struct socket
*so2
)
626 if (so
->so_flags
& SOF_DEFUNCT
) {
627 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] si 0x%x, "
628 "fl 0x%x [%s]\n", __func__
, proc_selfpid(),
629 proc_best_name(current_proc()),
630 (uint64_t)VM_KERNEL_ADDRPERM(so
), SOCK_DOM(so
),
631 SOCK_TYPE(so
), (uint32_t)sb
->sb_sel
.si_flags
, sb
->sb_flags
,
632 (sb
->sb_flags
& SB_RECV
) ? "rcv" : "snd");
635 sb
->sb_flags
&= ~SB_SEL
;
636 selwakeup(&sb
->sb_sel
);
638 if (so
->so_state
& SS_ASYNC
) {
639 if (so
->so_pgid
< 0) {
640 gsignal(-so
->so_pgid
, SIGIO
);
641 } else if (so
->so_pgid
> 0) {
642 proc_signal(so
->so_pgid
, SIGIO
);
645 if (sb
->sb_flags
& SB_KNOTE
) {
646 KNOTE(&sb
->sb_sel
.si_note
, SO_FILT_HINT_LOCKED
);
648 if (sb
->sb_flags
& SB_UPCALL
) {
649 void (*sb_upcall
)(struct socket
*, void *, int);
650 caddr_t sb_upcallarg
;
651 int lock
= !(sb
->sb_flags
& SB_UPCALL_LOCK
);
653 sb_upcall
= sb
->sb_upcall
;
654 sb_upcallarg
= sb
->sb_upcallarg
;
655 /* Let close know that we're about to do an upcall */
656 so
->so_upcallusecount
++;
660 struct unpcb
*unp
= sotounpcb(so2
);
661 unp
->unp_flags
|= UNP_DONTDISCONNECT
;
664 socket_unlock(so2
, 0);
666 socket_unlock(so
, 0);
668 (*sb_upcall
)(so
, sb_upcallarg
, M_DONTWAIT
);
670 if (so2
&& so
> so2
) {
674 unp
= sotounpcb(so2
);
676 if (unp
->rw_thrcount
== 0) {
677 unp
->unp_flags
&= ~UNP_DONTDISCONNECT
;
684 if (so2
&& so
< so2
) {
688 unp
= sotounpcb(so2
);
690 if (unp
->rw_thrcount
== 0) {
691 unp
->unp_flags
&= ~UNP_DONTDISCONNECT
;
697 so
->so_upcallusecount
--;
698 /* Tell close that it's safe to proceed */
699 if ((so
->so_flags
& SOF_CLOSEWAIT
) &&
700 so
->so_upcallusecount
== 0) {
701 wakeup((caddr_t
)&so
->so_upcallusecount
);
706 * Trap disconnection events for content filters
708 if ((so
->so_flags
& SOF_CONTENT_FILTER
) != 0) {
709 if ((sb
->sb_flags
& SB_RECV
)) {
710 if (so
->so_state
& (SS_CANTRCVMORE
)) {
711 cfil_sock_notify_shutdown(so
, SHUT_RD
);
714 if (so
->so_state
& (SS_CANTSENDMORE
)) {
715 cfil_sock_notify_shutdown(so
, SHUT_WR
);
719 #endif /* CONTENT_FILTER */
723 * Socket buffer (struct sockbuf) utility routines.
725 * Each socket contains two socket buffers: one for sending data and
726 * one for receiving data. Each buffer contains a queue of mbufs,
727 * information about the number of mbufs and amount of data in the
728 * queue, and other fields allowing select() statements and notification
729 * on data availability to be implemented.
731 * Data stored in a socket buffer is maintained as a list of records.
732 * Each record is a list of mbufs chained together with the m_next
733 * field. Records are chained together with the m_nextpkt field. The upper
734 * level routine soreceive() expects the following conventions to be
735 * observed when placing information in the receive buffer:
737 * 1. If the protocol requires each message be preceded by the sender's
738 * name, then a record containing that name must be present before
739 * any associated data (mbuf's must be of type MT_SONAME).
740 * 2. If the protocol supports the exchange of ``access rights'' (really
741 * just additional data associated with the message), and there are
742 * ``rights'' to be received, then a record containing this data
743 * should be present (mbuf's must be of type MT_RIGHTS).
744 * 3. If a name or rights record exists, then it must be followed by
745 * a data record, perhaps of zero length.
747 * Before using a new socket structure it is first necessary to reserve
748 * buffer space to the socket, by calling sbreserve(). This should commit
749 * some of the available buffer space in the system buffer pool for the
750 * socket (currently, it does nothing but enforce limits). The space
751 * should be released by calling sbrelease() when the socket is destroyed.
759 soreserve(struct socket
*so
, u_int32_t sndcc
, u_int32_t rcvcc
)
762 * We do not want to fail the creation of a socket
763 * when kern.ipc.maxsockbuf is less than the
764 * default socket buffer socket size of the protocol
765 * so force the buffer sizes to be at most the
766 * limit enforced by sbreserve()
768 uint64_t maxcc
= (uint64_t)sb_max
* MCLBYTES
/ (MSIZE
+ MCLBYTES
);
775 if (sbreserve(&so
->so_snd
, sndcc
) == 0) {
778 so
->so_snd
.sb_idealsize
= sndcc
;
781 if (sbreserve(&so
->so_rcv
, rcvcc
) == 0) {
784 so
->so_rcv
.sb_idealsize
= rcvcc
;
787 if (so
->so_rcv
.sb_lowat
== 0) {
788 so
->so_rcv
.sb_lowat
= 1;
790 if (so
->so_snd
.sb_lowat
== 0) {
791 so
->so_snd
.sb_lowat
= MCLBYTES
;
793 if (so
->so_snd
.sb_lowat
> so
->so_snd
.sb_hiwat
) {
794 so
->so_snd
.sb_lowat
= so
->so_snd
.sb_hiwat
;
798 so
->so_snd
.sb_flags
&= ~SB_SEL
;
799 selthreadclear(&so
->so_snd
.sb_sel
);
800 sbrelease(&so
->so_snd
);
806 soreserve_preconnect(struct socket
*so
, unsigned int pre_cc
)
808 /* As of now, same bytes for both preconnect read and write */
809 so
->so_snd
.sb_preconn_hiwat
= pre_cc
;
810 so
->so_rcv
.sb_preconn_hiwat
= pre_cc
;
814 * Allot mbufs to a sockbuf.
815 * Attempt to scale mbmax so that mbcnt doesn't become limiting
816 * if buffering efficiency is near the normal case.
819 sbreserve(struct sockbuf
*sb
, u_int32_t cc
)
821 if ((u_quad_t
)cc
> (u_quad_t
)sb_max
* MCLBYTES
/ (MSIZE
+ MCLBYTES
)) {
825 sb
->sb_mbmax
= min(cc
* sb_efficiency
, sb_max
);
826 if (sb
->sb_lowat
> sb
->sb_hiwat
) {
827 sb
->sb_lowat
= sb
->sb_hiwat
;
833 * Free mbufs held by a socket, and reserved mbuf space.
835 /* WARNING needs to do selthreadclear() before calling this */
837 sbrelease(struct sockbuf
*sb
)
845 * Routines to add and remove
846 * data from an mbuf queue.
848 * The routines sbappend() or sbappendrecord() are normally called to
849 * append new mbufs to a socket buffer, after checking that adequate
850 * space is available, comparing the function sbspace() with the amount
851 * of data to be added. sbappendrecord() differs from sbappend() in
852 * that data supplied is treated as the beginning of a new record.
853 * To place a sender's address, optional access rights, and data in a
854 * socket receive buffer, sbappendaddr() should be used. To place
855 * access rights and data in a socket receive buffer, sbappendrights()
856 * should be used. In either case, the new data begins a new record.
857 * Note that unlike sbappend() and sbappendrecord(), these routines check
858 * for the caller that there will be enough space to store the data.
859 * Each fails if there is not enough space, or if it cannot find mbufs
860 * to store additional information in.
862 * Reliable protocols may use the socket send buffer to hold data
863 * awaiting acknowledgement. Data is normally copied from a socket
864 * send buffer in a protocol with m_copy for output to a peer,
865 * and then removing the data from the socket buffer with sbdrop()
866 * or sbdroprecord() when the data is acknowledged by the peer.
870 * Append mbuf chain m to the last record in the
871 * socket buffer sb. The additional space associated
872 * the mbuf chain is recorded in sb. Empty mbufs are
873 * discarded and mbufs are compacted where possible.
876 sbappend(struct sockbuf
*sb
, struct mbuf
*m
)
878 struct socket
*so
= sb
->sb_so
;
880 if (m
== NULL
|| (sb
->sb_flags
& SB_DROP
)) {
887 SBLASTRECORDCHK(sb
, "sbappend 1");
889 if (sb
->sb_lastrecord
!= NULL
&& (sb
->sb_mbtail
->m_flags
& M_EOR
)) {
890 return sbappendrecord(sb
, m
);
893 if (sb
->sb_flags
& SB_RECV
&& !(m
&& m
->m_flags
& M_SKIPCFIL
)) {
894 int error
= sflt_data_in(so
, NULL
, &m
, NULL
, 0);
895 SBLASTRECORDCHK(sb
, "sbappend 2");
899 error
= cfil_sock_data_in(so
, NULL
, m
, NULL
, 0);
901 #endif /* CONTENT_FILTER */
904 if (error
!= EJUSTRETURN
) {
910 m
->m_flags
&= ~M_SKIPCFIL
;
913 /* If this is the first record, it's also the last record */
914 if (sb
->sb_lastrecord
== NULL
) {
915 sb
->sb_lastrecord
= m
;
918 sbcompress(sb
, m
, sb
->sb_mbtail
);
919 SBLASTRECORDCHK(sb
, "sbappend 3");
924 * Similar to sbappend, except that this is optimized for stream sockets.
927 sbappendstream(struct sockbuf
*sb
, struct mbuf
*m
)
929 struct socket
*so
= sb
->sb_so
;
931 if (m
== NULL
|| (sb
->sb_flags
& SB_DROP
)) {
938 if (m
->m_nextpkt
!= NULL
|| (sb
->sb_mb
!= sb
->sb_lastrecord
)) {
939 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
940 m
->m_nextpkt
, sb
->sb_mb
, sb
->sb_lastrecord
);
944 SBLASTMBUFCHK(sb
, __func__
);
946 if (sb
->sb_flags
& SB_RECV
&& !(m
&& m
->m_flags
& M_SKIPCFIL
)) {
947 int error
= sflt_data_in(so
, NULL
, &m
, NULL
, 0);
948 SBLASTRECORDCHK(sb
, "sbappendstream 1");
952 error
= cfil_sock_data_in(so
, NULL
, m
, NULL
, 0);
954 #endif /* CONTENT_FILTER */
957 if (error
!= EJUSTRETURN
) {
963 m
->m_flags
&= ~M_SKIPCFIL
;
966 sbcompress(sb
, m
, sb
->sb_mbtail
);
967 sb
->sb_lastrecord
= sb
->sb_mb
;
968 SBLASTRECORDCHK(sb
, "sbappendstream 2");
974 sbcheck(struct sockbuf
*sb
)
978 u_int32_t len
= 0, mbcnt
= 0;
979 lck_mtx_t
*mutex_held
;
981 if (sb
->sb_so
->so_proto
->pr_getlock
!= NULL
) {
982 mutex_held
= (*sb
->sb_so
->so_proto
->pr_getlock
)(sb
->sb_so
, 0);
984 mutex_held
= sb
->sb_so
->so_proto
->pr_domain
->dom_mtx
;
987 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
989 if (sbchecking
== 0) {
993 for (m
= sb
->sb_mb
; m
; m
= n
) {
995 for (; m
; m
= m
->m_next
) {
998 /* XXX pretty sure this is bogus */
999 if (m
->m_flags
& M_EXT
) {
1000 mbcnt
+= m
->m_ext
.ext_size
;
1004 if (len
!= sb
->sb_cc
|| mbcnt
!= sb
->sb_mbcnt
) {
1005 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len
, sb
->sb_cc
,
1006 mbcnt
, sb
->sb_mbcnt
);
1012 sblastrecordchk(struct sockbuf
*sb
, const char *where
)
1014 struct mbuf
*m
= sb
->sb_mb
;
1016 while (m
&& m
->m_nextpkt
) {
1020 if (m
!= sb
->sb_lastrecord
) {
1021 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
1023 (uint64_t)VM_KERNEL_ADDRPERM(sb
->sb_mb
),
1024 (uint64_t)VM_KERNEL_ADDRPERM(sb
->sb_lastrecord
),
1025 (uint64_t)VM_KERNEL_ADDRPERM(m
));
1026 printf("packet chain:\n");
1027 for (m
= sb
->sb_mb
; m
!= NULL
; m
= m
->m_nextpkt
) {
1028 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m
));
1030 panic("sblastrecordchk from %s", where
);
1035 sblastmbufchk(struct sockbuf
*sb
, const char *where
)
1037 struct mbuf
*m
= sb
->sb_mb
;
1040 while (m
&& m
->m_nextpkt
) {
1044 while (m
&& m
->m_next
) {
1048 if (m
!= sb
->sb_mbtail
) {
1049 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1050 (uint64_t)VM_KERNEL_ADDRPERM(sb
->sb_mb
),
1051 (uint64_t)VM_KERNEL_ADDRPERM(sb
->sb_mbtail
),
1052 (uint64_t)VM_KERNEL_ADDRPERM(m
));
1053 printf("packet tree:\n");
1054 for (m
= sb
->sb_mb
; m
!= NULL
; m
= m
->m_nextpkt
) {
1056 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
1058 (uint64_t)VM_KERNEL_ADDRPERM(n
));
1062 panic("sblastmbufchk from %s", where
);
1067 * Similar to sbappend, except the mbuf chain begins a new record.
1070 sbappendrecord(struct sockbuf
*sb
, struct mbuf
*m0
)
1075 if (m0
== NULL
|| (sb
->sb_flags
& SB_DROP
)) {
1082 for (m
= m0
; m
!= NULL
; m
= m
->m_next
) {
1086 if (space
> sbspace(sb
) && !(sb
->sb_flags
& SB_UNIX
)) {
1091 if (sb
->sb_flags
& SB_RECV
&& !(m0
&& m0
->m_flags
& M_SKIPCFIL
)) {
1092 int error
= sflt_data_in(sb
->sb_so
, NULL
, &m0
, NULL
,
1093 sock_data_filt_flag_record
);
1097 error
= cfil_sock_data_in(sb
->sb_so
, NULL
, m0
, NULL
, 0);
1099 #endif /* CONTENT_FILTER */
1102 SBLASTRECORDCHK(sb
, "sbappendrecord 1");
1103 if (error
!= EJUSTRETURN
) {
1109 m0
->m_flags
&= ~M_SKIPCFIL
;
1113 * Note this permits zero length records.
1116 SBLASTRECORDCHK(sb
, "sbappendrecord 2");
1117 if (sb
->sb_lastrecord
!= NULL
) {
1118 sb
->sb_lastrecord
->m_nextpkt
= m0
;
1122 sb
->sb_lastrecord
= m0
;
1127 if (m
&& (m0
->m_flags
& M_EOR
)) {
1128 m0
->m_flags
&= ~M_EOR
;
1129 m
->m_flags
|= M_EOR
;
1131 sbcompress(sb
, m
, m0
);
1132 SBLASTRECORDCHK(sb
, "sbappendrecord 3");
1137 * Concatenate address (optional), control (optional) and data into one
1138 * single mbuf chain. If sockbuf *sb is passed in, space check will be
1141 * Returns: mbuf chain pointer if succeeded, NULL if failed
1144 sbconcat_mbufs(struct sockbuf
*sb
, struct sockaddr
*asa
, struct mbuf
*m0
, struct mbuf
*control
)
1146 struct mbuf
*m
= NULL
, *n
= NULL
;
1149 if (m0
&& (m0
->m_flags
& M_PKTHDR
) == 0) {
1150 panic("sbconcat_mbufs");
1154 space
+= m0
->m_pkthdr
.len
;
1156 for (n
= control
; n
; n
= n
->m_next
) {
1158 if (n
->m_next
== 0) { /* keep pointer to last control buf */
1164 if (asa
->sa_len
> MLEN
) {
1167 space
+= asa
->sa_len
;
1170 if (sb
!= NULL
&& space
> sbspace(sb
)) {
1175 n
->m_next
= m0
; /* concatenate data to control */
1181 MGET(m
, M_DONTWAIT
, MT_SONAME
);
1184 /* unchain control and data if necessary */
1189 m
->m_len
= asa
->sa_len
;
1190 bcopy((caddr_t
)asa
, mtod(m
, caddr_t
), asa
->sa_len
);
1192 m
->m_next
= control
;
1201 * Queue mbuf chain to the receive queue of a socket.
1202 * Parameter space is the total len of the mbuf chain.
1203 * If passed in, sockbuf space will be checked.
1205 * Returns: 0 Invalid mbuf chain
1209 sbappendchain(struct sockbuf
*sb
, struct mbuf
*m
, int space
)
1211 struct mbuf
*n
, *nlast
;
1217 if (space
!= 0 && space
> sbspace(sb
)) {
1221 for (n
= m
; n
->m_next
!= NULL
; n
= n
->m_next
) {
1227 if (sb
->sb_lastrecord
!= NULL
) {
1228 sb
->sb_lastrecord
->m_nextpkt
= m
;
1232 sb
->sb_lastrecord
= m
;
1233 sb
->sb_mbtail
= nlast
;
1235 SBLASTMBUFCHK(sb
, __func__
);
1236 SBLASTRECORDCHK(sb
, "sbappendadddr 2");
1238 postevent(0, sb
, EV_RWBYTES
);
1243 * Returns: 0 Error: No space/out of mbufs/etc.
1246 * Imputed: (*error_out) errno for error
1248 * sflt_data_in:??? [whatever a filter author chooses]
1251 sbappendaddr(struct sockbuf
*sb
, struct sockaddr
*asa
, struct mbuf
*m0
,
1252 struct mbuf
*control
, int *error_out
)
1255 boolean_t sb_unix
= (sb
->sb_flags
& SB_UNIX
);
1256 struct mbuf
*mbuf_chain
= NULL
;
1262 if (m0
&& (m0
->m_flags
& M_PKTHDR
) == 0) {
1263 panic("sbappendaddrorfree");
1266 if (sb
->sb_flags
& SB_DROP
) {
1270 if (control
!= NULL
&& !sb_unix
) {
1273 if (error_out
!= NULL
) {
1274 *error_out
= EINVAL
;
1279 /* Call socket data in filters */
1280 if (sb
->sb_flags
& SB_RECV
&& !(m0
&& m0
->m_flags
& M_SKIPCFIL
)) {
1282 error
= sflt_data_in(sb
->sb_so
, asa
, &m0
, &control
, 0);
1283 SBLASTRECORDCHK(sb
, __func__
);
1287 error
= cfil_sock_data_in(sb
->sb_so
, asa
, m0
, control
,
1290 #endif /* CONTENT_FILTER */
1293 if (error
!= EJUSTRETURN
) {
1297 if (control
!= NULL
&& !sb_unix
) {
1307 m0
->m_flags
&= ~M_SKIPCFIL
;
1310 mbuf_chain
= sbconcat_mbufs(sb
, asa
, m0
, control
);
1311 SBLASTRECORDCHK(sb
, "sbappendadddr 1");
1312 result
= sbappendchain(sb
, mbuf_chain
, 0);
1317 if (control
!= NULL
&& !sb_unix
) {
1321 *error_out
= ENOBUFS
;
1329 is_cmsg_valid(struct mbuf
*control
, struct cmsghdr
*cmsg
)
1335 if (cmsg
->cmsg_len
< sizeof(struct cmsghdr
)) {
1339 if ((uint8_t *)control
->m_data
>= (uint8_t *)cmsg
+ cmsg
->cmsg_len
) {
1343 if ((uint8_t *)control
->m_data
+ control
->m_len
<
1344 (uint8_t *)cmsg
+ cmsg
->cmsg_len
) {
1352 sbappendcontrol_internal(struct sockbuf
*sb
, struct mbuf
*m0
,
1353 struct mbuf
*control
)
1355 struct mbuf
*m
, *mlast
, *n
;
1359 panic("sbappendcontrol");
1362 for (m
= control
;; m
= m
->m_next
) {
1364 if (m
->m_next
== 0) {
1368 n
= m
; /* save pointer to last control buffer */
1369 for (m
= m0
; m
; m
= m
->m_next
) {
1372 if (space
> sbspace(sb
) && !(sb
->sb_flags
& SB_UNIX
)) {
1375 n
->m_next
= m0
; /* concatenate data to control */
1376 SBLASTRECORDCHK(sb
, "sbappendcontrol 1");
1378 for (m
= control
; m
->m_next
!= NULL
; m
= m
->m_next
) {
1384 if (sb
->sb_lastrecord
!= NULL
) {
1385 sb
->sb_lastrecord
->m_nextpkt
= control
;
1387 sb
->sb_mb
= control
;
1389 sb
->sb_lastrecord
= control
;
1390 sb
->sb_mbtail
= mlast
;
1392 SBLASTMBUFCHK(sb
, __func__
);
1393 SBLASTRECORDCHK(sb
, "sbappendcontrol 2");
1395 postevent(0, sb
, EV_RWBYTES
);
1400 sbappendcontrol(struct sockbuf
*sb
, struct mbuf
*m0
, struct mbuf
*control
,
1404 boolean_t sb_unix
= (sb
->sb_flags
& SB_UNIX
);
1410 if (sb
->sb_flags
& SB_DROP
) {
1414 if (control
!= NULL
&& !sb_unix
) {
1417 if (error_out
!= NULL
) {
1418 *error_out
= EINVAL
;
1423 if (sb
->sb_flags
& SB_RECV
&& !(m0
&& m0
->m_flags
& M_SKIPCFIL
)) {
1426 error
= sflt_data_in(sb
->sb_so
, NULL
, &m0
, &control
, 0);
1427 SBLASTRECORDCHK(sb
, __func__
);
1431 error
= cfil_sock_data_in(sb
->sb_so
, NULL
, m0
, control
,
1434 #endif /* CONTENT_FILTER */
1437 if (error
!= EJUSTRETURN
) {
1441 if (control
!= NULL
&& !sb_unix
) {
1451 m0
->m_flags
&= ~M_SKIPCFIL
;
1454 result
= sbappendcontrol_internal(sb
, m0
, control
);
1459 if (control
!= NULL
&& !sb_unix
) {
1463 *error_out
= ENOBUFS
;
1471 * Append a contiguous TCP data blob with TCP sequence number as control data
1472 * as a new msg to the receive socket buffer.
1475 sbappendmsgstream_rcv(struct sockbuf
*sb
, struct mbuf
*m
, uint32_t seqnum
,
1478 struct mbuf
*m_eor
= NULL
;
1479 u_int32_t data_len
= 0;
1481 struct socket
*so
= sb
->sb_so
;
1487 VERIFY((m
->m_flags
& M_PKTHDR
) && m_pktlen(m
) > 0);
1488 VERIFY(so
->so_msg_state
!= NULL
);
1489 VERIFY(sb
->sb_flags
& SB_RECV
);
1491 /* Keep the TCP sequence number in the mbuf pkthdr */
1492 m
->m_pkthdr
.msg_seq
= seqnum
;
1494 /* find last mbuf and set M_EOR */
1495 for (m_eor
= m
;; m_eor
= m_eor
->m_next
) {
1497 * If the msg is unordered, we need to account for
1498 * these bytes in receive socket buffer size. Otherwise,
1499 * the receive window advertised will shrink because
1500 * of the additional unordered bytes added to the
1504 m_eor
->m_flags
|= M_UNORDERED_DATA
;
1505 data_len
+= m_eor
->m_len
;
1506 so
->so_msg_state
->msg_uno_bytes
+= m_eor
->m_len
;
1508 m_eor
->m_flags
&= ~M_UNORDERED_DATA
;
1510 if (m_eor
->m_next
== NULL
) {
1515 /* set EOR flag at end of byte blob */
1516 m_eor
->m_flags
|= M_EOR
;
1518 /* expand the receive socket buffer to allow unordered data */
1519 if (unordered
&& !sbreserve(sb
, sb
->sb_hiwat
+ data_len
)) {
1521 * Could not allocate memory for unordered data, it
1522 * means this packet will have to be delivered in order
1524 printf("%s: could not reserve space for unordered data\n",
1528 if (!unordered
&& (sb
->sb_mbtail
!= NULL
) &&
1529 !(sb
->sb_mbtail
->m_flags
& M_UNORDERED_DATA
)) {
1530 sb
->sb_mbtail
->m_flags
&= ~M_EOR
;
1531 sbcompress(sb
, m
, sb
->sb_mbtail
);
1534 ret
= sbappendrecord(sb
, m
);
1536 VERIFY(sb
->sb_mbtail
->m_flags
& M_EOR
);
1541 * TCP streams have message based out of order delivery support, or have
1542 * Multipath TCP support, or are regular TCP sockets
1545 sbappendstream_rcvdemux(struct socket
*so
, struct mbuf
*m
, uint32_t seqnum
,
1552 !((so
->so_flags
& SOF_MP_SUBFLOW
) &&
1553 (m
->m_flags
& M_PKTHDR
) &&
1554 (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP_DFIN
))) {
1559 if (so
->so_flags
& SOF_ENABLE_MSGS
) {
1560 ret
= sbappendmsgstream_rcv(&so
->so_rcv
, m
, seqnum
, unordered
);
1563 else if (so
->so_flags
& SOF_MP_SUBFLOW
) {
1564 ret
= sbappendmptcpstream_rcv(&so
->so_rcv
, m
);
1568 ret
= sbappendstream(&so
->so_rcv
, m
);
1575 sbappendmptcpstream_rcv(struct sockbuf
*sb
, struct mbuf
*m
)
1577 struct socket
*so
= sb
->sb_so
;
1579 VERIFY(m
== NULL
|| (m
->m_flags
& M_PKTHDR
));
1580 /* SB_NOCOMPRESS must be set prevent loss of M_PKTHDR data */
1581 VERIFY((sb
->sb_flags
& (SB_RECV
| SB_NOCOMPRESS
)) ==
1582 (SB_RECV
| SB_NOCOMPRESS
));
1584 if (m
== NULL
|| m_pktlen(m
) == 0 || (sb
->sb_flags
& SB_DROP
) ||
1585 (so
->so_state
& SS_CANTRCVMORE
)) {
1586 if (m
&& (m
->m_flags
& M_PKTHDR
) &&
1588 (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP_DFIN
)) {
1589 mptcp_input(tptomptp(sototcpcb(so
))->mpt_mpte
, m
);
1591 } else if (m
!= NULL
) {
1596 /* the socket is not closed, so SOF_MP_SUBFLOW must be set */
1597 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
1599 if (m
->m_nextpkt
!= NULL
|| (sb
->sb_mb
!= sb
->sb_lastrecord
)) {
1600 panic("%s: nexpkt %p || mb %p != lastrecord %p\n", __func__
,
1601 m
->m_nextpkt
, sb
->sb_mb
, sb
->sb_lastrecord
);
1605 SBLASTMBUFCHK(sb
, __func__
);
1607 /* No filter support (SB_RECV) on mptcp subflow sockets */
1609 sbcompress(sb
, m
, sb
->sb_mbtail
);
1610 sb
->sb_lastrecord
= sb
->sb_mb
;
1611 SBLASTRECORDCHK(sb
, __func__
);
1617 * Append message to send socket buffer based on priority.
1620 sbappendmsg_snd(struct sockbuf
*sb
, struct mbuf
*m
)
1622 struct socket
*so
= sb
->sb_so
;
1623 struct msg_priq
*priq
;
1626 VERIFY(so
->so_msg_state
!= NULL
);
1628 if (m
->m_nextpkt
!= NULL
|| (sb
->sb_mb
!= sb
->sb_lastrecord
)) {
1629 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
1630 m
->m_nextpkt
, sb
->sb_mb
, sb
->sb_lastrecord
);
1633 SBLASTMBUFCHK(sb
, __func__
);
1635 if (m
== NULL
|| (sb
->sb_flags
& SB_DROP
) || so
->so_msg_state
== NULL
) {
1642 priq
= &so
->so_msg_state
->msg_priq
[m
->m_pkthdr
.msg_pri
];
1644 /* note if we need to propogate M_EOR to the last mbuf */
1645 if (m
->m_flags
& M_EOR
) {
1648 /* Reset M_EOR from the first mbuf */
1649 m
->m_flags
&= ~(M_EOR
);
1652 if (priq
->msgq_head
== NULL
) {
1653 VERIFY(priq
->msgq_tail
== NULL
&& priq
->msgq_lastmsg
== NULL
);
1654 priq
->msgq_head
= priq
->msgq_lastmsg
= m
;
1656 VERIFY(priq
->msgq_tail
->m_next
== NULL
);
1658 /* Check if the last message has M_EOR flag set */
1659 if (priq
->msgq_tail
->m_flags
& M_EOR
) {
1660 /* Insert as a new message */
1661 priq
->msgq_lastmsg
->m_nextpkt
= m
;
1663 /* move the lastmsg pointer */
1664 priq
->msgq_lastmsg
= m
;
1666 /* Append to the existing message */
1667 priq
->msgq_tail
->m_next
= m
;
1671 /* Update accounting and the queue tail pointer */
1673 while (m
->m_next
!= NULL
) {
1675 priq
->msgq_bytes
+= m
->m_len
;
1679 priq
->msgq_bytes
+= m
->m_len
;
1682 m
->m_flags
|= M_EOR
;
1685 * Since the user space can not write a new msg
1686 * without completing the previous one, we can
1687 * reset this flag to start sending again.
1689 priq
->msgq_flags
&= ~(MSGQ_MSG_NOTDONE
);
1692 priq
->msgq_tail
= m
;
1694 SBLASTRECORDCHK(sb
, "sbappendstream 2");
1695 postevent(0, sb
, EV_RWBYTES
);
1700 * Pull data from priority queues to the serial snd queue
1701 * right before sending.
1704 sbpull_unordered_data(struct socket
*so
, int32_t off
, int32_t len
)
1707 struct msg_priq
*priq
= NULL
;
1709 VERIFY(so
->so_msg_state
!= NULL
);
1711 topull
= (off
+ len
) - so
->so_msg_state
->msg_serial_bytes
;
1714 while (i
>= MSG_PRI_MIN
&& topull
> 0) {
1715 struct mbuf
*m
= NULL
, *mqhead
= NULL
, *mend
= NULL
;
1716 priq
= &so
->so_msg_state
->msg_priq
[i
];
1717 if ((priq
->msgq_flags
& MSGQ_MSG_NOTDONE
) &&
1718 priq
->msgq_head
== NULL
) {
1720 * We were in the middle of sending
1721 * a message and we have not seen the
1724 VERIFY(priq
->msgq_lastmsg
== NULL
&&
1725 priq
->msgq_tail
== NULL
);
1728 if (priq
->msgq_head
!= NULL
) {
1729 int32_t bytes
= 0, topull_tmp
= topull
;
1731 * We found a msg while scanning the priority
1732 * queue from high to low priority.
1734 m
= priq
->msgq_head
;
1739 * Move bytes from the priority queue to the
1740 * serial queue. Compute the number of bytes
1743 while (mqhead
->m_next
!= NULL
&& topull_tmp
> 0) {
1744 bytes
+= mqhead
->m_len
;
1745 topull_tmp
-= mqhead
->m_len
;
1747 mqhead
= mqhead
->m_next
;
1750 if (mqhead
->m_next
== NULL
) {
1752 * If we have only one more mbuf left,
1753 * move the last mbuf of this message to
1754 * serial queue and set the head of the
1755 * queue to be the next message.
1757 bytes
+= mqhead
->m_len
;
1759 mqhead
= m
->m_nextpkt
;
1760 if (!(mend
->m_flags
& M_EOR
)) {
1762 * We have not seen the end of
1763 * this message, so we can not
1766 priq
->msgq_flags
|= MSGQ_MSG_NOTDONE
;
1769 mend
->m_flags
&= ~(M_EOR
);
1772 /* propogate the next msg pointer */
1773 mqhead
->m_nextpkt
= m
->m_nextpkt
;
1775 priq
->msgq_head
= mqhead
;
1778 * if the lastmsg pointer points to
1779 * the mbuf that is being dequeued, update
1780 * it to point to the new head.
1782 if (priq
->msgq_lastmsg
== m
) {
1783 priq
->msgq_lastmsg
= priq
->msgq_head
;
1786 m
->m_nextpkt
= NULL
;
1787 mend
->m_next
= NULL
;
1789 if (priq
->msgq_head
== NULL
) {
1790 /* Moved all messages, update tail */
1791 priq
->msgq_tail
= NULL
;
1792 VERIFY(priq
->msgq_lastmsg
== NULL
);
1795 /* Move it to serial sb_mb queue */
1796 if (so
->so_snd
.sb_mb
== NULL
) {
1797 so
->so_snd
.sb_mb
= m
;
1799 so
->so_snd
.sb_mbtail
->m_next
= m
;
1802 priq
->msgq_bytes
-= bytes
;
1803 VERIFY(priq
->msgq_bytes
>= 0);
1804 sbwakeup(&so
->so_snd
);
1806 so
->so_msg_state
->msg_serial_bytes
+= bytes
;
1807 so
->so_snd
.sb_mbtail
= mend
;
1808 so
->so_snd
.sb_lastrecord
= so
->so_snd
.sb_mb
;
1811 (off
+ len
) - so
->so_msg_state
->msg_serial_bytes
;
1813 if (priq
->msgq_flags
& MSGQ_MSG_NOTDONE
) {
1820 sblastrecordchk(&so
->so_snd
, "sbpull_unordered_data");
1821 sblastmbufchk(&so
->so_snd
, "sbpull_unordered_data");
1825 * Compress mbuf chain m into the socket
1826 * buffer sb following mbuf n. If n
1827 * is null, the buffer is presumed empty.
1830 sbcompress(struct sockbuf
*sb
, struct mbuf
*m
, struct mbuf
*n
)
1832 int eor
= 0, compress
= (!(sb
->sb_flags
& SB_NOCOMPRESS
));
1836 /* There is nothing to compress; just update the tail */
1837 for (; n
->m_next
!= NULL
; n
= n
->m_next
) {
1845 eor
|= m
->m_flags
& M_EOR
;
1846 if (compress
&& m
->m_len
== 0 && (eor
== 0 ||
1847 (((o
= m
->m_next
) || (o
= n
)) && o
->m_type
== m
->m_type
))) {
1848 if (sb
->sb_lastrecord
== m
) {
1849 sb
->sb_lastrecord
= m
->m_next
;
1854 if (compress
&& n
!= NULL
&& (n
->m_flags
& M_EOR
) == 0 &&
1858 m
->m_len
<= MCLBYTES
/ 4 && /* XXX: Don't copy too much */
1859 m
->m_len
<= M_TRAILINGSPACE(n
) &&
1860 n
->m_type
== m
->m_type
) {
1861 bcopy(mtod(m
, caddr_t
), mtod(n
, caddr_t
) + n
->m_len
,
1862 (unsigned)m
->m_len
);
1863 n
->m_len
+= m
->m_len
;
1864 sb
->sb_cc
+= m
->m_len
;
1865 if (m
->m_type
!= MT_DATA
&& m
->m_type
!= MT_HEADER
&&
1866 m
->m_type
!= MT_OOBDATA
) {
1867 /* XXX: Probably don't need */
1868 sb
->sb_ctl
+= m
->m_len
;
1871 /* update send byte count */
1872 if (sb
->sb_flags
& SB_SNDBYTE_CNT
) {
1873 inp_incr_sndbytes_total(sb
->sb_so
,
1875 inp_incr_sndbytes_unsent(sb
->sb_so
,
1889 m
->m_flags
&= ~M_EOR
;
1897 printf("semi-panic: sbcompress\n");
1901 SBLASTMBUFCHK(sb
, __func__
);
1902 postevent(0, sb
, EV_RWBYTES
);
1906 sb_empty_assert(struct sockbuf
*sb
, const char *where
)
1908 if (!(sb
->sb_cc
== 0 && sb
->sb_mb
== NULL
&& sb
->sb_mbcnt
== 0 &&
1909 sb
->sb_mbtail
== NULL
&& sb
->sb_lastrecord
== NULL
)) {
1910 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
1911 "lastrecord %p\n", where
, sb
, sb
->sb_so
, sb
->sb_cc
,
1912 sb
->sb_mbcnt
, sb
->sb_mb
, sb
->sb_mbtail
,
1919 sbflush_priq(struct msg_priq
*priq
)
1922 m
= priq
->msgq_head
;
1926 priq
->msgq_head
= priq
->msgq_tail
= priq
->msgq_lastmsg
= NULL
;
1927 priq
->msgq_bytes
= priq
->msgq_flags
= 0;
1931 * Free all mbufs in a sockbuf.
1932 * Check that all resources are reclaimed.
1935 sbflush(struct sockbuf
*sb
)
1937 void *lr_saved
= __builtin_return_address(0);
1938 struct socket
*so
= sb
->sb_so
;
1941 /* so_usecount may be 0 if we get here from sofreelastref() */
1943 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
1944 __func__
, sb
, sb
->sb_flags
, lr_saved
);
1946 } else if (so
->so_usecount
< 0) {
1947 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1948 "lrh= %s\n", __func__
, sb
, sb
->sb_flags
, so
,
1949 so
->so_usecount
, lr_saved
, solockhistory_nr(so
));
1954 * Obtain lock on the socket buffer (SB_LOCK). This is required
1955 * to prevent the socket buffer from being unexpectedly altered
1956 * while it is used by another thread in socket send/receive.
1958 * sblock() must not fail here, hence the assertion.
1960 (void) sblock(sb
, SBL_WAIT
| SBL_NOINTR
| SBL_IGNDEFUNCT
);
1961 VERIFY(sb
->sb_flags
& SB_LOCK
);
1963 while (sb
->sb_mbcnt
> 0) {
1965 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1966 * we would loop forever. Panic instead.
1968 if (!sb
->sb_cc
&& (sb
->sb_mb
== NULL
|| sb
->sb_mb
->m_len
)) {
1971 sbdrop(sb
, (int)sb
->sb_cc
);
1974 if (!(sb
->sb_flags
& SB_RECV
) && (so
->so_flags
& SOF_ENABLE_MSGS
)) {
1975 VERIFY(so
->so_msg_state
!= NULL
);
1976 for (i
= MSG_PRI_MIN
; i
<= MSG_PRI_MAX
; ++i
) {
1977 sbflush_priq(&so
->so_msg_state
->msg_priq
[i
]);
1979 so
->so_msg_state
->msg_serial_bytes
= 0;
1980 so
->so_msg_state
->msg_uno_bytes
= 0;
1983 sb_empty_assert(sb
, __func__
);
1984 postevent(0, sb
, EV_RWBYTES
);
1986 sbunlock(sb
, TRUE
); /* keep socket locked */
1990 * Drop data from (the front of) a sockbuf.
1991 * use m_freem_list to free the mbuf structures
1992 * under a single lock... this is done by pruning
1993 * the top of the tree from the body by keeping track
1994 * of where we get to in the tree and then zeroing the
1995 * two pertinent pointers m_nextpkt and m_next
1996 * the socket buffer is then updated to point at the new
1997 * top of the tree and the pruned area is released via
2001 sbdrop(struct sockbuf
*sb
, int len
)
2003 struct mbuf
*m
, *free_list
, *ml
;
2004 struct mbuf
*next
, *last
;
2006 next
= (m
= sb
->sb_mb
) ? m
->m_nextpkt
: 0;
2008 if (m
!= NULL
&& len
> 0 && !(sb
->sb_flags
& SB_RECV
) &&
2009 ((sb
->sb_so
->so_flags
& SOF_MP_SUBFLOW
) ||
2010 (SOCK_CHECK_DOM(sb
->sb_so
, PF_MULTIPATH
) &&
2011 SOCK_CHECK_PROTO(sb
->sb_so
, IPPROTO_TCP
))) &&
2012 !(sb
->sb_so
->so_flags1
& SOF1_POST_FALLBACK_SYNC
)) {
2013 mptcp_preproc_sbdrop(sb
->sb_so
, m
, (unsigned int)len
);
2015 if (m
!= NULL
&& len
> 0 && !(sb
->sb_flags
& SB_RECV
) &&
2016 (sb
->sb_so
->so_flags
& SOF_MP_SUBFLOW
) &&
2017 (sb
->sb_so
->so_flags1
& SOF1_POST_FALLBACK_SYNC
)) {
2018 mptcp_fallback_sbdrop(sb
->sb_so
, m
, len
);
2021 KERNEL_DEBUG((DBG_FNC_SBDROP
| DBG_FUNC_START
), sb
, len
, 0, 0, 0);
2023 free_list
= last
= m
;
2024 ml
= (struct mbuf
*)0;
2030 * temporarily replacing this panic with printf
2031 * because it occurs occasionally when closing
2032 * a socket when there is no harm in ignoring
2033 * it. This problem will be investigated
2036 /* panic("sbdrop"); */
2037 printf("sbdrop - count not zero\n");
2040 * zero the counts. if we have no mbufs,
2041 * we have no data (PR-2986815)
2045 if (!(sb
->sb_flags
& SB_RECV
) &&
2046 (sb
->sb_so
->so_flags
& SOF_ENABLE_MSGS
)) {
2047 sb
->sb_so
->so_msg_state
->
2048 msg_serial_bytes
= 0;
2053 next
= m
->m_nextpkt
;
2056 if (m
->m_len
> len
) {
2060 /* update the send byte count */
2061 if (sb
->sb_flags
& SB_SNDBYTE_CNT
) {
2062 inp_decr_sndbytes_total(sb
->sb_so
, len
);
2064 if (m
->m_type
!= MT_DATA
&& m
->m_type
!= MT_HEADER
&&
2065 m
->m_type
!= MT_OOBDATA
) {
2076 while (m
&& m
->m_len
== 0) {
2083 ml
->m_next
= (struct mbuf
*)0;
2084 last
->m_nextpkt
= (struct mbuf
*)0;
2085 m_freem_list(free_list
);
2089 m
->m_nextpkt
= next
;
2095 * First part is an inline SB_EMPTY_FIXUP(). Second part
2096 * makes sure sb_lastrecord is up-to-date if we dropped
2097 * part of the last record.
2101 sb
->sb_mbtail
= NULL
;
2102 sb
->sb_lastrecord
= NULL
;
2103 } else if (m
->m_nextpkt
== NULL
) {
2104 sb
->sb_lastrecord
= m
;
2108 cfil_sock_buf_update(sb
);
2109 #endif /* CONTENT_FILTER */
2111 postevent(0, sb
, EV_RWBYTES
);
2113 KERNEL_DEBUG((DBG_FNC_SBDROP
| DBG_FUNC_END
), sb
, 0, 0, 0, 0);
2117 * Drop a record off the front of a sockbuf
2118 * and move the next record to the front.
2121 sbdroprecord(struct sockbuf
*sb
)
2123 struct mbuf
*m
, *mn
;
2127 sb
->sb_mb
= m
->m_nextpkt
;
2135 postevent(0, sb
, EV_RWBYTES
);
2139 * Create a "control" mbuf containing the specified data
2140 * with the specified type for presentation on a socket buffer.
2143 sbcreatecontrol(caddr_t p
, int size
, int type
, int level
)
2148 if (CMSG_SPACE((u_int
)size
) > MLEN
) {
2149 return (struct mbuf
*)NULL
;
2151 if ((m
= m_get(M_DONTWAIT
, MT_CONTROL
)) == NULL
) {
2152 return (struct mbuf
*)NULL
;
2154 cp
= mtod(m
, struct cmsghdr
*);
2155 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
2156 /* XXX check size? */
2157 (void) memcpy(CMSG_DATA(cp
), p
, size
);
2158 m
->m_len
= CMSG_SPACE(size
);
2159 cp
->cmsg_len
= CMSG_LEN(size
);
2160 cp
->cmsg_level
= level
;
2161 cp
->cmsg_type
= type
;
2166 sbcreatecontrol_mbuf(caddr_t p
, int size
, int type
, int level
, struct mbuf
**mp
)
2172 *mp
= sbcreatecontrol(p
, size
, type
, level
);
2176 if (CMSG_SPACE((u_int
)size
) + (*mp
)->m_len
> MLEN
) {
2177 mp
= &(*mp
)->m_next
;
2178 *mp
= sbcreatecontrol(p
, size
, type
, level
);
2184 cp
= (struct cmsghdr
*)(void *)(mtod(m
, char *) + m
->m_len
);
2185 /* CMSG_SPACE ensures 32-bit alignment */
2186 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
2187 m
->m_len
+= CMSG_SPACE(size
);
2189 /* XXX check size? */
2190 (void) memcpy(CMSG_DATA(cp
), p
, size
);
2191 cp
->cmsg_len
= CMSG_LEN(size
);
2192 cp
->cmsg_level
= level
;
2193 cp
->cmsg_type
= type
;
2200 * Some routines that return EOPNOTSUPP for entry points that are not
2201 * supported by a protocol. Fill in as needed.
2204 pru_abort_notsupp(struct socket
*so
)
2211 pru_accept_notsupp(struct socket
*so
, struct sockaddr
**nam
)
2213 #pragma unused(so, nam)
2218 pru_attach_notsupp(struct socket
*so
, int proto
, struct proc
*p
)
2220 #pragma unused(so, proto, p)
2225 pru_bind_notsupp(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
2227 #pragma unused(so, nam, p)
2232 pru_connect_notsupp(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
2234 #pragma unused(so, nam, p)
2239 pru_connect2_notsupp(struct socket
*so1
, struct socket
*so2
)
2241 #pragma unused(so1, so2)
2246 pru_connectx_notsupp(struct socket
*so
, struct sockaddr
*src
,
2247 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
2248 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
2249 uint32_t arglen
, struct uio
*uio
, user_ssize_t
*bytes_written
)
2251 #pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
2256 pru_control_notsupp(struct socket
*so
, u_long cmd
, caddr_t data
,
2257 struct ifnet
*ifp
, struct proc
*p
)
2259 #pragma unused(so, cmd, data, ifp, p)
2264 pru_detach_notsupp(struct socket
*so
)
2271 pru_disconnect_notsupp(struct socket
*so
)
2278 pru_disconnectx_notsupp(struct socket
*so
, sae_associd_t aid
, sae_connid_t cid
)
2280 #pragma unused(so, aid, cid)
2285 pru_listen_notsupp(struct socket
*so
, struct proc
*p
)
2287 #pragma unused(so, p)
2292 pru_peeraddr_notsupp(struct socket
*so
, struct sockaddr
**nam
)
2294 #pragma unused(so, nam)
2299 pru_rcvd_notsupp(struct socket
*so
, int flags
)
2301 #pragma unused(so, flags)
2306 pru_rcvoob_notsupp(struct socket
*so
, struct mbuf
*m
, int flags
)
2308 #pragma unused(so, m, flags)
2313 pru_send_notsupp(struct socket
*so
, int flags
, struct mbuf
*m
,
2314 struct sockaddr
*addr
, struct mbuf
*control
, struct proc
*p
)
2316 #pragma unused(so, flags, m, addr, control, p)
2321 pru_send_list_notsupp(struct socket
*so
, int flags
, struct mbuf
*m
,
2322 struct sockaddr
*addr
, struct mbuf
*control
, struct proc
*p
)
2324 #pragma unused(so, flags, m, addr, control, p)
2329 * This isn't really a ``null'' operation, but it's the default one
2330 * and doesn't do anything destructive.
2333 pru_sense_null(struct socket
*so
, void *ub
, int isstat64
)
2335 if (isstat64
!= 0) {
2336 struct stat64
*sb64
;
2338 sb64
= (struct stat64
*)ub
;
2339 sb64
->st_blksize
= so
->so_snd
.sb_hiwat
;
2343 sb
= (struct stat
*)ub
;
2344 sb
->st_blksize
= so
->so_snd
.sb_hiwat
;
2352 pru_sosend_notsupp(struct socket
*so
, struct sockaddr
*addr
, struct uio
*uio
,
2353 struct mbuf
*top
, struct mbuf
*control
, int flags
)
2355 #pragma unused(so, addr, uio, top, control, flags)
2360 pru_sosend_list_notsupp(struct socket
*so
, struct uio
**uio
,
2361 u_int uiocnt
, int flags
)
2363 #pragma unused(so, uio, uiocnt, flags)
2368 pru_soreceive_notsupp(struct socket
*so
, struct sockaddr
**paddr
,
2369 struct uio
*uio
, struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
2371 #pragma unused(so, paddr, uio, mp0, controlp, flagsp)
2376 pru_soreceive_list_notsupp(struct socket
*so
,
2377 struct recv_msg_elem
*recv_msg_array
, u_int uiocnt
, int *flagsp
)
2379 #pragma unused(so, recv_msg_array, uiocnt, flagsp)
2384 pru_shutdown_notsupp(struct socket
*so
)
2391 pru_sockaddr_notsupp(struct socket
*so
, struct sockaddr
**nam
)
2393 #pragma unused(so, nam)
2398 pru_sopoll_notsupp(struct socket
*so
, int events
, kauth_cred_t cred
, void *wql
)
2400 #pragma unused(so, events, cred, wql)
2405 pru_socheckopt_null(struct socket
*so
, struct sockopt
*sopt
)
2407 #pragma unused(so, sopt)
2409 * Allow all options for set/get by default.
2415 pru_preconnect_null(struct socket
*so
)
void
pru_sanitize(struct pr_usrreqs *pru)
{
#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
	DEFAULT(pru->pru_abort, pru_abort_notsupp);
	DEFAULT(pru->pru_accept, pru_accept_notsupp);
	DEFAULT(pru->pru_attach, pru_attach_notsupp);
	DEFAULT(pru->pru_bind, pru_bind_notsupp);
	DEFAULT(pru->pru_connect, pru_connect_notsupp);
	DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
	DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
	DEFAULT(pru->pru_control, pru_control_notsupp);
	DEFAULT(pru->pru_detach, pru_detach_notsupp);
	DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
	DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
	DEFAULT(pru->pru_listen, pru_listen_notsupp);
	DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
	DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
	DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
	DEFAULT(pru->pru_send, pru_send_notsupp);
	DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
	DEFAULT(pru->pru_sense, pru_sense_null);
	DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
	DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
	DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
	DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
	DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
	DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
	DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
	DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
	DEFAULT(pru->pru_preconnect, pru_preconnect_null);
#undef DEFAULT
}
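
/*
 * Illustrative sketch (hypothetical protocol, not part of this file): a
 * protocol that only implements attach and detach can rely on
 * pru_sanitize() to fill every remaining entry point with the *_notsupp
 * or *_null defaults above, so callers never dereference a NULL handler:
 *
 *	static struct pr_usrreqs foo_usrreqs = {
 *		.pru_attach = foo_attach,
 *		.pru_detach = foo_detach,
 *	};
 *
 *	pru_sanitize(&foo_usrreqs);
 *	// foo_usrreqs.pru_connect is now pru_connect_notsupp, etc.
 */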
/*
 * The following are macros on BSD and functions on Darwin
 */

/* Do we need to notify the other side when I/O is possible? */
int
sb_notify(struct sockbuf *sb)
{
	return sb->sb_waiters > 0 ||
	       (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
}
/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.
 */
int
sbspace(struct sockbuf *sb)
{
	int pending = 0;
	int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt));

	if (sb->sb_preconn_hiwat != 0) {
		space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
	}

	if (space < 0) {
		space = 0;
	}

	/* Compensate for data being processed by content filters */
#if CONTENT_FILTER
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space) {
		space = 0;
	} else {
		space -= pending;
	}

	return space;
}
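
/*
 * Worked example (illustrative numbers only): with sb_hiwat = 8192,
 * sb_cc = 6144, sb_mbmax = 16384 and sb_mbcnt = 4096, the byte headroom
 * is 8192 - 6144 = 2048 and the mbuf headroom is 16384 - 4096 = 12288,
 * so sbspace() reports imin(2048, 12288) = 2048; any bytes still queued
 * in a content filter are then subtracted from that result.
 */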
/*
 * If this socket has priority queues, check if there is enough
 * space in the priority queue for this msg.
 */
int
msgq_sbspace(struct socket *so, struct mbuf *control)
{
	int space = 0, error;
	u_int32_t msgpri = 0;

	VERIFY(so->so_type == SOCK_STREAM &&
	    SOCK_PROTO(so) == IPPROTO_TCP);
	if (control != NULL) {
		error = tcp_get_msg_priority(control, &msgpri);
		if (error) {
			return 0;
		}
	} else {
		msgpri = MSG_PRI_0;
	}
	space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) -
	    so->so_msg_state->msg_priq[msgpri].msgq_bytes;
	if (space < 0) {
		space = 0;
	}
	return space;
}
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return so->so_proto->pr_flags & PR_ATOMIC;
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	       ((so->so_state & SS_CANTRCVMORE)
#if CONTENT_FILTER
	       && cfil_sock_data_pending(&so->so_rcv) == 0
#endif /* CONTENT_FILTER */
	       ) ||
	       so->so_comp.tqh_first || so->so_error;
}
/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	if ((so->so_state & SS_CANTSENDMORE) ||
	    so->so_error > 0) {
		return 1;
	}
	if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
		return 0;
	}
	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
		return 1;
	}

	if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
		if (so->so_flags & SOF_NOTSENT_LOWAT) {
			if ((SOCK_DOM(so) == PF_INET6 ||
			    SOCK_DOM(so) == PF_INET) &&
			    so->so_type == SOCK_STREAM) {
				return tcp_notsent_lowat_check(so);
			}
#if MPTCP
			else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
			    (SOCK_PROTO(so) == IPPROTO_TCP)) {
				return mptcp_notsent_lowat_check(so);
			}
#endif
			else {
				return 1;
			}
		} else {
			return 1;
		}
	}

	return 0;
}
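
/*
 * Usage note (illustrative, from user space): when SO_NOTSENT_LOWAT is
 * set on a TCP socket, sowriteable() defers the writability decision to
 * tcp_notsent_lowat_check() above instead of the plain sbspace() test.
 * A hypothetical caller would arm it with something like:
 *
 *	int lowat = 16 * 1024;
 *	setsockopt(fd, SOL_SOCKET, SO_NOTSENT_LOWAT, &lowat, sizeof(lowat));
 */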
/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	u_int32_t cnt = 1;

	sb->sb_cc += m->m_len;
	if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
	    m->m_type != MT_OOBDATA) {
		sb->sb_ctl += m->m_len;
	}

	sb->sb_mbcnt += MSIZE;

	if (m->m_flags & M_EXT) {
		sb->sb_mbcnt += m->m_ext.ext_size;
		cnt += (m->m_ext.ext_size >> MSIZESHIFT);
	}
	OSAddAtomic(cnt, &total_sbmb_cnt);
	VERIFY(total_sbmb_cnt > 0);
	if (total_sbmb_cnt > total_sbmb_cnt_peak) {
		total_sbmb_cnt_peak = total_sbmb_cnt;
	}

	/*
	 * If data is being added to the send socket buffer,
	 * update the send byte count
	 */
	if (sb->sb_flags & SB_SNDBYTE_CNT) {
		inp_incr_sndbytes_total(sb->sb_so, m->m_len);
		inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
	}
}
/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	int cnt = -1;

	sb->sb_cc -= m->m_len;
	if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
	    m->m_type != MT_OOBDATA) {
		sb->sb_ctl -= m->m_len;
	}

	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT) {
		sb->sb_mbcnt -= m->m_ext.ext_size;
		cnt -= (m->m_ext.ext_size >> MSIZESHIFT);
	}
	OSAddAtomic(cnt, &total_sbmb_cnt);
	VERIFY(total_sbmb_cnt >= 0);
	if (total_sbmb_cnt < total_sbmb_cnt_floor) {
		total_sbmb_cnt_floor = total_sbmb_cnt;
	}

	/*
	 * If data is being removed from the send socket buffer,
	 * update the send byte count
	 */
	if (sb->sb_flags & SB_SNDBYTE_CNT) {
		inp_decr_sndbytes_total(sb->sb_so, m->m_len);
	}
}
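
/*
 * Invariant sketch (summary of the two helpers above, not new behavior):
 * sballoc() and sbfree() must be called symmetrically for every mbuf
 * linked into or unlinked from a socket buffer, e.g.:
 *
 *	sballoc(sb, m);	// sb_cc/sb_mbcnt grow, total_sbmb_cnt grows
 *	...
 *	sbfree(sb, m);	// the same counters shrink back
 *
 * otherwise sb_cc, sb_mbcnt and the global total_sbmb_cnt drift and the
 * VERIFY() checks above will eventually fire.
 */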
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, uint32_t flags)
{
	boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
	void *lr_saved = __builtin_return_address(0);
	struct socket *so = sb->sb_so;
	void *wchan;
	int error = 0;
	thread_t tp = current_thread();

	VERIFY((flags & SBL_VALID) == flags);

	/* so_usecount may be 0 if we get here from sofreelastref() */
	if (so == NULL) {
		panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
		    __func__, sb, sb->sb_flags, lr_saved);
		/* NOTREACHED */
	} else if (so->so_usecount < 0) {
		panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
		    "lrh= %s\n", __func__, sb, sb->sb_flags, so,
		    so->so_usecount, lr_saved, solockhistory_nr(so));
		/* NOTREACHED */
	}

	/*
	 * The content filter thread must hold the sockbuf lock
	 */
	if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
		/*
		 * Don't panic if we are defunct because SB_LOCK has
		 * been cleared by sodefunct()
		 */
		if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
			panic("%s: SB_LOCK not held for %p\n",
			    __func__, sb);
		}
		/* Keep the sockbuf locked */
		return 0;
	}

	if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
		return EWOULDBLOCK;
	}
	/*
	 * We may get here from sorflush(), in which case "sb" may not
	 * point to the real socket buffer.  Use the actual socket buffer
	 * address from the socket instead.
	 */
	wchan = (sb->sb_flags & SB_RECV) ?
	    &so->so_rcv.sb_flags : &so->so_snd.sb_flags;

	/*
	 * A content filter thread has exclusive access to the sockbuf
	 * until it clears the sb_cfil_thread field.
	 */
	while ((sb->sb_flags & SB_LOCK) ||
	    ((so->so_flags & SOF_CONTENT_FILTER) &&
	    sb->sb_cfil_thread != NULL)) {
		lck_mtx_t *mutex_held;

		/*
		 * XXX: This code should be moved up above outside of this loop;
		 * however, we may get here as part of sofreelastref(), and
		 * at that time pr_getlock() may no longer be able to return
		 * us the lock.  This will be fixed in future.
		 */
		if (so->so_proto->pr_getlock != NULL) {
			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		} else {
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		}

		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

		sb->sb_wantlock++;
		VERIFY(sb->sb_wantlock != 0);

		error = msleep(wchan, mutex_held,
		    nointr ? PSOCK : PSOCK | PCATCH,
		    nointr ? "sb_lock_nointr" : "sb_lock", NULL);

		VERIFY(sb->sb_wantlock != 0);
		sb->sb_wantlock--;

		if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
		    !(flags & SBL_IGNDEFUNCT)) {
			error = EBADF;
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
			    "(%d)\n", __func__, proc_selfpid(),
			    proc_best_name(current_proc()),
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so), error);
		}

		if (error != 0) {
			return error;
		}
	}
	sb->sb_flags |= SB_LOCK;
	return 0;
}
/*
 * Release lock on sockbuf sb
 */
void
sbunlock(struct sockbuf *sb, boolean_t keeplocked)
{
	void *lr_saved = __builtin_return_address(0);
	struct socket *so = sb->sb_so;
	thread_t tp = current_thread();

	/* so_usecount may be 0 if we get here from sofreelastref() */
	if (so == NULL) {
		panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
		    __func__, sb, sb->sb_flags, lr_saved);
		/* NOTREACHED */
	} else if (so->so_usecount < 0) {
		panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
		    "lrh= %s\n", __func__, sb, sb->sb_flags, so,
		    so->so_usecount, lr_saved, solockhistory_nr(so));
		/* NOTREACHED */
	}

	/*
	 * The content filter thread must hold the sockbuf lock
	 */
	if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
		/*
		 * Don't panic if we are defunct because SB_LOCK has
		 * been cleared by sodefunct()
		 */
		if (!(so->so_flags & SOF_DEFUNCT) &&
		    !(sb->sb_flags & SB_LOCK) &&
		    !(so->so_state & SS_DEFUNCT) &&
		    !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
			panic("%s: SB_LOCK not held for %p\n",
			    __func__, sb);
		}
		/* Keep the sockbuf locked and proceed */
	} else {
		VERIFY((sb->sb_flags & SB_LOCK) ||
		    (so->so_state & SS_DEFUNCT) ||
		    (so->so_flags1 & SOF1_DEFUNCTINPROG));

		sb->sb_flags &= ~SB_LOCK;

		if (sb->sb_wantlock > 0) {
			/*
			 * We may get here from sorflush(), in which case "sb"
			 * may not point to the real socket buffer.  Use the
			 * actual socket buffer address from the socket instead.
			 */
			wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
			    &so->so_snd.sb_flags);
		}
	}

	if (!keeplocked) {      /* unlock on exit */
		if (so->so_flags & SOF_MP_SUBFLOW || SOCK_DOM(so) == PF_MULTIPATH) {
			(*so->so_proto->pr_unlock)(so, 1, lr_saved);
		} else {
			lck_mtx_t *mutex_held;

			if (so->so_proto->pr_getlock != NULL) {
				mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
			} else {
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			}

			LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

			VERIFY(so->so_usecount > 0);
			so->so_usecount--;
			so->unlock_lr[so->next_unlock_lr] = lr_saved;
			so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
			lck_mtx_unlock(mutex_held);
		}
	}
}
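
/*
 * Typical locking pattern (sketch only, showing how callers usually pair
 * the two routines above):
 *
 *	error = sblock(&so->so_snd, SBL_WAIT);
 *	if (error == 0) {
 *		... manipulate the send buffer ...
 *		sbunlock(&so->so_snd, TRUE);	// keep the socket lock
 *	}
 *
 * Passing keeplocked == FALSE additionally drops the socket's mutex and
 * one use count, as shown at the end of sbunlock().
 */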
void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv)) {
		sowakeup(so, &so->so_rcv, NULL);
	}
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd)) {
		sowakeup(so, &so->so_snd, NULL);
	}
}
void
soevent(struct socket *so, long hint)
{
	if (so->so_flags & SOF_KNOTE) {
		KNOTE(&so->so_klist, hint);
	}

	soevupcall(so, hint);

	/*
	 * Don't post an event if this is a subflow socket or
	 * the app has opted out of using cellular interface
	 */
	if ((hint & SO_FILT_HINT_IFDENIED) &&
	    !(so->so_flags & SOF_MP_SUBFLOW) &&
	    !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
	    !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE) &&
	    !(so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
		soevent_ifdenied(so);
	}
}

void
soevupcall(struct socket *so, u_int32_t hint)
{
	if (so->so_event != NULL) {
		caddr_t so_eventarg = so->so_eventarg;

		hint &= so->so_eventmask;
		if (hint != 0) {
			so->so_event(so, so_eventarg, hint);
		}
	}
}
static void
soevent_ifdenied(struct socket *so)
{
	struct kev_netpolicy_ifdenied ev_ifdenied;

	bzero(&ev_ifdenied, sizeof(ev_ifdenied));
	/*
	 * The event consumer is interested in the effective {upid,pid,uuid}
	 * info, which can be different from those related to the process
	 * that recently performed a system call on the socket, i.e. when the
	 * socket is delegated.
	 */
	if (so->so_flags & SOF_DELEGATED) {
		ev_ifdenied.ev_data.eupid = so->e_upid;
		ev_ifdenied.ev_data.epid = so->e_pid;
		uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
	} else {
		ev_ifdenied.ev_data.eupid = so->last_upid;
		ev_ifdenied.ev_data.epid = so->last_pid;
		uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
	}

	if (++so->so_ifdenied_notifies > 1) {
		/*
		 * Allow for at most one kernel event to be generated per
		 * socket; so_ifdenied_notifies is reset upon changes in
		 * the UUID policy.  See comments in inp_update_policy.
		 */
		if (net_io_policy_log) {
			uuid_string_t buf;

			uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
			log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
			    "euuid %s%s has %d redundant events suppressed\n",
			    __func__, so->last_pid,
			    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
			    SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
			    ((so->so_flags & SOF_DELEGATED) ?
			    " [delegated]" : ""), so->so_ifdenied_notifies);
		}
	} else {
		if (net_io_policy_log) {
			uuid_string_t buf;

			uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
			log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
			    "euuid %s%s event posted\n", __func__,
			    so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so),
			    SOCK_DOM(so), SOCK_TYPE(so),
			    ev_ifdenied.ev_data.epid, buf,
			    ((so->so_flags & SOF_DELEGATED) ?
			    " [delegated]" : ""));
		}
		netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
		    sizeof(ev_ifdenied));
	}
}
/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(struct sockaddr *sa, int canwait)
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2) {
		bcopy(sa, sa2, sa->sa_len);
	}
	return sa2;
}
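
/*
 * Usage sketch: the copy returned by dup_sockaddr() is allocated with
 * M_SONAME and must eventually be released by the caller, e.g.:
 *
 *	struct sockaddr *copy = dup_sockaddr(sa, 1);	// may block
 *	if (copy != NULL) {
 *		...
 *		FREE(copy, M_SONAME);
 *	}
 */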
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof(*xso);
	xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
	xso->so_type = so->so_type;
	xso->so_options = (short)(so->so_options & 0xffff);
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
	if (so->so_proto) {
		xso->xso_protocol = SOCK_PROTO(so);
		xso->xso_family = SOCK_DOM(so);
	} else {
		xso->xso_protocol = xso->xso_family = 0;
	}
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = kauth_cred_getuid(so->so_cred);
}
#if !CONFIG_EMBEDDED

void
sotoxsocket64(struct socket *so, struct xsocket64 *xso)
{
	xso->xso_len = sizeof(*xso);
	xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
	xso->so_type = so->so_type;
	xso->so_options = (short)(so->so_options & 0xffff);
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
	if (so->so_proto) {
		xso->xso_protocol = SOCK_PROTO(so);
		xso->xso_family = SOCK_DOM(so);
	} else {
		xso->xso_protocol = xso->xso_family = 0;
	}
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = kauth_cred_getuid(so->so_cred);
}

#endif /* !CONFIG_EMBEDDED */
/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (short)
	    (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
		xsb->sb_timeo = 1;
	}
}
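
/*
 * Conversion note (illustrative arithmetic): sb_timeo is exported in
 * ticks.  With hz = 100, a timeout of { tv_sec = 2, tv_usec = 500000 }
 * becomes 2 * 100 + 500000 / tick = 250 ticks; a nonzero timeout that
 * would otherwise round down to 0 is reported as 1 so callers can still
 * tell it apart from "no timeout".
 */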
/*
 * Based on the policy set by an all-knowing decision maker, throttle
 * sockets that have been marked as belonging to a "background" process.
 */
inline int
soisthrottled(struct socket *so)
{
	return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
}
inline int
soisprivilegedtraffic(struct socket *so)
{
	return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
}

inline int
soissrcbackground(struct socket *so)
{
	return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
	       IS_SO_TC_BACKGROUND(so->so_traffic_class);
}

inline int
soissrcrealtime(struct socket *so)
{
	return so->so_traffic_class >= SO_TC_AV &&
	       so->so_traffic_class <= SO_TC_VO;
}

inline int
soissrcbesteffort(struct socket *so)
{
	return so->so_traffic_class == SO_TC_BE ||
	       so->so_traffic_class == SO_TC_RD ||
	       so->so_traffic_class == SO_TC_OAM;
}

void
soclearfastopen(struct socket *so)
{
	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
		so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
	}

	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
	}
}

void
sonullevent(struct socket *so, void *arg, uint32_t hint)
{
#pragma unused(so, arg, hint)
}
/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc,
    CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");

/* Check that the maximum socket buffer size is within a range */
static int
sysctl_sb_max SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	u_int32_t new_value;
	int changed = 0;
	int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
	    &new_value, &changed);
	if (!error && changed) {
		if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
			sb_max = new_value;
		} else {
			error = ERANGE;
		}
	}
	return error;
}
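
/*
 * Illustrative user-space sketch (not part of the kernel): the handler
 * above backs kern.ipc.maxsockbuf, so a sufficiently privileged process
 * could adjust it via sysctlbyname(3); values outside the
 * (LOW_SB_MAX, high_sb_max] window are rejected with ERANGE:
 *
 *	u_int32_t newmax = 4 * 1024 * 1024;
 *	sysctlbyname("kern.ipc.maxsockbuf", NULL, NULL,
 *	    &newmax, sizeof(newmax));
 */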
SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");

SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
    CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");

SYSCTL_INT(_kern_ipc, OID_AUTO, njcl,
    CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");

SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
    CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
    "Enable socket queue limit compatibility");
/*
 * Hack alert -- rdar://33572856
 * A loopback test we cannot change was failing because it sets
 * SO_SENDTIMEO to 5 seconds and that's also the value
 * of the minimum persist timer.  Because of the persist timer,
 * the connection was not idle for 5 seconds and SO_SNDTIMEO
 * was not triggering at 5 seconds causing the test failure.
 * As a workaround we check the sysctl soqlencomp the test is already
 * setting, and use it to disable auto tuning of the receive buffer.
 */
extern u_int32_t tcp_do_autorcvbuf;
static int
sysctl_soqlencomp SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	u_int32_t new_value;
	int changed = 0;
	int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
	    &new_value, &changed);
	if (!error && changed) {
		soqlencomp = new_value;
		if (new_value != 0) {
			tcp_do_autorcvbuf = 0;
			tcptv_persmin_val = 6 * TCP_RETRANSHZ;
		}
	}
	return error;
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");

SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
    &total_sbmb_cnt, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_peak, CTLFLAG_RD | CTLFLAG_LOCKED,
    &total_sbmb_cnt_peak, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_floor, CTLFLAG_RD | CTLFLAG_LOCKED,
    &total_sbmb_cnt_floor, 0, "");
SYSCTL_QUAD(_kern_ipc, OID_AUTO, sbmb_limreached, CTLFLAG_RD | CTLFLAG_LOCKED,
    &sbmb_limreached, "");


SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");

SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
    &net_io_policy_log, 0, "");

#if CONFIG_PROC_UUID_POLICY
SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
    &net_io_policy_uuid, 0, "");
#endif /* CONFIG_PROC_UUID_POLICY */