[apple/xnu.git] / bsd / kern / uipc_socket2.c (blame, xnu-6153.81.5)
1c79356b 1/*
0a7de745 2 * Copyright (c) 1998-2019 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
62 */
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/domain.h>
73#include <sys/kernel.h>
74#include <sys/proc_internal.h>
75#include <sys/kauth.h>
76#include <sys/malloc.h>
77#include <sys/mbuf.h>
316670eb 78#include <sys/mcache.h>
79#include <sys/protosw.h>
80#include <sys/stat.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/signalvar.h>
84#include <sys/sysctl.h>
39236c6e 85#include <sys/syslog.h>
cb323159 86#include <sys/unpcb.h>
1c79356b 87#include <sys/ev.h>
88#include <kern/locks.h>
89#include <net/route.h>
fe8ab488 90#include <net/content_filter.h>
91#include <netinet/in.h>
92#include <netinet/in_pcb.h>
5ba3f43e 93#include <netinet/tcp_var.h>
fa4905b1 94#include <sys/kdebug.h>
95#include <libkern/OSAtomic.h>
96
97#if CONFIG_MACF
98#include <security/mac_framework.h>
99#endif
100
101#include <mach/vm_param.h>
102
103#if MPTCP
104#include <netinet/mptcp_var.h>
105#endif
fa4905b1 106
107#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
108#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
fa4905b1 109
110SYSCTL_DECL(_kern_ipc);
111
112__private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
113SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
114 CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
115
116static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
117static struct socket *sonewconn_internal(struct socket *, int);
118static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
119 struct mbuf *);
39236c6e 120static void soevent_ifdenied(struct socket *);
fa4905b1 121
122/*
123 * Primitive routines for operating on sockets and socket buffers
124 */
125static int soqlimitcompat = 1;
126static int soqlencomp = 0;
1c79356b 127
128/*
129 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
130 * get scaled up or down to suit that memory configuration. high_sb_max is a
131 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
132 */
133
134u_int32_t sb_max = SB_MAX; /* XXX should be static */
135u_int32_t high_sb_max = SB_MAX;
1c79356b 136
0a7de745 137static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */
fe8ab488 138int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
39037602 139int32_t total_sbmb_cnt_floor __attribute__((aligned(8))) = 0;
140int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
141int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
316670eb 142
0a7de745 143u_int32_t net_io_policy_log = 0; /* log socket policy changes */
39236c6e 144#if CONFIG_PROC_UUID_POLICY
0a7de745 145u_int32_t net_io_policy_uuid = 1; /* enable UUID socket policy */
146#endif /* CONFIG_PROC_UUID_POLICY */
147
148/*
149 * Procedures to manipulate state flags of socket
150 * and do appropriate wakeups. Normal sequence from the
151 * active (originating) side is that soisconnecting() is
152 * called during processing of connect() call,
153 * resulting in an eventual call to soisconnected() if/when the
154 * connection is established. When the connection is torn down
9bccf70c 155 * soisdisconnecting() is called during processing of disconnect() call,
156 * and soisdisconnected() is called when the connection to the peer
157 * is totally severed. The semantics of these routines are such that
158 * connectionless protocols can call soisconnected() and soisdisconnected()
159 * only, bypassing the in-progress calls when setting up a ``connection''
160 * takes no time.
161 *
162 * From the passive side, a socket is created with
163 * two queues of sockets: so_incomp for connections in progress
164 * and so_comp for connections already made and awaiting user acceptance.
9bccf70c 165 * As a protocol is preparing incoming connections, it creates a socket
e3027f41 166 * structure queued on so_incomp by calling sonewconn(). When the connection
1c79356b 167 * is established, soisconnected() is called, and transfers the
e3027f41 168 * socket structure to so_comp, making it available to accept().
1c79356b 169 *
9bccf70c 170 * If a socket is closed with sockets on either
e3027f41 171 * so_incomp or so_comp, these sockets are dropped.
9bccf70c 172 *
173 * If higher level protocols are implemented in
174 * the kernel, the wakeups done here will sometimes
175 * cause software-interrupt process scheduling.
176 */
1c79356b 177void
2d21ac55 178soisconnecting(struct socket *so)
1c79356b 179{
0a7de745 180 so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
1c79356b 181 so->so_state |= SS_ISCONNECTING;
2d21ac55 182
91447636 183 sflt_notify(so, sock_evt_connecting, NULL);
184}
185
186void
2d21ac55 187soisconnected(struct socket *so)
9bccf70c 188{
189 /*
190 * If socket is subject to filter and is pending initial verdict,
191 * delay marking socket as connected and do not present the connected
192 * socket to user just yet.
193 */
194 if (cfil_sock_connected_pending_verdict(so)) {
195 return;
196 }
197
0a7de745 198 so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING);
1c79356b 199 so->so_state |= SS_ISCONNECTED;
2d21ac55 200
201 soreserve_preconnect(so, 0);
202
91447636 203 sflt_notify(so, sock_evt_connected, NULL);
2d21ac55 204
205 if (so->so_head != NULL && (so->so_state & SS_INCOMP)) {
206 struct socket *head = so->so_head;
207 int locked = 0;
0a7de745 208
209 /*
210 * Enforce lock order when the protocol has per socket locks
211 */
ff6e181a 212 if (head->so_proto->pr_getlock != NULL) {
91447636 213 socket_lock(head, 1);
813fb2f6 214 so_acquire_accept_list(head, so);
d190cdc3 215 locked = 1;
ff6e181a 216 }
217 if (so->so_head == head && (so->so_state & SS_INCOMP)) {
218 so->so_state &= ~SS_INCOMP;
219 so->so_state |= SS_COMP;
813fb2f6 220 TAILQ_REMOVE(&head->so_incomp, so, so_list);
d190cdc3 221 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
222 head->so_incqlen--;
223
224 /*
225 * We have to release the accept list in
226 * case a socket callback calls sock_accept()
227 */
228 if (locked != 0) {
229 so_release_accept_list(head);
d190cdc3 230 socket_unlock(so, 0);
813fb2f6 231 }
d190cdc3 232 postevent(head, 0, EV_RCONN);
233 sorwakeup(head);
234 wakeup_one((caddr_t)&head->so_timeo);
d190cdc3 235
236 if (locked != 0) {
237 socket_unlock(head, 1);
238 socket_lock(so, 0);
5ba3f43e 239 }
240 } else if (locked != 0) {
241 so_release_accept_list(head);
d190cdc3 242 socket_unlock(head, 1);
813fb2f6 243 }
1c79356b 244 } else {
91447636 245 postevent(so, 0, EV_WCONN);
246 wakeup((caddr_t)&so->so_timeo);
247 sorwakeup(so);
248 sowwakeup(so);
249 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED |
250 SO_FILT_HINT_CONNINFO_UPDATED);
251 }
252}
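/*
 * Illustrative user-space sketch (not part of xnu): the
 * SS_ISCONNECTING -> SS_ISCONNECTED transition driven by
 * soisconnecting()/soisconnected() above is what a non-blocking
 * connect() observes; poll() reports the socket writable once the
 * wakeups in soisconnected() have run.
 */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);

    struct sockaddr_in sin;
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_port = htons(80);
    inet_pton(AF_INET, "127.0.0.1", &sin.sin_addr);

    /* EINPROGRESS corresponds to the "connecting" state */
    if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 &&
        errno != EINPROGRESS) {
        perror("connect");
        return 1;
    }

    /* POLLOUT fires once the kernel marks the socket connected */
    struct pollfd pfd = { .fd = fd, .events = POLLOUT };
    poll(&pfd, 1, 5000);

    /* SO_ERROR distinguishes success from a failed connect */
    int err = 0;
    socklen_t len = sizeof(err);
    getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
    if (err == 0)
        printf("connected\n");
    else
        printf("connect failed: %d\n", err);
    close(fd);
    return 0;
}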
253
254boolean_t
255socanwrite(struct socket *so)
256{
0a7de745 257 return (so->so_state & SS_ISCONNECTED) ||
3e170ce0 258 !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
0a7de745 259 (so->so_flags1 & SOF1_PRECONNECT_DATA);
260}
261
1c79356b 262void
2d21ac55 263soisdisconnecting(struct socket *so)
9bccf70c 264{
1c79356b 265 so->so_state &= ~SS_ISCONNECTING;
0a7de745 266 so->so_state |= (SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE);
316670eb 267 soevent(so, SO_FILT_HINT_LOCKED);
91447636 268 sflt_notify(so, sock_evt_disconnecting, NULL);
269 wakeup((caddr_t)&so->so_timeo);
270 sowwakeup(so);
271 sorwakeup(so);
272}
273
274void
2d21ac55 275soisdisconnected(struct socket *so)
9bccf70c 276{
277 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
278 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
279 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
280 SO_FILT_HINT_CONNINFO_UPDATED);
91447636 281 sflt_notify(so, sock_evt_disconnected, NULL);
282 wakeup((caddr_t)&so->so_timeo);
283 sowwakeup(so);
284 sorwakeup(so);
285
286#if CONTENT_FILTER
287 /* Notify content filters as soon as we cannot send/receive data */
288 cfil_sock_notify_shutdown(so, SHUT_RDWR);
289#endif /* CONTENT_FILTER */
290}
291
292/*
293 * This function will issue a wakeup like soisdisconnected but it will not
294 * notify the socket filters. This will avoid unlocking the socket
295 * in the midst of closing it.
296 */
297void
298sodisconnectwakeup(struct socket *so)
299{
300 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
301 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
302 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
303 SO_FILT_HINT_CONNINFO_UPDATED);
304 wakeup((caddr_t)&so->so_timeo);
305 sowwakeup(so);
306 sorwakeup(so);
307
308#if CONTENT_FILTER
309 /* Notify content filters as soon as we cannot send/receive data */
310 cfil_sock_notify_shutdown(so, SHUT_RDWR);
311#endif /* CONTENT_FILTER */
312}
313
314/*
315 * When an attempt at a new connection is noted on a socket
316 * which accepts connections, sonewconn is called. If the
317 * connection is possible (subject to space constraints, etc.)
318 * then we allocate a new structure, properly linked into the
319 * data structure of the original socket, and return this.
320 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
321 */
91447636 322static struct socket *
2d21ac55 323sonewconn_internal(struct socket *head, int connstatus)
9bccf70c 324{
325 int so_qlen, error = 0;
326 struct socket *so;
327 lck_mtx_t *mutex_held;
328
0a7de745 329 if (head->so_proto->pr_getlock != NULL) {
91447636 330 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
0a7de745 331 } else {
91447636 332 mutex_held = head->so_proto->pr_domain->dom_mtx;
0a7de745 333 }
5ba3f43e 334 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1c79356b 335
336 if (!soqlencomp) {
337 /*
338 * This is the default case; so_qlen represents the
339 * sum of both incomplete and completed queues.
340 */
341 so_qlen = head->so_qlen;
342 } else {
343 /*
344 * When kern.ipc.soqlencomp is set to 1, so_qlen
345 * represents only the completed queue. Since we
346 * cannot let the incomplete queue go unbounded
347 * (in case of SYN flood), we cap the incomplete
348 * queue length to at most somaxconn, and use that
349 * as so_qlen so that we fail immediately below.
350 */
351 so_qlen = head->so_qlen - head->so_incqlen;
0a7de745 352 if (head->so_incqlen > somaxconn) {
2d21ac55 353 so_qlen = somaxconn;
0a7de745 354 }
355 }
356
357 if (so_qlen >=
358 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) {
359 return (struct socket *)0;
360 }
39236c6e 361 so = soalloc(1, SOCK_DOM(head), head->so_type);
362 if (so == NULL) {
363 return (struct socket *)0;
364 }
365 /* check if head was closed during the soalloc */
366 if (head->so_proto == NULL) {
2d21ac55 367 sodealloc(so);
0a7de745 368 return (struct socket *)0;
369 }
370
1c79356b 371 so->so_type = head->so_type;
0a7de745 372 so->so_options = head->so_options & ~SO_ACCEPTCONN;
373 so->so_linger = head->so_linger;
374 so->so_state = head->so_state | SS_NOFDREF;
375 so->so_proto = head->so_proto;
376 so->so_timeo = head->so_timeo;
377 so->so_pgid = head->so_pgid;
378 kauth_cred_ref(head->so_cred);
379 so->so_cred = head->so_cred;
380 so->last_pid = head->last_pid;
381 so->last_upid = head->last_upid;
0a7de745 382 memcpy(so->last_uuid, head->last_uuid, sizeof(so->last_uuid));
383 if (head->so_flags & SOF_DELEGATED) {
384 so->e_pid = head->e_pid;
385 so->e_upid = head->e_upid;
0a7de745 386 memcpy(so->e_uuid, head->e_uuid, sizeof(so->e_uuid));
39236c6e 387 }
b0d623f7 388 /* inherit socket options stored in so_flags */
389 so->so_flags = head->so_flags &
390 (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID |
391 SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT |
0a7de745 392 SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS | SOF_NOTSENT_LOWAT |
39236c6e 393 SOF_USELRO | SOF_DELEGATED);
cb323159 394 so->so_flags1 |= SOF1_INBOUND;
91447636 395 so->so_usecount = 1;
396 so->next_lock_lr = 0;
397 so->next_unlock_lr = 0;
1c79356b 398
0a7de745 399 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
400 so->so_rcv.sb_so = so->so_snd.sb_so = so;
401 TAILQ_INIT(&so->so_evlist);
13fec989 402
403#if CONFIG_MACF_SOCKET
404 mac_socket_label_associate_accept(head, so);
405#endif
406
d1ecb069 407 /* inherit traffic management properties of listener */
39037602 408 so->so_flags1 |=
409 head->so_flags1 & (SOF1_TRAFFIC_MGT_SO_BACKGROUND | SOF1_TC_NET_SERV_TYPE |
410 SOF1_QOSMARKING_ALLOWED | SOF1_QOSMARKING_POLICY_OVERRIDE);
d1ecb069 411 so->so_background_thread = head->so_background_thread;
d41d1dae 412 so->so_traffic_class = head->so_traffic_class;
cb323159 413 so->so_netsvctype = head->so_netsvctype;
d1ecb069 414
91447636 415 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
9bccf70c 416 sodealloc(so);
0a7de745 417 return (struct socket *)0;
9bccf70c 418 }
419 so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
420 so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
9bccf70c 421
91447636 422 /*
423 * Must be done with head unlocked to avoid deadlock
424 * for protocol with per socket mutexes.
91447636 425 */
0a7de745 426 if (head->so_proto->pr_unlock) {
37839358 427 socket_unlock(head, 0);
0a7de745 428 }
429 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
430 error) {
1c79356b 431 sodealloc(so);
0a7de745 432 if (head->so_proto->pr_unlock) {
37839358 433 socket_lock(head, 0);
434 }
435 return (struct socket *)0;
1c79356b 436 }
6d2010ae 437 if (head->so_proto->pr_unlock) {
37839358 438 socket_lock(head, 0);
439 /*
440 * Radar 7385998 Recheck that the head is still accepting
441 * to avoid race condition when head is getting closed.
442 */
443 if ((head->so_options & SO_ACCEPTCONN) == 0) {
444 so->so_state &= ~SS_NOFDREF;
445 soclose(so);
0a7de745 446 return (struct socket *)0;
447 }
448 }
449
450 if (so->so_proto->pr_copy_last_owner != NULL) {
451 (*so->so_proto->pr_copy_last_owner)(so, head);
452 }
453 atomic_add_32(&so->so_proto->pr_domain->dom_refs, 1);
454
6d2010ae 455 /* Insert in head appropriate lists */
456 so_acquire_accept_list(head, NULL);
457
458 so->so_head = head;
459
460 /*
461 * Since this socket is going to be inserted into the incomp
462 * queue, it can be picked up by another thread in
463 * tcp_dropdropablreq to get dropped before it is set up.
464 * To prevent this race, set in-progress flag which can be
465 * cleared later
466 */
467 so->so_flags |= SOF_INCOMP_INPROGRESS;
468
469 if (connstatus) {
470 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
471 so->so_state |= SS_COMP;
472 } else {
473 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
474 so->so_state |= SS_INCOMP;
475 head->so_incqlen++;
476 }
477 head->so_qlen++;
91447636 478
479 so_release_accept_list(head);
480
481 /* Attach socket filters for this protocol */
482 sflt_initsock(so);
2d21ac55 483
484 if (connstatus) {
485 so->so_state |= connstatus;
486 sorwakeup(head);
487 wakeup((caddr_t)&head->so_timeo);
488 }
0a7de745 489 return so;
490}
491
492
493struct socket *
2d21ac55 494sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
91447636 495{
6d2010ae 496 int error = sflt_connectin(head, from);
91447636 497 if (error) {
0a7de745 498 return NULL;
91447636 499 }
2d21ac55 500
0a7de745 501 return sonewconn_internal(head, connstatus);
502}
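/*
 * Illustrative user-space sketch (not part of xnu): the so_qlimit
 * that sonewconn_internal() checks comes from the backlog argument
 * to listen() (clamped by the kern.ipc.somaxconn sysctl); completed
 * connections sit on so_comp until accept() takes them.
 */
#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int lfd = socket(AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in sin;
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sin.sin_port = 0;                     /* kernel picks a port */
    bind(lfd, (struct sockaddr *)&sin, sizeof(sin));

    listen(lfd, 5);                       /* backlog -> so_qlimit */

    /*
     * Blocks until a peer connects and soisconnected() moves the
     * new socket from so_incomp to so_comp, waking this accept().
     */
    struct sockaddr_in peer;
    socklen_t plen = sizeof(peer);
    int cfd = accept(lfd, (struct sockaddr *)&peer, &plen);
    if (cfd >= 0) {
        printf("accepted a connection\n");
        close(cfd);
    }
    close(lfd);
    return 0;
}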
503
504/*
505 * Socantsendmore indicates that no more data will be sent on the
506 * socket; it would normally be applied to a socket when the user
507 * informs the system that no more data is to be sent, by the protocol
508 * code (in the case of PRU_SHUTDOWN). Socantrcvmore indicates that no more data
509 * will be received, and will normally be applied to the socket by a
510 * protocol when it detects that the peer will send no more data.
511 * Data queued for reading in the socket may yet be read.
512 */
513
514void
2d21ac55 515socantsendmore(struct socket *so)
9bccf70c 516{
1c79356b 517 so->so_state |= SS_CANTSENDMORE;
39236c6e 518 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE);
91447636 519 sflt_notify(so, sock_evt_cantsendmore, NULL);
520 sowwakeup(so);
521}
522
523void
2d21ac55 524socantrcvmore(struct socket *so)
9bccf70c 525{
1c79356b 526 so->so_state |= SS_CANTRCVMORE;
39236c6e 527 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
91447636 528 sflt_notify(so, sock_evt_cantrecvmore, NULL);
529 sorwakeup(so);
530}
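/*
 * Illustrative user-space sketch (not part of xnu): shutdown(SHUT_WR)
 * drives socantsendmore() on the sender and, through the protocol,
 * socantrcvmore() on the peer; data already queued can still be read,
 * after which read() returns 0.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int sv[2];
    socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

    write(sv[0], "bye", 3);
    shutdown(sv[0], SHUT_WR);        /* no more data will be sent */

    char buf[8];
    ssize_t n;
    while ((n = read(sv[1], buf, sizeof(buf))) > 0)
        ;                            /* drain the queued bytes */
    printf("read returned %zd: peer saw EOF\n", n);
    close(sv[0]);
    close(sv[1]);
    return 0;
}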
531
532/*
533 * Wait for data to arrive at/drain from a socket buffer.
534 */
535int
2d21ac55 536sbwait(struct sockbuf *sb)
1c79356b 537{
538 boolean_t nointr = (sb->sb_flags & SB_NOINTR);
539 void *lr_saved = __builtin_return_address(0);
540 struct socket *so = sb->sb_so;
541 lck_mtx_t *mutex_held;
542 struct timespec ts;
39236c6e 543 int error = 0;
91447636 544
545 if (so == NULL) {
546 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
547 __func__, sb, sb->sb_flags, lr_saved);
548 /* NOTREACHED */
549 } else if (so->so_usecount < 1) {
550 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
551 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
552 so->so_usecount, lr_saved, solockhistory_nr(so));
553 /* NOTREACHED */
554 }
2d21ac55 555
556 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
557 error = EBADF;
558 if (so->so_flags & SOF_DEFUNCT) {
559 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
560 "(%d)\n", __func__, proc_selfpid(),
561 proc_best_name(current_proc()),
562 (uint64_t)VM_KERNEL_ADDRPERM(so),
563 SOCK_DOM(so), SOCK_TYPE(so), error);
564 }
0a7de745 565 return error;
566 }
567
0a7de745 568 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 569 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 570 } else {
91447636 571 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 572 }
1c79356b 573
5ba3f43e 574 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
91447636 575
576 ts.tv_sec = sb->sb_timeo.tv_sec;
577 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
578
579 sb->sb_waiters++;
580 VERIFY(sb->sb_waiters != 0);
581
91447636 582 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
583 nointr ? PSOCK : PSOCK | PCATCH,
584 nointr ? "sbwait_nointr" : "sbwait", &ts);
91447636 585
586 VERIFY(sb->sb_waiters != 0);
587 sb->sb_waiters--;
91447636 588
589 if (so->so_usecount < 1) {
590 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
591 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
592 so->so_usecount, lr_saved, solockhistory_nr(so));
593 /* NOTREACHED */
594 }
91447636 595
6d2010ae 596 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
91447636 597 error = EBADF;
6d2010ae 598 if (so->so_flags & SOF_DEFUNCT) {
39037602 599 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
39236c6e 600 "(%d)\n", __func__, proc_selfpid(),
39037602 601 proc_best_name(current_proc()),
39236c6e 602 (uint64_t)VM_KERNEL_ADDRPERM(so),
39037602 603 SOCK_DOM(so), SOCK_TYPE(so), error);
6d2010ae 604 }
605 }
606
0a7de745 607 return error;
608}
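/*
 * Illustrative user-space sketch (not part of xnu): the timespec that
 * sbwait() sleeps on comes from sb_timeo, which SO_RCVTIMEO sets, so
 * a receive on an empty buffer wakes up with EAGAIN/EWOULDBLOCK after
 * the timeout instead of blocking indefinitely.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>

int main(void)
{
    int sv[2];
    socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

    struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
    setsockopt(sv[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));

    char buf[16];
    /* nothing was written on sv[1]; this waits ~1s in sbwait() */
    ssize_t n = recv(sv[0], buf, sizeof(buf), 0);
    if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
        printf("receive timed out as expected\n");
    close(sv[0]);
    close(sv[1]);
    return 0;
}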
609
610void
611sbwakeup(struct sockbuf *sb)
612{
0a7de745 613 if (sb->sb_waiters > 0) {
6d2010ae 614 wakeup((caddr_t)&sb->sb_cc);
0a7de745 615 }
616}
617
618/*
619 * Wakeup processes waiting on a socket buffer.
620 * Do asynchronous notification via SIGIO
621 * if the socket has the SS_ASYNC flag set.
622 */
623void
cb323159 624sowakeup(struct socket *so, struct sockbuf *sb, struct socket *so2)
1c79356b 625{
6d2010ae 626 if (so->so_flags & SOF_DEFUNCT) {
39037602 627 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] si 0x%x, "
39236c6e 628 "fl 0x%x [%s]\n", __func__, proc_selfpid(),
39037602 629 proc_best_name(current_proc()),
630 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
631 SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags,
39037602 632 (sb->sb_flags & SB_RECV) ? "rcv" : "snd");
633 }
634
0b4e3aa0 635 sb->sb_flags &= ~SB_SEL;
1c79356b 636 selwakeup(&sb->sb_sel);
6d2010ae 637 sbwakeup(sb);
1c79356b 638 if (so->so_state & SS_ASYNC) {
0a7de745 639 if (so->so_pgid < 0) {
1c79356b 640 gsignal(-so->so_pgid, SIGIO);
0a7de745 641 } else if (so->so_pgid > 0) {
2d21ac55 642 proc_signal(so->so_pgid, SIGIO);
0a7de745 643 }
1c79356b 644 }
645 if (sb->sb_flags & SB_KNOTE) {
646 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
647 }
648 if (sb->sb_flags & SB_UPCALL) {
649 void (*sb_upcall)(struct socket *, void *, int);
650 caddr_t sb_upcallarg;
5ba3f43e 651 int lock = !(sb->sb_flags & SB_UPCALL_LOCK);
2d21ac55 652
653 sb_upcall = sb->sb_upcall;
654 sb_upcallarg = sb->sb_upcallarg;
2d21ac55 655 /* Let close know that we're about to do an upcall */
316670eb 656 so->so_upcallusecount++;
2d21ac55 657
0a7de745 658 if (lock) {
659 if (so2) {
660 struct unpcb *unp = sotounpcb(so2);
661 unp->unp_flags |= UNP_DONTDISCONNECT;
662 unp->rw_thrcount++;
663
664 socket_unlock(so2, 0);
665 }
5ba3f43e 666 socket_unlock(so, 0);
0a7de745 667 }
39236c6e 668 (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT);
0a7de745 669 if (lock) {
670 if (so2 && so > so2) {
671 struct unpcb *unp;
672 socket_lock(so2, 0);
673
674 unp = sotounpcb(so2);
675 unp->rw_thrcount--;
676 if (unp->rw_thrcount == 0) {
677 unp->unp_flags &= ~UNP_DONTDISCONNECT;
678 wakeup(unp);
679 }
680 }
681
5ba3f43e 682 socket_lock(so, 0);
683
684 if (so2 && so < so2) {
685 struct unpcb *unp;
686 socket_lock(so2, 0);
687
688 unp = sotounpcb(so2);
689 unp->rw_thrcount--;
690 if (unp->rw_thrcount == 0) {
691 unp->unp_flags &= ~UNP_DONTDISCONNECT;
692 wakeup(unp);
693 }
694 }
0a7de745 695 }
2d21ac55 696
316670eb 697 so->so_upcallusecount--;
2d21ac55 698 /* Tell close that it's safe to proceed */
39236c6e 699 if ((so->so_flags & SOF_CLOSEWAIT) &&
0a7de745 700 so->so_upcallusecount == 0) {
39236c6e 701 wakeup((caddr_t)&so->so_upcallusecount);
0a7de745 702 }
91447636 703 }
704#if CONTENT_FILTER
705 /*
706 * Trap disconnection events for content filters
707 */
708 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
709 if ((sb->sb_flags & SB_RECV)) {
0a7de745 710 if (so->so_state & (SS_CANTRCVMORE)) {
fe8ab488 711 cfil_sock_notify_shutdown(so, SHUT_RD);
0a7de745 712 }
fe8ab488 713 } else {
0a7de745 714 if (so->so_state & (SS_CANTSENDMORE)) {
fe8ab488 715 cfil_sock_notify_shutdown(so, SHUT_WR);
0a7de745 716 }
717 }
718 }
719#endif /* CONTENT_FILTER */
720}
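/*
 * Illustrative user-space sketch (not part of xnu): the SS_ASYNC /
 * so_pgid signalling in sowakeup() is armed from user space with
 * F_SETOWN and O_ASYNC; the owner then receives SIGIO when data
 * arrives on the socket.
 */
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

static volatile sig_atomic_t got_sigio;

static void on_sigio(int sig)
{
    (void)sig;
    got_sigio = 1;
}

int main(void)
{
    int sv[2];
    socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

    /* block SIGIO so we can wait for it race-free below */
    sigset_t block, old;
    sigemptyset(&block);
    sigaddset(&block, SIGIO);
    sigprocmask(SIG_BLOCK, &block, &old);
    signal(SIGIO, on_sigio);

    fcntl(sv[0], F_SETOWN, getpid());        /* sets so_pgid */
    fcntl(sv[0], F_SETFL,
        fcntl(sv[0], F_GETFL, 0) | O_ASYNC); /* sets SS_ASYNC */

    write(sv[1], "x", 1);                    /* triggers sowakeup() */
    while (!got_sigio)
        sigsuspend(&old);                    /* wait for SIGIO */
    printf("got SIGIO\n");
    return 0;
}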
721
722/*
723 * Socket buffer (struct sockbuf) utility routines.
724 *
725 * Each socket contains two socket buffers: one for sending data and
726 * one for receiving data. Each buffer contains a queue of mbufs,
727 * information about the number of mbufs and amount of data in the
728 * queue, and other fields allowing select() statements and notification
729 * on data availability to be implemented.
730 *
731 * Data stored in a socket buffer is maintained as a list of records.
732 * Each record is a list of mbufs chained together with the m_next
733 * field. Records are chained together with the m_nextpkt field. The upper
734 * level routine soreceive() expects the following conventions to be
735 * observed when placing information in the receive buffer:
736 *
737 * 1. If the protocol requires each message be preceded by the sender's
738 * name, then a record containing that name must be present before
739 * any associated data (mbuf's must be of type MT_SONAME).
740 * 2. If the protocol supports the exchange of ``access rights'' (really
741 * just additional data associated with the message), and there are
742 * ``rights'' to be received, then a record containing this data
743 * should be present (mbuf's must be of type MT_RIGHTS).
744 * 3. If a name or rights record exists, then it must be followed by
745 * a data record, perhaps of zero length.
746 *
747 * Before using a new socket structure it is first necessary to reserve
748 * buffer space to the socket, by calling sbreserve(). This should commit
749 * some of the available buffer space in the system buffer pool for the
750 * socket (currently, it does nothing but enforce limits). The space
751 * should be released by calling sbrelease() when the socket is destroyed.
752 */
753
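/*
 * Illustrative sketch with simplified types (not the real struct
 * mbuf): how the record layout described above is traversed --
 * records are linked through m_nextpkt and the mbufs of one record
 * through m_next, as sbcheck() below does.
 */
#include <stddef.h>

struct toy_mbuf {
    struct toy_mbuf *m_next;    /* next mbuf in this record */
    struct toy_mbuf *m_nextpkt; /* first mbuf of next record */
    int              m_len;     /* bytes held by this mbuf */
};

static int
toy_sockbuf_bytes(struct toy_mbuf *sb_mb)
{
    int cc = 0;
    for (struct toy_mbuf *rec = sb_mb; rec != NULL; rec = rec->m_nextpkt) {
        for (struct toy_mbuf *m = rec; m != NULL; m = m->m_next) {
            cc += m->m_len;
        }
    }
    return cc;    /* corresponds to sb_cc */
}

int main(void)
{
    struct toy_mbuf rec2 = { NULL, NULL, 3 };
    struct toy_mbuf rec1b = { NULL, NULL, 5 };
    struct toy_mbuf rec1a = { &rec1b, &rec2, 7 };  /* two records */
    return toy_sockbuf_bytes(&rec1a) == 15 ? 0 : 1;
}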
754/*
755 * Returns: 0 Success
756 * ENOBUFS
757 */
1c79356b 758int
b0d623f7 759soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc)
1c79356b 760{
761 /*
762 * We do not want to fail the creation of a socket
763 * when kern.ipc.maxsockbuf is less than the
764 * default socket buffer size of the protocol,
765 * so force the buffer sizes to be at most the
766 * limit enforced by sbreserve()
767 */
768 uint64_t maxcc = (uint64_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
769 if (sndcc > maxcc) {
770 sndcc = maxcc;
771 }
772 if (rcvcc > maxcc) {
773 rcvcc = maxcc;
774 }
775 if (sbreserve(&so->so_snd, sndcc) == 0) {
1c79356b 776 goto bad;
0a7de745 777 } else {
316670eb 778 so->so_snd.sb_idealsize = sndcc;
0a7de745 779 }
316670eb 780
0a7de745 781 if (sbreserve(&so->so_rcv, rcvcc) == 0) {
1c79356b 782 goto bad2;
0a7de745 783 } else {
316670eb 784 so->so_rcv.sb_idealsize = rcvcc;
0a7de745 785 }
316670eb 786
0a7de745 787 if (so->so_rcv.sb_lowat == 0) {
1c79356b 788 so->so_rcv.sb_lowat = 1;
789 }
790 if (so->so_snd.sb_lowat == 0) {
1c79356b 791 so->so_snd.sb_lowat = MCLBYTES;
792 }
793 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) {
1c79356b 794 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
795 }
796 return 0;
1c79356b 797bad2:
39236c6e 798 so->so_snd.sb_flags &= ~SB_SEL;
0b4e3aa0 799 selthreadclear(&so->so_snd.sb_sel);
800 sbrelease(&so->so_snd);
801bad:
0a7de745 802 return ENOBUFS;
803}
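/*
 * Illustrative user-space sketch (not part of xnu): the limit that
 * soreserve()/sbreserve() enforce also bounds what SO_SNDBUF and
 * SO_RCVBUF can request, so read the effective value back rather
 * than assuming the request was honored.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);

    int req = 4 * 1024 * 1024;    /* may exceed the clamp above */
    setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &req, sizeof(req));

    int got = 0;
    socklen_t len = sizeof(got);
    getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &got, &len);
    printf("requested %d, effective %d\n", req, got);
    close(fd);
    return 0;
}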
804
805void
806soreserve_preconnect(struct socket *so, unsigned int pre_cc)
807{
808 /* As of now, same bytes for both preconnect read and write */
809 so->so_snd.sb_preconn_hiwat = pre_cc;
810 so->so_rcv.sb_preconn_hiwat = pre_cc;
811}
812
813/*
814 * Allot mbufs to a sockbuf.
815 * Attempt to scale mbmax so that mbcnt doesn't become limiting
816 * if buffering efficiency is near the normal case.
817 */
818int
b0d623f7 819sbreserve(struct sockbuf *sb, u_int32_t cc)
1c79356b 820{
821 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) {
822 return 0;
823 }
824 sb->sb_hiwat = cc;
825 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
0a7de745 826 if (sb->sb_lowat > sb->sb_hiwat) {
1c79356b 827 sb->sb_lowat = sb->sb_hiwat;
828 }
829 return 1;
830}
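/*
 * Worked example of the limit checked in sbreserve() above, using
 * common defaults for this tree (MSIZE 256, MCLBYTES 2048,
 * SB_MAX/sb_max 8 MB -- the exact values vary by configuration):
 *
 *     sb_max * MCLBYTES / (MSIZE + MCLBYTES)
 *   = 8388608 * 2048 / 2304 ~= 7456540 bytes
 *
 * i.e. roughly 8/9 of sb_max, leaving headroom for the per-cluster
 * mbuf header overhead that mbcnt accounting charges.
 */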
831
832/*
833 * Free mbufs held by a socket, and reserved mbuf space.
834 */
2d21ac55 835/* WARNING needs to do selthreadclear() before calling this */
1c79356b 836void
2d21ac55 837sbrelease(struct sockbuf *sb)
1c79356b 838{
1c79356b 839 sbflush(sb);
840 sb->sb_hiwat = 0;
841 sb->sb_mbmax = 0;
842}
843
844/*
845 * Routines to add and remove
846 * data from an mbuf queue.
847 *
848 * The routines sbappend() or sbappendrecord() are normally called to
849 * append new mbufs to a socket buffer, after checking that adequate
850 * space is available, comparing the function sbspace() with the amount
851 * of data to be added. sbappendrecord() differs from sbappend() in
852 * that data supplied is treated as the beginning of a new record.
853 * To place a sender's address, optional access rights, and data in a
854 * socket receive buffer, sbappendaddr() should be used. To place
855 * access rights and data in a socket receive buffer, sbappendrights()
856 * should be used. In either case, the new data begins a new record.
857 * Note that unlike sbappend() and sbappendrecord(), these routines check
858 * for the caller that there will be enough space to store the data.
859 * Each fails if there is not enough space, or if it cannot find mbufs
860 * to store additional information in.
861 *
862 * Reliable protocols may use the socket send buffer to hold data
863 * awaiting acknowledgement. Data is normally copied from a socket
864 * send buffer in a protocol with m_copy for output to a peer,
865 * and then removing the data from the socket buffer with sbdrop()
866 * or sbdroprecord() when the data is acknowledged by the peer.
867 */
868
869/*
870 * Append mbuf chain m to the last record in the
871 * socket buffer sb. The additional space associated
872 * with the mbuf chain is recorded in sb. Empty mbufs are
873 * discarded and mbufs are compacted where possible.
874 */
91447636 875int
2d21ac55 876sbappend(struct sockbuf *sb, struct mbuf *m)
9bccf70c 877{
2d21ac55 878 struct socket *so = sb->sb_so;
1c79356b 879
2d21ac55 880 if (m == NULL || (sb->sb_flags & SB_DROP)) {
0a7de745 881 if (m != NULL) {
2d21ac55 882 m_freem(m);
883 }
884 return 0;
2d21ac55 885 }
fa4905b1 886
2d21ac55 887 SBLASTRECORDCHK(sb, "sbappend 1");
fa4905b1 888
889 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) {
890 return sbappendrecord(sb, m);
891 }
2d21ac55 892
fe8ab488 893 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
6d2010ae 894 int error = sflt_data_in(so, NULL, &m, NULL, 0);
2d21ac55 895 SBLASTRECORDCHK(sb, "sbappend 2");
896
897#if CONTENT_FILTER
0a7de745 898 if (error == 0) {
fe8ab488 899 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
0a7de745 900 }
901#endif /* CONTENT_FILTER */
902
2d21ac55 903 if (error != 0) {
0a7de745 904 if (error != EJUSTRETURN) {
2d21ac55 905 m_freem(m);
906 }
907 return 0;
91447636 908 }
909 } else if (m) {
910 m->m_flags &= ~M_SKIPCFIL;
911 }
912
2d21ac55 913 /* If this is the first record, it's also the last record */
0a7de745 914 if (sb->sb_lastrecord == NULL) {
2d21ac55 915 sb->sb_lastrecord = m;
0a7de745 916 }
fa4905b1 917
918 sbcompress(sb, m, sb->sb_mbtail);
919 SBLASTRECORDCHK(sb, "sbappend 3");
0a7de745 920 return 1;
921}
922
923/*
924 * Similar to sbappend, except that this is optimized for stream sockets.
925 */
926int
927sbappendstream(struct sockbuf *sb, struct mbuf *m)
928{
929 struct socket *so = sb->sb_so;
930
2d21ac55 931 if (m == NULL || (sb->sb_flags & SB_DROP)) {
0a7de745 932 if (m != NULL) {
2d21ac55 933 m_freem(m);
934 }
935 return 0;
936 }
937
938 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
939 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
940 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
941 /* NOTREACHED */
942 }
943
944 SBLASTMBUFCHK(sb, __func__);
945
fe8ab488 946 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
6d2010ae 947 int error = sflt_data_in(so, NULL, &m, NULL, 0);
2d21ac55 948 SBLASTRECORDCHK(sb, "sbappendstream 1");
949
950#if CONTENT_FILTER
0a7de745 951 if (error == 0) {
fe8ab488 952 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
0a7de745 953 }
954#endif /* CONTENT_FILTER */
955
2d21ac55 956 if (error != 0) {
0a7de745 957 if (error != EJUSTRETURN) {
2d21ac55 958 m_freem(m);
959 }
960 return 0;
2d21ac55 961 }
962 } else if (m) {
963 m->m_flags &= ~M_SKIPCFIL;
964 }
965
966 sbcompress(sb, m, sb->sb_mbtail);
967 sb->sb_lastrecord = sb->sb_mb;
968 SBLASTRECORDCHK(sb, "sbappendstream 2");
0a7de745 969 return 1;
970}
971
972#ifdef SOCKBUF_DEBUG
973void
2d21ac55 974sbcheck(struct sockbuf *sb)
1c79356b 975{
976 struct mbuf *m;
977 struct mbuf *n = 0;
b0d623f7 978 u_int32_t len = 0, mbcnt = 0;
979 lck_mtx_t *mutex_held;
980
0a7de745 981 if (sb->sb_so->so_proto->pr_getlock != NULL) {
91447636 982 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
0a7de745 983 } else {
91447636 984 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
0a7de745 985 }
91447636 986
5ba3f43e 987 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
91447636 988
0a7de745 989 if (sbchecking == 0) {
91447636 990 return;
0a7de745 991 }
992
993 for (m = sb->sb_mb; m; m = n) {
994 n = m->m_nextpkt;
995 for (; m; m = m->m_next) {
996 len += m->m_len;
997 mbcnt += MSIZE;
998 /* XXX pretty sure this is bogus */
0a7de745 999 if (m->m_flags & M_EXT) {
2d21ac55 1000 mbcnt += m->m_ext.ext_size;
0a7de745 1001 }
1002 }
1003 }
1004 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1005 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
1006 mbcnt, sb->sb_mbcnt);
1007 }
1008}
1009#endif
1010
1011void
1012sblastrecordchk(struct sockbuf *sb, const char *where)
1013{
1014 struct mbuf *m = sb->sb_mb;
1015
0a7de745 1016 while (m && m->m_nextpkt) {
2d21ac55 1017 m = m->m_nextpkt;
0a7de745 1018 }
1019
1020 if (m != sb->sb_lastrecord) {
1021 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
1022 "last 0x%llx\n",
1023 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1024 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
1025 (uint64_t)VM_KERNEL_ADDRPERM(m));
2d21ac55 1026 printf("packet chain:\n");
0a7de745 1027 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
fe8ab488 1028 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
0a7de745 1029 }
1030 panic("sblastrecordchk from %s", where);
1031 }
1032}
1033
1034void
1035sblastmbufchk(struct sockbuf *sb, const char *where)
1036{
1037 struct mbuf *m = sb->sb_mb;
1038 struct mbuf *n;
1039
0a7de745 1040 while (m && m->m_nextpkt) {
2d21ac55 1041 m = m->m_nextpkt;
0a7de745 1042 }
2d21ac55 1043
0a7de745 1044 while (m && m->m_next) {
2d21ac55 1045 m = m->m_next;
0a7de745 1046 }
1047
1048 if (m != sb->sb_mbtail) {
1049 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1050 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1051 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
1052 (uint64_t)VM_KERNEL_ADDRPERM(m));
1053 printf("packet tree:\n");
1054 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1055 printf("\t");
0a7de745 1056 for (n = m; n != NULL; n = n->m_next) {
1057 printf("0x%llx ",
1058 (uint64_t)VM_KERNEL_ADDRPERM(n));
0a7de745 1059 }
1060 printf("\n");
1061 }
1062 panic("sblastmbufchk from %s", where);
1063 }
1064}
1065
1c79356b 1066/*
2d21ac55 1067 * Similar to sbappend, except the mbuf chain begins a new record.
1c79356b 1068 */
91447636 1069int
2d21ac55 1070sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1c79356b 1071{
1072 struct mbuf *m;
1073 int space = 0;
9bccf70c 1074
2d21ac55 1075 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
0a7de745 1076 if (m0 != NULL) {
2d21ac55 1077 m_freem(m0);
1078 }
1079 return 0;
1080 }
1081
0a7de745 1082 for (m = m0; m != NULL; m = m->m_next) {
2d21ac55 1083 space += m->m_len;
0a7de745 1084 }
1085
1086 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1087 m_freem(m0);
0a7de745 1088 return 0;
1089 }
1090
fe8ab488 1091 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
2d21ac55 1092 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
6d2010ae 1093 sock_data_filt_flag_record);
1094
1095#if CONTENT_FILTER
0a7de745 1096 if (error == 0) {
fe8ab488 1097 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
0a7de745 1098 }
1099#endif /* CONTENT_FILTER */
1100
91447636 1101 if (error != 0) {
2d21ac55 1102 SBLASTRECORDCHK(sb, "sbappendrecord 1");
0a7de745 1103 if (error != EJUSTRETURN) {
91447636 1104 m_freem(m0);
1105 }
1106 return 0;
1c79356b 1107 }
1108 } else if (m0) {
1109 m0->m_flags &= ~M_SKIPCFIL;
1c79356b 1110 }
2d21ac55 1111
1c79356b 1112 /*
1113 * Note this permits zero length records.
1114 */
1115 sballoc(sb, m0);
1116 SBLASTRECORDCHK(sb, "sbappendrecord 2");
1117 if (sb->sb_lastrecord != NULL) {
1118 sb->sb_lastrecord->m_nextpkt = m0;
39236c6e 1119 } else {
1c79356b 1120 sb->sb_mb = m0;
1121 }
1122 sb->sb_lastrecord = m0;
4a3eedf9 1123 sb->sb_mbtail = m0;
2d21ac55 1124
1125 m = m0->m_next;
1126 m0->m_next = 0;
1127 if (m && (m0->m_flags & M_EOR)) {
1128 m0->m_flags &= ~M_EOR;
1129 m->m_flags |= M_EOR;
1130 }
1131 sbcompress(sb, m, m0);
1132 SBLASTRECORDCHK(sb, "sbappendrecord 3");
0a7de745 1133 return 1;
1134}
1135
1c79356b 1136/*
1137 * Concatenate address (optional), control (optional) and data into one
1138 * single mbuf chain. If sockbuf *sb is passed in, space check will be
1139 * performed.
2d21ac55 1140 *
d9a64523 1141 * Returns: mbuf chain pointer if succeeded, NULL if failed
1c79356b 1142 */
1143struct mbuf *
1144sbconcat_mbufs(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)
1c79356b 1145{
1146 struct mbuf *m = NULL, *n = NULL;
1147 int space = 0;
1c79356b 1148
0a7de745 1149 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
d9a64523 1150 panic("sbconcat_mbufs");
0a7de745 1151 }
1c79356b 1152
0a7de745 1153 if (m0) {
1c79356b 1154 space += m0->m_pkthdr.len;
0a7de745 1155 }
1156 for (n = control; n; n = n->m_next) {
1157 space += n->m_len;
0a7de745 1158 if (n->m_next == 0) { /* keep pointer to last control buf */
1c79356b 1159 break;
0a7de745 1160 }
1c79356b 1161 }
1162
1163 if (asa != NULL) {
1164 if (asa->sa_len > MLEN) {
1165 return NULL;
1166 }
1167 space += asa->sa_len;
1168 }
1169
1170 if (sb != NULL && space > sbspace(sb)) {
1171 return NULL;
1172 }
d9a64523 1173
1174 if (n) {
1175 n->m_next = m0; /* concatenate data to control */
1176 } else {
1c79356b 1177 control = m0;
0a7de745 1178 }
2d21ac55 1179
1180 if (asa != NULL) {
1181 MGET(m, M_DONTWAIT, MT_SONAME);
1182 if (m == 0) {
1183 if (n) {
1184 /* unchain control and data if necessary */
1185 n->m_next = NULL;
1186 }
0a7de745 1187 return NULL;
1188 }
1189 m->m_len = asa->sa_len;
1190 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
1191
1192 m->m_next = control;
1193 } else {
1194 m = control;
1195 }
1196
0a7de745 1197 return m;
1198}
1199
1200/*
1201 * Queue mbuf chain to the receive queue of a socket.
1202 * Parameter space is the total len of the mbuf chain.
1203 * If passed in, sockbuf space will be checked.
1204 *
1205 * Returns: 0 Invalid mbuf chain
1206 * 1 Success
1207 */
1208int
1209sbappendchain(struct sockbuf *sb, struct mbuf *m, int space)
1210{
1211 struct mbuf *n, *nlast;
1212
0a7de745
A
1213 if (m == NULL) {
1214 return 0;
1215 }
d9a64523 1216
0a7de745
A
1217 if (space != 0 && space > sbspace(sb)) {
1218 return 0;
1219 }
2d21ac55 1220
0a7de745 1221 for (n = m; n->m_next != NULL; n = n->m_next) {
1c79356b 1222 sballoc(sb, n);
0a7de745 1223 }
2d21ac55
A
1224 sballoc(sb, n);
1225 nlast = n;
1226
1227 if (sb->sb_lastrecord != NULL) {
1228 sb->sb_lastrecord->m_nextpkt = m;
1229 } else {
1c79356b 1230 sb->sb_mb = m;
2d21ac55
A
1231 }
1232 sb->sb_lastrecord = m;
1233 sb->sb_mbtail = nlast;
1234
1235 SBLASTMBUFCHK(sb, __func__);
1236 SBLASTRECORDCHK(sb, "sbappendadddr 2");
1237
1238 postevent(0, sb, EV_RWBYTES);
0a7de745 1239 return 1;
1c79356b
A
1240}
1241
2d21ac55
A
1242/*
1243 * Returns: 0 Error: No space/out of mbufs/etc.
1244 * 1 Success
1245 *
1246 * Imputed: (*error_out) errno for error
1247 * ENOBUFS
1248 * sflt_data_in:??? [whatever a filter author chooses]
1249 */
1c79356b 1250int
1251sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
1252 struct mbuf *control, int *error_out)
1253{
1254 int result = 0;
2d21ac55 1255 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
d9a64523 1256 struct mbuf *mbuf_chain = NULL;
2d21ac55 1257
0a7de745 1258 if (error_out) {
2d21ac55 1259 *error_out = 0;
0a7de745 1260 }
2d21ac55 1261
0a7de745 1262 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
91447636 1263 panic("sbappendaddrorfree");
0a7de745 1264 }
1265
1266 if (sb->sb_flags & SB_DROP) {
0a7de745 1267 if (m0 != NULL) {
2d21ac55 1268 m_freem(m0);
1269 }
1270 if (control != NULL && !sb_unix) {
2d21ac55 1271 m_freem(control);
1272 }
1273 if (error_out != NULL) {
2d21ac55 1274 *error_out = EINVAL;
1275 }
1276 return 0;
1277 }
1278
91447636 1279 /* Call socket data in filters */
fe8ab488 1280 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
91447636 1281 int error;
6d2010ae 1282 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
2d21ac55 1283 SBLASTRECORDCHK(sb, __func__);
1284
1285#if CONTENT_FILTER
0a7de745 1286 if (error == 0) {
1287 error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
1288 0);
0a7de745 1289 }
1290#endif /* CONTENT_FILTER */
1291
1292 if (error) {
1293 if (error != EJUSTRETURN) {
0a7de745 1294 if (m0) {
2d21ac55 1295 m_freem(m0);
1296 }
1297 if (control != NULL && !sb_unix) {
2d21ac55 1298 m_freem(control);
1299 }
1300 if (error_out) {
2d21ac55 1301 *error_out = error;
0a7de745 1302 }
91447636 1303 }
0a7de745 1304 return 0;
91447636 1305 }
1306 } else if (m0) {
1307 m0->m_flags &= ~M_SKIPCFIL;
91447636 1308 }
2d21ac55 1309
1310 mbuf_chain = sbconcat_mbufs(sb, asa, m0, control);
1311 SBLASTRECORDCHK(sb, "sbappendadddr 1");
1312 result = sbappendchain(sb, mbuf_chain, 0);
91447636 1313 if (result == 0) {
0a7de745 1314 if (m0) {
2d21ac55 1315 m_freem(m0);
1316 }
1317 if (control != NULL && !sb_unix) {
2d21ac55 1318 m_freem(control);
1319 }
1320 if (error_out) {
2d21ac55 1321 *error_out = ENOBUFS;
0a7de745 1322 }
91447636 1323 }
2d21ac55 1324
1325 return result;
1326}
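/*
 * Illustrative user-space sketch (not part of xnu): the MT_SONAME
 * record that sbappendaddr() queues ahead of each datagram is what
 * recvfrom() returns as the sender's address.
 */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int rx = socket(AF_INET, SOCK_DGRAM, 0);
    int tx = socket(AF_INET, SOCK_DGRAM, 0);

    struct sockaddr_in sin;
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sin.sin_port = 0;
    bind(rx, (struct sockaddr *)&sin, sizeof(sin));

    socklen_t slen = sizeof(sin);
    getsockname(rx, (struct sockaddr *)&sin, &slen); /* learn port */
    sendto(tx, "hi", 2, 0, (struct sockaddr *)&sin, sizeof(sin));

    char buf[16], addr[INET_ADDRSTRLEN];
    struct sockaddr_in from;
    socklen_t flen = sizeof(from);
    ssize_t n = recvfrom(rx, buf, sizeof(buf), 0,
        (struct sockaddr *)&from, &flen);
    inet_ntop(AF_INET, &from.sin_addr, addr, sizeof(addr));
    printf("%zd bytes from %s:%u\n", n, addr, ntohs(from.sin_port));
    close(rx);
    close(tx);
    return 0;
}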
1327
1328inline boolean_t
1329is_cmsg_valid(struct mbuf *control, struct cmsghdr *cmsg)
1330{
1331 if (cmsg == NULL) {
1332 return FALSE;
1333 }
1334
1335 if (cmsg->cmsg_len < sizeof(struct cmsghdr)) {
1336 return FALSE;
1337 }
1338
1339 if ((uint8_t *)control->m_data >= (uint8_t *)cmsg + cmsg->cmsg_len) {
1340 return FALSE;
1341 }
1342
1343 if ((uint8_t *)control->m_data + control->m_len <
1344 (uint8_t *)cmsg + cmsg->cmsg_len) {
1345 return FALSE;
1346 }
1347
1348 return TRUE;
1349}
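/*
 * Illustrative user-space sketch (not part of xnu): the bounds that
 * is_cmsg_valid() enforces are the invariants the CMSG_* macros rely
 * on when walking ancillary data.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>

int main(void)
{
    union {
        char buf[CMSG_SPACE(sizeof(struct timeval))];
        struct cmsghdr align;
    } u;
    memset(&u, 0, sizeof(u));

    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    msg.msg_control = u.buf;
    msg.msg_controllen = sizeof(u.buf);

    /* build one well-formed control message in the buffer */
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_TIMESTAMP;
    cmsg->cmsg_len = CMSG_LEN(sizeof(struct timeval));

    /* iterate exactly the way a receiver of recvmsg() output would */
    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
        cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        printf("level %d type %d len %u\n", cmsg->cmsg_level,
            cmsg->cmsg_type, (unsigned)cmsg->cmsg_len);
    }
    return 0;
}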
1350
1351static int
1352sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1353 struct mbuf *control)
1c79356b 1354{
2d21ac55 1355 struct mbuf *m, *mlast, *n;
1c79356b 1356 int space = 0;
1c79356b 1357
0a7de745 1358 if (control == 0) {
1c79356b 1359 panic("sbappendcontrol");
0a7de745 1360 }
1c79356b 1361
0a7de745 1362 for (m = control;; m = m->m_next) {
1c79356b 1363 space += m->m_len;
0a7de745 1364 if (m->m_next == 0) {
1c79356b 1365 break;
0a7de745 1366 }
1c79356b 1367 }
1368 n = m; /* save pointer to last control buffer */
1369 for (m = m0; m; m = m->m_next) {
1c79356b 1370 space += m->m_len;
1371 }
1372 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1373 return 0;
1374 }
1375 n->m_next = m0; /* concatenate data to control */
1376 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1377
0a7de745 1378 for (m = control; m->m_next != NULL; m = m->m_next) {
1c79356b 1379 sballoc(sb, m);
0a7de745 1380 }
1381 sballoc(sb, m);
1382 mlast = m;
1383
1384 if (sb->sb_lastrecord != NULL) {
1385 sb->sb_lastrecord->m_nextpkt = control;
1386 } else {
1c79356b 1387 sb->sb_mb = control;
2d21ac55
A
1388 }
1389 sb->sb_lastrecord = control;
1390 sb->sb_mbtail = mlast;
1391
1392 SBLASTMBUFCHK(sb, __func__);
1393 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1394
1395 postevent(0, sb, EV_RWBYTES);
0a7de745 1396 return 1;
1c79356b
A
1397}
1398
91447636 1399int
0a7de745 1400sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
2d21ac55 1401 int *error_out)
91447636
A
1402{
1403 int result = 0;
2d21ac55
A
1404 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1405
0a7de745 1406 if (error_out) {
2d21ac55 1407 *error_out = 0;
0a7de745 1408 }
2d21ac55
A
1409
1410 if (sb->sb_flags & SB_DROP) {
0a7de745 1411 if (m0 != NULL) {
2d21ac55 1412 m_freem(m0);
0a7de745
A
1413 }
1414 if (control != NULL && !sb_unix) {
2d21ac55 1415 m_freem(control);
0a7de745
A
1416 }
1417 if (error_out != NULL) {
2d21ac55 1418 *error_out = EINVAL;
0a7de745
A
1419 }
1420 return 0;
2d21ac55
A
1421 }
1422
fe8ab488 1423 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
91447636 1424 int error;
2d21ac55 1425
6d2010ae 1426 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
2d21ac55 1427 SBLASTRECORDCHK(sb, __func__);
fe8ab488
A
1428
1429#if CONTENT_FILTER
0a7de745 1430 if (error == 0) {
3e170ce0
A
1431 error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
1432 0);
0a7de745 1433 }
fe8ab488
A
1434#endif /* CONTENT_FILTER */
1435
91447636
A
1436 if (error) {
1437 if (error != EJUSTRETURN) {
0a7de745 1438 if (m0) {
2d21ac55 1439 m_freem(m0);
0a7de745
A
1440 }
1441 if (control != NULL && !sb_unix) {
2d21ac55 1442 m_freem(control);
0a7de745
A
1443 }
1444 if (error_out) {
2d21ac55 1445 *error_out = error;
0a7de745 1446 }
91447636 1447 }
0a7de745 1448 return 0;
91447636 1449 }
fe8ab488
A
1450 } else if (m0) {
1451 m0->m_flags &= ~M_SKIPCFIL;
91447636 1452 }
2d21ac55 1453
91447636
A
1454 result = sbappendcontrol_internal(sb, m0, control);
1455 if (result == 0) {
0a7de745 1456 if (m0) {
2d21ac55 1457 m_freem(m0);
0a7de745
A
1458 }
1459 if (control != NULL && !sb_unix) {
2d21ac55 1460 m_freem(control);
0a7de745
A
1461 }
1462 if (error_out) {
2d21ac55 1463 *error_out = ENOBUFS;
0a7de745 1464 }
91447636 1465 }
2d21ac55 1466
0a7de745 1467 return result;
91447636
A
1468}
1469
39236c6e
A
1470/*
1471 * Append a contiguous TCP data blob with TCP sequence number as control data
1472 * as a new msg to the receive socket buffer.
1473 */
1474int
1475sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, uint32_t seqnum,
1476 int unordered)
1477{
1478 struct mbuf *m_eor = NULL;
1479 u_int32_t data_len = 0;
1480 int ret = 0;
1481 struct socket *so = sb->sb_so;
1482
0a7de745
A
1483 if (m == NULL) {
1484 return 0;
1485 }
d9a64523 1486
39236c6e
A
1487 VERIFY((m->m_flags & M_PKTHDR) && m_pktlen(m) > 0);
1488 VERIFY(so->so_msg_state != NULL);
1489 VERIFY(sb->sb_flags & SB_RECV);
1490
1491 /* Keep the TCP sequence number in the mbuf pkthdr */
1492 m->m_pkthdr.msg_seq = seqnum;
1493
1494 /* find last mbuf and set M_EOR */
0a7de745 1495 for (m_eor = m;; m_eor = m_eor->m_next) {
39236c6e
A
1496 /*
1497 * If the msg is unordered, we need to account for
1498 * these bytes in receive socket buffer size. Otherwise,
1499 * the receive window advertised will shrink because
1500 * of the additional unordered bytes added to the
1501 * receive buffer.
1502 */
1503 if (unordered) {
1504 m_eor->m_flags |= M_UNORDERED_DATA;
1505 data_len += m_eor->m_len;
1506 so->so_msg_state->msg_uno_bytes += m_eor->m_len;
fe8ab488 1507 } else {
1508 m_eor->m_flags &= ~M_UNORDERED_DATA;
1509 }
0a7de745 1510 if (m_eor->m_next == NULL) {
39236c6e 1511 break;
0a7de745 1512 }
1513 }
1514
1515 /* set EOR flag at end of byte blob */
1516 m_eor->m_flags |= M_EOR;
1517
1518 /* expand the receive socket buffer to allow unordered data */
1519 if (unordered && !sbreserve(sb, sb->sb_hiwat + data_len)) {
1520 /*
1521 * Could not allocate memory for unordered data, it
1522 * means this packet will have to be delivered in order
1523 */
1524 printf("%s: could not reserve space for unordered data\n",
1525 __func__);
1526 }
1527
fe8ab488 1528 if (!unordered && (sb->sb_mbtail != NULL) &&
0a7de745 1529 !(sb->sb_mbtail->m_flags & M_UNORDERED_DATA)) {
1530 sb->sb_mbtail->m_flags &= ~M_EOR;
1531 sbcompress(sb, m, sb->sb_mbtail);
1532 ret = 1;
1533 } else {
1534 ret = sbappendrecord(sb, m);
1535 }
1536 VERIFY(sb->sb_mbtail->m_flags & M_EOR);
0a7de745 1537 return ret;
1538}
1539
1540/*
1541 * TCP streams have message based out of order delivery support, or have
1542 * Multipath TCP support, or are regular TCP sockets
1543 */
1544int
1545sbappendstream_rcvdemux(struct socket *so, struct mbuf *m, uint32_t seqnum,
0a7de745 1546 int unordered)
1547{
1548 int ret = 0;
1549
1550 if ((m != NULL) &&
1551 m_pktlen(m) <= 0 &&
1552 !((so->so_flags & SOF_MP_SUBFLOW) &&
1553 (m->m_flags & M_PKTHDR) &&
1554 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
39236c6e 1555 m_freem(m);
0a7de745 1556 return ret;
1557 }
1558
1559 if (so->so_flags & SOF_ENABLE_MSGS) {
1560 ret = sbappendmsgstream_rcv(&so->so_rcv, m, seqnum, unordered);
1561 }
1562#if MPTCP
a39ff7e2 1563 else if (so->so_flags & SOF_MP_SUBFLOW) {
1564 ret = sbappendmptcpstream_rcv(&so->so_rcv, m);
1565 }
1566#endif /* MPTCP */
1567 else {
1568 ret = sbappendstream(&so->so_rcv, m);
1569 }
0a7de745 1570 return ret;
1571}
1572
1573#if MPTCP
1574int
1575sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m)
1576{
1577 struct socket *so = sb->sb_so;
1578
1579 VERIFY(m == NULL || (m->m_flags & M_PKTHDR));
1580 /* SB_NOCOMPRESS must be set to prevent loss of M_PKTHDR data */
1581 VERIFY((sb->sb_flags & (SB_RECV | SB_NOCOMPRESS)) ==
1582 (SB_RECV | SB_NOCOMPRESS));
1583
1584 if (m == NULL || m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) ||
1585 (so->so_state & SS_CANTRCVMORE)) {
1586 if (m && (m->m_flags & M_PKTHDR) &&
1587 m_pktlen(m) == 0 &&
1588 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
1589 mptcp_input(tptomptp(sototcpcb(so))->mpt_mpte, m);
0a7de745 1590 return 1;
5c9f4661 1591 } else if (m != NULL) {
39236c6e 1592 m_freem(m);
5c9f4661 1593 }
0a7de745 1594 return 0;
1595 }
1596 /* the socket is not closed, so SOF_MP_SUBFLOW must be set */
1597 VERIFY(so->so_flags & SOF_MP_SUBFLOW);
1598
1599 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1600 panic("%s: nexpkt %p || mb %p != lastrecord %p\n", __func__,
1601 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
1602 /* NOTREACHED */
1603 }
1604
1605 SBLASTMBUFCHK(sb, __func__);
1606
1607 /* No filter support (SB_RECV) on mptcp subflow sockets */
1608
1609 sbcompress(sb, m, sb->sb_mbtail);
1610 sb->sb_lastrecord = sb->sb_mb;
1611 SBLASTRECORDCHK(sb, __func__);
0a7de745 1612 return 1;
1613}
1614#endif /* MPTCP */
1615
1616/*
1617 * Append message to send socket buffer based on priority.
1618 */
1619int
1620sbappendmsg_snd(struct sockbuf *sb, struct mbuf *m)
1621{
1622 struct socket *so = sb->sb_so;
1623 struct msg_priq *priq;
1624 int set_eor = 0;
1625
1626 VERIFY(so->so_msg_state != NULL);
1627
0a7de745 1628 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1629 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
1630 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
0a7de745 1631 }
1632
1633 SBLASTMBUFCHK(sb, __func__);
1634
1635 if (m == NULL || (sb->sb_flags & SB_DROP) || so->so_msg_state == NULL) {
0a7de745 1636 if (m != NULL) {
39236c6e 1637 m_freem(m);
1638 }
1639 return 0;
1640 }
1641
1642 priq = &so->so_msg_state->msg_priq[m->m_pkthdr.msg_pri];
1643
1644 /* note if we need to propagate M_EOR to the last mbuf */
1645 if (m->m_flags & M_EOR) {
1646 set_eor = 1;
1647
1648 /* Reset M_EOR from the first mbuf */
1649 m->m_flags &= ~(M_EOR);
1650 }
1651
1652 if (priq->msgq_head == NULL) {
1653 VERIFY(priq->msgq_tail == NULL && priq->msgq_lastmsg == NULL);
1654 priq->msgq_head = priq->msgq_lastmsg = m;
1655 } else {
1656 VERIFY(priq->msgq_tail->m_next == NULL);
1657
1658 /* Check if the last message has M_EOR flag set */
1659 if (priq->msgq_tail->m_flags & M_EOR) {
1660 /* Insert as a new message */
1661 priq->msgq_lastmsg->m_nextpkt = m;
1662
1663 /* move the lastmsg pointer */
1664 priq->msgq_lastmsg = m;
1665 } else {
1666 /* Append to the existing message */
1667 priq->msgq_tail->m_next = m;
1668 }
1669 }
1670
1671 /* Update accounting and the queue tail pointer */
1672
1673 while (m->m_next != NULL) {
1674 sballoc(sb, m);
1675 priq->msgq_bytes += m->m_len;
1676 m = m->m_next;
1677 }
1678 sballoc(sb, m);
1679 priq->msgq_bytes += m->m_len;
1680
1681 if (set_eor) {
1682 m->m_flags |= M_EOR;
1683
1684 /*
1685 * Since the user space can not write a new msg
1686 * without completing the previous one, we can
1687 * reset this flag to start sending again.
1688 */
1689 priq->msgq_flags &= ~(MSGQ_MSG_NOTDONE);
1690 }
1691
1692 priq->msgq_tail = m;
1693
1694	SBLASTRECORDCHK(sb, "sbappendmsg_snd 2");
1695 postevent(0, sb, EV_RWBYTES);
0a7de745 1696 return 1;
1697}
1698
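/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * a hypothetical caller tagging a packet-header mbuf with a priority before
 * handing it to sbappendmsg_snd() above.  The helper name is made up; it
 * assumes the socket is already locked and SOF_ENABLE_MSGS is set.
 */
#if 0 /* example only */
static int
example_append_priority_msg(struct socket *so, struct mbuf *m, u_int32_t pri)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(pri >= MSG_PRI_MIN && pri <= MSG_PRI_MAX);

	m->m_pkthdr.msg_pri = pri;	/* selects the msg_priq[] slot */
	m->m_flags |= M_EOR;		/* this chain is one complete message */

	return sbappendmsg_snd(&so->so_snd, m);
}
#endif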
1699/*
1700 * Pull data from priority queues to the serial snd queue
1701 * right before sending.
1702 */
1703void
1704sbpull_unordered_data(struct socket *so, int32_t off, int32_t len)
1705{
1706 int32_t topull, i;
1707 struct msg_priq *priq = NULL;
1708
1709 VERIFY(so->so_msg_state != NULL);
1710
1711 topull = (off + len) - so->so_msg_state->msg_serial_bytes;
1712
1713 i = MSG_PRI_MAX;
1714 while (i >= MSG_PRI_MIN && topull > 0) {
1715 struct mbuf *m = NULL, *mqhead = NULL, *mend = NULL;
1716 priq = &so->so_msg_state->msg_priq[i];
1717 if ((priq->msgq_flags & MSGQ_MSG_NOTDONE) &&
1718 priq->msgq_head == NULL) {
1719 /*
1720 * We were in the middle of sending
1721 * a message and we have not seen the
1722 * end of it.
1723 */
1724 VERIFY(priq->msgq_lastmsg == NULL &&
1725 priq->msgq_tail == NULL);
1726 return;
1727 }
1728 if (priq->msgq_head != NULL) {
1729 int32_t bytes = 0, topull_tmp = topull;
1730 /*
1731 * We found a msg while scanning the priority
1732 * queue from high to low priority.
1733 */
1734 m = priq->msgq_head;
1735 mqhead = m;
1736 mend = m;
1737
1738 /*
1739 * Move bytes from the priority queue to the
1740 * serial queue. Compute the number of bytes
1741 * being added.
1742 */
1743 while (mqhead->m_next != NULL && topull_tmp > 0) {
1744 bytes += mqhead->m_len;
1745 topull_tmp -= mqhead->m_len;
1746 mend = mqhead;
1747 mqhead = mqhead->m_next;
1748 }
1749
1750 if (mqhead->m_next == NULL) {
1751 /*
1752 * If we have only one more mbuf left,
1753 * move the last mbuf of this message to
1754 * serial queue and set the head of the
1755 * queue to be the next message.
1756 */
1757 bytes += mqhead->m_len;
1758 mend = mqhead;
1759 mqhead = m->m_nextpkt;
1760 if (!(mend->m_flags & M_EOR)) {
1761 /*
1762 * We have not seen the end of
1763 * this message, so we can not
1764 * pull anymore.
1765 */
1766 priq->msgq_flags |= MSGQ_MSG_NOTDONE;
1767 } else {
1768 /* Reset M_EOR */
1769 mend->m_flags &= ~(M_EOR);
1770 }
1771 } else {
1772				/* propagate the next msg pointer */
1773 mqhead->m_nextpkt = m->m_nextpkt;
1774 }
1775 priq->msgq_head = mqhead;
1776
1777 /*
1778 * if the lastmsg pointer points to
1779 * the mbuf that is being dequeued, update
1780 * it to point to the new head.
1781 */
0a7de745 1782 if (priq->msgq_lastmsg == m) {
39236c6e 1783 priq->msgq_lastmsg = priq->msgq_head;
0a7de745 1784 }
1785
1786 m->m_nextpkt = NULL;
1787 mend->m_next = NULL;
1788
1789 if (priq->msgq_head == NULL) {
1790 /* Moved all messages, update tail */
1791 priq->msgq_tail = NULL;
1792 VERIFY(priq->msgq_lastmsg == NULL);
1793 }
1794
1795 /* Move it to serial sb_mb queue */
1796 if (so->so_snd.sb_mb == NULL) {
1797 so->so_snd.sb_mb = m;
1798 } else {
1799 so->so_snd.sb_mbtail->m_next = m;
1800 }
1801
1802 priq->msgq_bytes -= bytes;
1803 VERIFY(priq->msgq_bytes >= 0);
1804 sbwakeup(&so->so_snd);
1805
1806 so->so_msg_state->msg_serial_bytes += bytes;
1807 so->so_snd.sb_mbtail = mend;
1808 so->so_snd.sb_lastrecord = so->so_snd.sb_mb;
1809
1810 topull =
1811 (off + len) - so->so_msg_state->msg_serial_bytes;
1812
0a7de745 1813 if (priq->msgq_flags & MSGQ_MSG_NOTDONE) {
39236c6e 1814 break;
0a7de745 1815 }
1816 } else {
1817 --i;
1818 }
1819 }
1820 sblastrecordchk(&so->so_snd, "sbpull_unordered_data");
1821 sblastmbufchk(&so->so_snd, "sbpull_unordered_data");
1822}
1823
1824/*
1825 * Compress mbuf chain m into the socket
1826 * buffer sb following mbuf n. If n
1827 * is null, the buffer is presumed empty.
1828 */
1829static inline void
1830sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1831{
39236c6e 1832 int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS));
1833 struct mbuf *o;
1834
1835 if (m == NULL) {
1836 /* There is nothing to compress; just update the tail */
0a7de745 1837 for (; n->m_next != NULL; n = n->m_next) {
2d21ac55 1838 ;
0a7de745 1839 }
1840 sb->sb_mbtail = n;
1841 goto done;
1842 }
1c79356b 1843
39236c6e 1844 while (m != NULL) {
1c79356b 1845 eor |= m->m_flags & M_EOR;
39236c6e 1846 if (compress && m->m_len == 0 && (eor == 0 ||
2d21ac55 1847 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
0a7de745 1848 if (sb->sb_lastrecord == m) {
2d21ac55 1849 sb->sb_lastrecord = m->m_next;
0a7de745 1850 }
1851 m = m_free(m);
1852 continue;
1853 }
39236c6e 1854 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
1855#ifndef __APPLE__
1856 M_WRITABLE(n) &&
1857#endif
1858 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1859 m->m_len <= M_TRAILINGSPACE(n) &&
1860 n->m_type == m->m_type) {
1861 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1862 (unsigned)m->m_len);
1863 n->m_len += m->m_len;
1864 sb->sb_cc += m->m_len;
2d21ac55 1865 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1866 m->m_type != MT_OOBDATA) {
1867 /* XXX: Probably don't need */
2d21ac55 1868 sb->sb_ctl += m->m_len;
39236c6e 1869 }
1870
1871 /* update send byte count */
1872 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1873 inp_incr_sndbytes_total(sb->sb_so,
1874 m->m_len);
1875 inp_incr_sndbytes_unsent(sb->sb_so,
1876 m->m_len);
1877 }
1878 m = m_free(m);
1879 continue;
1880 }
0a7de745 1881 if (n != NULL) {
1c79356b 1882 n->m_next = m;
0a7de745 1883 } else {
1c79356b 1884 sb->sb_mb = m;
0a7de745 1885 }
2d21ac55 1886 sb->sb_mbtail = m;
1887 sballoc(sb, m);
1888 n = m;
1889 m->m_flags &= ~M_EOR;
1890 m = m->m_next;
39236c6e 1891 n->m_next = NULL;
1c79356b 1892 }
39236c6e 1893 if (eor != 0) {
0a7de745 1894 if (n != NULL) {
1c79356b 1895 n->m_flags |= eor;
0a7de745 1896 } else {
1c79356b 1897 printf("semi-panic: sbcompress\n");
0a7de745 1898 }
1c79356b 1899 }
1900done:
1901 SBLASTMBUFCHK(sb, __func__);
1902 postevent(0, sb, EV_RWBYTES);
1903}
1904
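/*
 * Illustrative sketch (editor's addition): the coalescing branch in
 * sbcompress() copies a small mbuf into the trailing space of its
 * predecessor, so e.g. two 4-byte MT_DATA mbufs end up as one 8-byte mbuf.
 * A standalone restatement of the condition being tested:
 */
#if 0 /* example only */
static int
example_can_coalesce(struct mbuf *n, struct mbuf *m)
{
	/* mirrors the test in sbcompress(), minus the M_EOR bookkeeping */
	return n != NULL && (n->m_flags & M_EOR) == 0 &&
	    m->m_len <= MCLBYTES / 4 &&		/* don't copy too much */
	    m->m_len <= M_TRAILINGSPACE(n) &&
	    n->m_type == m->m_type;
}
#endif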
1905void
1906sb_empty_assert(struct sockbuf *sb, const char *where)
1907{
1908 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1909 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
b0d623f7 1910 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
2d21ac55 1911 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1912 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail,
1913 sb->sb_lastrecord);
1914 /* NOTREACHED */
1915 }
1916}
1917
1918static void
1919sbflush_priq(struct msg_priq *priq)
1920{
1921 struct mbuf *m;
1922 m = priq->msgq_head;
0a7de745 1923 if (m != NULL) {
39236c6e 1924 m_freem_list(m);
0a7de745 1925 }
1926 priq->msgq_head = priq->msgq_tail = priq->msgq_lastmsg = NULL;
1927 priq->msgq_bytes = priq->msgq_flags = 0;
1928}
1929
1930/*
1931 * Free all mbufs in a sockbuf.
1932 * Check that all resources are reclaimed.
1933 */
1934void
2d21ac55 1935sbflush(struct sockbuf *sb)
1c79356b 1936{
1937 void *lr_saved = __builtin_return_address(0);
1938 struct socket *so = sb->sb_so;
1939 u_int32_t i;
1940
1941 /* so_usecount may be 0 if we get here from sofreelastref() */
1942 if (so == NULL) {
1943 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
1944 __func__, sb, sb->sb_flags, lr_saved);
1945 /* NOTREACHED */
1946 } else if (so->so_usecount < 0) {
1947 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1948 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
1949 so->so_usecount, lr_saved, solockhistory_nr(so));
1950 /* NOTREACHED */
1951 }
1952
1953 /*
1954 * Obtain lock on the socket buffer (SB_LOCK). This is required
1955 * to prevent the socket buffer from being unexpectedly altered
1956 * while it is used by another thread in socket send/receive.
1957 *
1958 * sblock() must not fail here, hence the assertion.
1959 */
1960 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
1961 VERIFY(sb->sb_flags & SB_LOCK);
1962
1963 while (sb->sb_mbcnt > 0) {
1964 /*
1965 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1966 * we would loop forever. Panic instead.
1967 */
0a7de745 1968 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) {
9bccf70c 1969 break;
0a7de745 1970 }
1c79356b 1971 sbdrop(sb, (int)sb->sb_cc);
9bccf70c 1972 }
1973
1974 if (!(sb->sb_flags & SB_RECV) && (so->so_flags & SOF_ENABLE_MSGS)) {
1975 VERIFY(so->so_msg_state != NULL);
1976 for (i = MSG_PRI_MIN; i <= MSG_PRI_MAX; ++i) {
1977 sbflush_priq(&so->so_msg_state->msg_priq[i]);
1978 }
1979 so->so_msg_state->msg_serial_bytes = 0;
1980 so->so_msg_state->msg_uno_bytes = 0;
1981 }
1982
2d21ac55 1983 sb_empty_assert(sb, __func__);
1c79356b 1984 postevent(0, sb, EV_RWBYTES);
91447636 1985
0a7de745 1986 sbunlock(sb, TRUE); /* keep socket locked */
1987}
1988
1989/*
1990 * Drop data from (the front of) a sockbuf.
1991 * Use m_freem_list to free the mbuf structures
1992 * under a single lock. This is done by pruning
1993 * the top of the tree from the body by keeping track
1994 * of where we get to in the tree and then zeroing the
1995 * two pertinent pointers, m_nextpkt and m_next.
1996 * The socket buffer is then updated to point at the new
1997 * top of the tree and the pruned area is released via
1998 * m_freem_list.
1999 */
2000void
2d21ac55 2001sbdrop(struct sockbuf *sb, int len)
1c79356b 2002{
2d21ac55 2003 struct mbuf *m, *free_list, *ml;
fa4905b1 2004 struct mbuf *next, *last;
1c79356b 2005
2006 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
2007#if MPTCP
5ba3f43e 2008 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
39236c6e 2009 ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
2010 (SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH) &&
2011 SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))) &&
5ba3f43e 2012 !(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
490019cf 2013 mptcp_preproc_sbdrop(sb->sb_so, m, (unsigned int)len);
39236c6e 2014 }
2015 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
2016 (sb->sb_so->so_flags & SOF_MP_SUBFLOW) &&
2017 (sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
2018 mptcp_fallback_sbdrop(sb->sb_so, m, len);
2019 }
39236c6e 2020#endif /* MPTCP */
2021 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
2022
2023 free_list = last = m;
2024 ml = (struct mbuf *)0;
2025
1c79356b 2026 while (len > 0) {
2027 if (m == NULL) {
2028 if (next == NULL) {
2029 /*
2030 * temporarily replacing this panic with printf
2031 * because it occurs occasionally when closing
2032				 * a socket and there is no harm in ignoring
2033 * it. This problem will be investigated
2034 * further.
2035 */
2036 /* panic("sbdrop"); */
2037 printf("sbdrop - count not zero\n");
2038 len = 0;
2039 /*
2040 * zero the counts. if we have no mbufs,
2041 * we have no data (PR-2986815)
2042 */
2043 sb->sb_cc = 0;
2044 sb->sb_mbcnt = 0;
2045 if (!(sb->sb_flags & SB_RECV) &&
2046 (sb->sb_so->so_flags & SOF_ENABLE_MSGS)) {
2047 sb->sb_so->so_msg_state->
0a7de745 2048 msg_serial_bytes = 0;
39236c6e 2049 }
2050 break;
2051 }
2052 m = last = next;
2053 next = m->m_nextpkt;
2054 continue;
2055 }
2056 if (m->m_len > len) {
2057 m->m_len -= len;
2058 m->m_data += len;
2059 sb->sb_cc -= len;
39037602 2060 /* update the send byte count */
2061 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2062 inp_decr_sndbytes_total(sb->sb_so, len);
2063 }
2d21ac55 2064 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
0a7de745 2065 m->m_type != MT_OOBDATA) {
2d21ac55 2066 sb->sb_ctl -= len;
0a7de745 2067 }
2068 break;
2069 }
2070 len -= m->m_len;
2071 sbfree(sb, m);
2072
2073 ml = m;
2074 m = m->m_next;
2075 }
2076 while (m && m->m_len == 0) {
2077 sbfree(sb, m);
2078
2079 ml = m;
2080 m = m->m_next;
2081 }
2082 if (ml) {
2d21ac55 2083 ml->m_next = (struct mbuf *)0;
fa4905b1 2084 last->m_nextpkt = (struct mbuf *)0;
2d21ac55 2085 m_freem_list(free_list);
2086 }
2087 if (m) {
2088 sb->sb_mb = m;
2089 m->m_nextpkt = next;
2d21ac55 2090 } else {
1c79356b 2091 sb->sb_mb = next;
2092 }
2093
2094 /*
2095 * First part is an inline SB_EMPTY_FIXUP(). Second part
2096 * makes sure sb_lastrecord is up-to-date if we dropped
2097 * part of the last record.
2098 */
2099 m = sb->sb_mb;
2100 if (m == NULL) {
2101 sb->sb_mbtail = NULL;
2102 sb->sb_lastrecord = NULL;
2103 } else if (m->m_nextpkt == NULL) {
2104 sb->sb_lastrecord = m;
2105 }
fa4905b1 2106
2107#if CONTENT_FILTER
2108 cfil_sock_buf_update(sb);
2109#endif /* CONTENT_FILTER */
2110
1c79356b 2111 postevent(0, sb, EV_RWBYTES);
2112
2113 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
2114}
2115
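/*
 * Illustrative sketch (editor's addition): the typical caller of sbdrop()
 * is a protocol consuming data from the front of a socket buffer, e.g. TCP
 * dropping bytes that were just acknowledged.  The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static void
example_ack_consumed(struct socket *so, int acked)
{
	/* never drop more than is actually buffered */
	sbdrop(&so->so_snd, imin(acked, (int)so->so_snd.sb_cc));
	sowwakeup(so);		/* writers may now find space */
}
#endif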
2116/*
2117 * Drop a record off the front of a sockbuf
2118 * and move the next record to the front.
2119 */
2120void
2d21ac55 2121sbdroprecord(struct sockbuf *sb)
1c79356b 2122{
2d21ac55 2123 struct mbuf *m, *mn;
2124
2125 m = sb->sb_mb;
2126 if (m) {
2127 sb->sb_mb = m->m_nextpkt;
2128 do {
2129 sbfree(sb, m);
2130 MFREE(m, mn);
2131 m = mn;
2132 } while (m);
1c79356b 2133 }
2d21ac55 2134 SB_EMPTY_FIXUP(sb);
2135 postevent(0, sb, EV_RWBYTES);
2136}
2137
2138/*
2139 * Create a "control" mbuf containing the specified data
2140 * with the specified type for presentation on a socket buffer.
2141 */
2142struct mbuf *
2d21ac55 2143sbcreatecontrol(caddr_t p, int size, int type, int level)
1c79356b 2144{
2d21ac55 2145 struct cmsghdr *cp;
2146 struct mbuf *m;
2147
2148 if (CMSG_SPACE((u_int)size) > MLEN) {
2149 return (struct mbuf *)NULL;
2150 }
2151 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) {
2152 return (struct mbuf *)NULL;
2153 }
1c79356b 2154 cp = mtod(m, struct cmsghdr *);
0a7de745 2155 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1c79356b 2156 /* XXX check size? */
2d21ac55 2157 (void) memcpy(CMSG_DATA(cp), p, size);
2158 m->m_len = CMSG_SPACE(size);
2159 cp->cmsg_len = CMSG_LEN(size);
2160 cp->cmsg_level = level;
2161 cp->cmsg_type = type;
0a7de745 2162 return m;
2163}
2164
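/*
 * Illustrative sketch (editor's addition): building a SCM_TIMESTAMP control
 * mbuf, a common use of sbcreatecontrol() in receive paths.  Purely a usage
 * sketch, not taken from this file.
 */
#if 0 /* example only */
static struct mbuf *
example_make_timestamp_control(void)
{
	struct timeval tv;

	microtime(&tv);
	/* NULL on mbuf shortage or if CMSG_SPACE(size) exceeds MLEN */
	return sbcreatecontrol((caddr_t)&tv, sizeof(tv),
	    SCM_TIMESTAMP, SOL_SOCKET);
}
#endif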
2165struct mbuf **
2166sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp)
6d2010ae 2167{
39236c6e 2168 struct mbuf *m;
2169 struct cmsghdr *cp;
2170
39236c6e 2171 if (*mp == NULL) {
6d2010ae 2172 *mp = sbcreatecontrol(p, size, type, level);
0a7de745 2173 return mp;
6d2010ae 2174 }
316670eb 2175
39236c6e 2176 if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
2177 mp = &(*mp)->m_next;
2178 *mp = sbcreatecontrol(p, size, type, level);
0a7de745 2179 return mp;
6d2010ae 2180 }
316670eb 2181
6d2010ae 2182 m = *mp;
2183
2184 cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
2185 /* CMSG_SPACE ensures 32-bit alignment */
0a7de745 2186 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
6d2010ae 2187 m->m_len += CMSG_SPACE(size);
316670eb 2188
2189 /* XXX check size? */
2190 (void) memcpy(CMSG_DATA(cp), p, size);
2191 cp->cmsg_len = CMSG_LEN(size);
2192 cp->cmsg_level = level;
2193 cp->cmsg_type = type;
316670eb 2194
0a7de745 2195 return mp;
2196}
2197
2198
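/*
 * Illustrative sketch (editor's addition): accumulating two cmsgs with
 * sbcreatecontrol_mbuf(), which packs them into one mbuf while they fit and
 * chains a fresh mbuf when they do not.  The particular options shown are
 * arbitrary placeholders.
 */
#if 0 /* example only */
static struct mbuf *
example_build_control(void)
{
	struct timeval tv;
	u_char tos = 0;
	struct mbuf *control = NULL, **mp = &control;

	microtime(&tv);
	mp = sbcreatecontrol_mbuf((caddr_t)&tv, sizeof(tv),
	    SCM_TIMESTAMP, SOL_SOCKET, mp);
	mp = sbcreatecontrol_mbuf((caddr_t)&tos, sizeof(tos),
	    IP_RECVTOS, IPPROTO_IP, mp);
	if (*mp == NULL) {
		/* an allocation failed; drop whatever was built */
		m_freem(control);
		return NULL;
	}
	return control;
}
#endif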
2199/*
2200 * Some routines that return EOPNOTSUPP for entry points that are not
2201 * supported by a protocol. Fill in as needed.
2202 */
2203int
2204pru_abort_notsupp(struct socket *so)
2205{
2206#pragma unused(so)
0a7de745 2207 return EOPNOTSUPP;
2208}
2209
2210int
2211pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
1c79356b 2212{
39236c6e 2213#pragma unused(so, nam)
0a7de745 2214 return EOPNOTSUPP;
2215}
2216
1c79356b 2217int
39236c6e 2218pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
1c79356b 2219{
39236c6e 2220#pragma unused(so, proto, p)
0a7de745 2221 return EOPNOTSUPP;
2222}
2223
2224int
39236c6e 2225pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1c79356b 2226{
39236c6e 2227#pragma unused(so, nam, p)
0a7de745 2228 return EOPNOTSUPP;
2229}
2230
2231int
39236c6e 2232pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1c79356b 2233{
39236c6e 2234#pragma unused(so, nam, p)
0a7de745 2235 return EOPNOTSUPP;
2236}
2237
2238int
39236c6e 2239pru_connect2_notsupp(struct socket *so1, struct socket *so2)
1c79356b 2240{
39236c6e 2241#pragma unused(so1, so2)
0a7de745 2242 return EOPNOTSUPP;
2243}
2244
2245int
2246pru_connectx_notsupp(struct socket *so, struct sockaddr *src,
2247 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
2248 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
2249 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
1c79356b 2250{
813fb2f6 2251#pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
0a7de745 2252 return EOPNOTSUPP;
2253}
2254
2255int
2256pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
2257 struct ifnet *ifp, struct proc *p)
1c79356b 2258{
39236c6e 2259#pragma unused(so, cmd, data, ifp, p)
0a7de745 2260 return EOPNOTSUPP;
2261}
2262
2263int
39236c6e 2264pru_detach_notsupp(struct socket *so)
1c79356b 2265{
39236c6e 2266#pragma unused(so)
0a7de745 2267 return EOPNOTSUPP;
2268}
2269
2270int
39236c6e 2271pru_disconnect_notsupp(struct socket *so)
1c79356b 2272{
39236c6e 2273#pragma unused(so)
0a7de745 2274 return EOPNOTSUPP;
2275}
2276
2277int
3e170ce0 2278pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1c79356b 2279{
39236c6e 2280#pragma unused(so, aid, cid)
0a7de745 2281 return EOPNOTSUPP;
2282}
2283
2284int
39236c6e 2285pru_listen_notsupp(struct socket *so, struct proc *p)
1c79356b 2286{
39236c6e 2287#pragma unused(so, p)
0a7de745 2288 return EOPNOTSUPP;
2289}
2290
1c79356b 2291int
39236c6e 2292pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
1c79356b 2293{
39236c6e 2294#pragma unused(so, nam)
0a7de745 2295 return EOPNOTSUPP;
2296}
2297
2298int
2299pru_rcvd_notsupp(struct socket *so, int flags)
2300{
2301#pragma unused(so, flags)
0a7de745 2302 return EOPNOTSUPP;
39236c6e 2303}
1c79356b 2304
2305int
2306pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
1c79356b 2307{
39236c6e 2308#pragma unused(so, m, flags)
0a7de745 2309 return EOPNOTSUPP;
2310}
2311
2312int
2313pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
2314 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2315{
2316#pragma unused(so, flags, m, addr, control, p)
0a7de745 2317 return EOPNOTSUPP;
39236c6e 2318}
1c79356b 2319
2320int
2321pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m,
2322 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2323{
2324#pragma unused(so, flags, m, addr, control, p)
0a7de745 2325 return EOPNOTSUPP;
2326}
2327
2328/*
2329 * This isn't really a ``null'' operation, but it's the default one
2330 * and doesn't do anything destructive.
2331 */
2332int
2d21ac55 2333pru_sense_null(struct socket *so, void *ub, int isstat64)
1c79356b 2334{
2335 if (isstat64 != 0) {
2336 struct stat64 *sb64;
1c79356b 2337
2338 sb64 = (struct stat64 *)ub;
2339 sb64->st_blksize = so->so_snd.sb_hiwat;
2340 } else {
2341 struct stat *sb;
1c79356b 2342
2343 sb = (struct stat *)ub;
2344 sb->st_blksize = so->so_snd.sb_hiwat;
2345 }
1c79356b 2346
0a7de745 2347 return 0;
2348}
2349
2350
2351int
2352pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
2353 struct mbuf *top, struct mbuf *control, int flags)
1c79356b 2354{
39236c6e 2355#pragma unused(so, addr, uio, top, control, flags)
0a7de745 2356 return EOPNOTSUPP;
2357}
2358
fe8ab488 2359int
2360pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
2361 u_int uiocnt, int flags)
fe8ab488 2362{
3e170ce0 2363#pragma unused(so, uio, uiocnt, flags)
0a7de745 2364 return EOPNOTSUPP;
2365}
2366
1c79356b 2367int
2368pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
2369 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1c79356b 2370{
39236c6e 2371#pragma unused(so, paddr, uio, mp0, controlp, flagsp)
0a7de745 2372 return EOPNOTSUPP;
2373}
2374
fe8ab488 2375int
39037602 2376pru_soreceive_list_notsupp(struct socket *so,
3e170ce0 2377 struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp)
fe8ab488 2378{
3e170ce0 2379#pragma unused(so, recv_msg_array, uiocnt, flagsp)
0a7de745 2380 return EOPNOTSUPP;
2381}
2382
2d21ac55 2383int
39236c6e 2384pru_shutdown_notsupp(struct socket *so)
1c79356b 2385{
39236c6e 2386#pragma unused(so)
0a7de745 2387 return EOPNOTSUPP;
2388}
2389
2d21ac55 2390int
39236c6e 2391pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
1c79356b 2392{
39236c6e 2393#pragma unused(so, nam)
0a7de745 2394 return EOPNOTSUPP;
2395}
2396
91447636 2397int
39236c6e 2398pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql)
1c79356b 2399{
39236c6e 2400#pragma unused(so, events, cred, wql)
0a7de745 2401 return EOPNOTSUPP;
2402}
2403
2404int
2405pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
2406{
2407#pragma unused(so, sopt)
2408 /*
2409 * Allow all options for set/get by default.
2410 */
0a7de745 2411 return 0;
2412}
2413
2414static int
2415pru_preconnect_null(struct socket *so)
2416{
2417#pragma unused(so)
0a7de745 2418 return 0;
2419}
2420
2421void
2422pru_sanitize(struct pr_usrreqs *pru)
2423{
0a7de745 2424#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
2425 DEFAULT(pru->pru_abort, pru_abort_notsupp);
2426 DEFAULT(pru->pru_accept, pru_accept_notsupp);
2427 DEFAULT(pru->pru_attach, pru_attach_notsupp);
2428 DEFAULT(pru->pru_bind, pru_bind_notsupp);
2429 DEFAULT(pru->pru_connect, pru_connect_notsupp);
2430 DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
2431 DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
2432 DEFAULT(pru->pru_control, pru_control_notsupp);
2433 DEFAULT(pru->pru_detach, pru_detach_notsupp);
2434 DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
2435 DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
2436 DEFAULT(pru->pru_listen, pru_listen_notsupp);
2437 DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
2438 DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
2439 DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
2440 DEFAULT(pru->pru_send, pru_send_notsupp);
fe8ab488 2441 DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
39236c6e
A
2442 DEFAULT(pru->pru_sense, pru_sense_null);
2443 DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
2444 DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
2445 DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
2446 DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
fe8ab488 2447 DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
39236c6e 2448 DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
fe8ab488 2449 DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
39236c6e 2450 DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
3e170ce0 2451 DEFAULT(pru->pru_preconnect, pru_preconnect_null);
2452#undef DEFAULT
2453}
1c79356b 2454
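/*
 * Illustrative sketch (editor's addition): a protocol that implements only
 * a few entry points and relies on pru_sanitize() to point every remaining
 * NULL slot at the *_notsupp/_null defaults above.  The example protocol
 * and its functions are hypothetical.
 */
#if 0 /* example only */
static int example_attach(struct socket *, int, struct proc *);	/* hypothetical */
static int example_detach(struct socket *);				/* hypothetical */
static int example_send(struct socket *, int, struct mbuf *,
    struct sockaddr *, struct mbuf *, struct proc *);			/* hypothetical */

static struct pr_usrreqs example_usrreqs = {
	.pru_attach = example_attach,
	.pru_detach = example_detach,
	.pru_send = example_send,
};

static void
example_proto_init(void)
{
	/* unimplemented entry points now return EOPNOTSUPP instead of crashing */
	pru_sanitize(&example_usrreqs);
}
#endif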
2455/*
2456 * The following are macros on BSD and functions on Darwin
2457 */
1c79356b 2458
2459/*
2460 * Do we need to notify the other side when I/O is possible?
2461 */
2462
2d21ac55 2463int
2464sb_notify(struct sockbuf *sb)
2465{
2466 return sb->sb_waiters > 0 ||
2467 (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
2468}
2469
2470/*
2471 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
2472 * This is problematic if the fields are unsigned, as the space might
2473 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
39236c6e 2474 * overflow and return 0.
0b4e3aa0 2475 */
b0d623f7 2476int
2477sbspace(struct sockbuf *sb)
2478{
fe8ab488 2479 int pending = 0;
2480 int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
2481 (int)(sb->sb_mbmax - sb->sb_mbcnt));
3e170ce0 2482
0a7de745 2483 if (sb->sb_preconn_hiwat != 0) {
3e170ce0 2484 space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
0a7de745 2485 }
3e170ce0 2486
0a7de745 2487 if (space < 0) {
b0d623f7 2488 space = 0;
0a7de745 2489 }
b0d623f7 2490
2491 /* Compensate for data being processed by content filters */
2492#if CONTENT_FILTER
2493 pending = cfil_sock_data_space(sb);
2494#endif /* CONTENT_FILTER */
0a7de745 2495 if (pending > space) {
fe8ab488 2496 space = 0;
0a7de745 2497 } else {
fe8ab488 2498 space -= pending;
0a7de745 2499 }
fe8ab488 2500
0a7de745 2501 return space;
2502}
2503
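/*
 * Illustrative sketch (editor's addition): the usual pattern of consulting
 * sbspace() before appending, as protocol input paths do.  The helper is
 * hypothetical and assumes m carries a packet header; sbappend() is defined
 * earlier in this file.
 */
#if 0 /* example only */
static int
example_try_append(struct sockbuf *sb, struct mbuf *m)
{
	if (sbspace(sb) < m_pktlen(m)) {
		return 0;	/* caller decides: drop, or wait for space */
	}
	return sbappend(sb, m);
}
#endif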
2504/*
2505 * If this socket has priority queues, check if there is enough
2506 * space in the priority queue for this msg.
2507 */
2508int
2509msgq_sbspace(struct socket *so, struct mbuf *control)
2510{
2511 int space = 0, error;
5ba3f43e 2512 u_int32_t msgpri = 0;
3e170ce0 2513 VERIFY(so->so_type == SOCK_STREAM &&
0a7de745 2514 SOCK_PROTO(so) == IPPROTO_TCP);
2515 if (control != NULL) {
2516 error = tcp_get_msg_priority(control, &msgpri);
2517 if (error) {
2518 return 0;
2519 }
2520 } else {
2521 msgpri = MSG_PRI_0;
2522 }
2523 space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) -
2524 so->so_msg_state->msg_priq[msgpri].msgq_bytes;
0a7de745 2525 if (space < 0) {
39236c6e 2526 space = 0;
2527 }
2528 return space;
2529}
2530
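/*
 * Illustrative sketch (editor's addition): a sender consulting
 * msgq_sbspace() before queueing; the control mbuf carries the priority,
 * as in sbappendmsg_snd() above.  The helper is hypothetical.
 */
#if 0 /* example only */
static boolean_t
example_msg_fits(struct socket *so, struct mbuf *control, int32_t len)
{
	/* space left in the priority queue this message would land in */
	return msgq_sbspace(so, control) >= len;
}
#endif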
2531/* do we have to send all at once on a socket? */
2532int
2533sosendallatonce(struct socket *so)
2534{
0a7de745 2535 return so->so_proto->pr_flags & PR_ATOMIC;
2536}
2537
2538/* can we read something from so? */
2539int
2540soreadable(struct socket *so)
2541{
2542 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
2543 ((so->so_state & SS_CANTRCVMORE)
fe8ab488 2544#if CONTENT_FILTER
0a7de745 2545 && cfil_sock_data_pending(&so->so_rcv) == 0
fe8ab488 2546#endif /* CONTENT_FILTER */
2547 ) ||
2548 so->so_comp.tqh_first || so->so_error;
2549}
2550
2551/* can we write something to so? */
2552
2553int
2554sowriteable(struct socket *so)
2555{
fe8ab488 2556 if ((so->so_state & SS_CANTSENDMORE) ||
2557 so->so_error > 0) {
2558 return 1;
2559 }
2560 if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
2561 return 0;
2562 }
2563 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2564 return 1;
2565 }
fe8ab488 2566
3e170ce0 2567 if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
fe8ab488 2568 if (so->so_flags & SOF_NOTSENT_LOWAT) {
2569 if ((SOCK_DOM(so) == PF_INET6 ||
2570 SOCK_DOM(so) == PF_INET) &&
2571 so->so_type == SOCK_STREAM) {
0a7de745 2572 return tcp_notsent_lowat_check(so);
2573 }
2574#if MPTCP
2575 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
2576 (SOCK_PROTO(so) == IPPROTO_TCP)) {
0a7de745 2577 return mptcp_notsent_lowat_check(so);
2578 }
2579#endif
2580 else {
0a7de745 2581 return 1;
2582 }
2583 } else {
0a7de745 2584 return 1;
2585 }
2586 }
0a7de745 2587 return 0;
2588}
2589
2590/* adjust counters in sb reflecting allocation of m */
2591
2592void
2593sballoc(struct sockbuf *sb, struct mbuf *m)
2594{
316670eb 2595 u_int32_t cnt = 1;
2596 sb->sb_cc += m->m_len;
2597 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
0a7de745 2598 m->m_type != MT_OOBDATA) {
2d21ac55 2599 sb->sb_ctl += m->m_len;
0a7de745 2600 }
2601 sb->sb_mbcnt += MSIZE;
2602
2d21ac55 2603 if (m->m_flags & M_EXT) {
2604 sb->sb_mbcnt += m->m_ext.ext_size;
2605 cnt += (m->m_ext.ext_size >> MSIZESHIFT);
2d21ac55 2606 }
2607 OSAddAtomic(cnt, &total_sbmb_cnt);
2608 VERIFY(total_sbmb_cnt > 0);
0a7de745 2609 if (total_sbmb_cnt > total_sbmb_cnt_peak) {
fe8ab488 2610 total_sbmb_cnt_peak = total_sbmb_cnt;
0a7de745 2611 }
2612
2613 /*
39037602 2614 * If data is being added to the send socket buffer,
2615 * update the send byte count
2616 */
2617 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2618 inp_incr_sndbytes_total(sb->sb_so, m->m_len);
2619 inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
2620 }
2621}
2622
2623/* adjust counters in sb reflecting freeing of m */
2624void
2625sbfree(struct sockbuf *sb, struct mbuf *m)
2626{
2d21ac55 2627 int cnt = -1;
316670eb 2628
2d21ac55 2629 sb->sb_cc -= m->m_len;
39236c6e 2630 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
0a7de745 2631 m->m_type != MT_OOBDATA) {
2d21ac55 2632 sb->sb_ctl -= m->m_len;
0a7de745 2633 }
39236c6e 2634 sb->sb_mbcnt -= MSIZE;
2d21ac55 2635 if (m->m_flags & M_EXT) {
2636 sb->sb_mbcnt -= m->m_ext.ext_size;
2637 cnt -= (m->m_ext.ext_size >> MSIZESHIFT);
2d21ac55 2638 }
2639 OSAddAtomic(cnt, &total_sbmb_cnt);
2640 VERIFY(total_sbmb_cnt >= 0);
0a7de745 2641 if (total_sbmb_cnt < total_sbmb_cnt_floor) {
39037602 2642 total_sbmb_cnt_floor = total_sbmb_cnt;
0a7de745 2643 }
2644
2645 /*
2646 * If data is being removed from the send socket buffer,
2647 * update the send byte count
2648 */
0a7de745 2649 if (sb->sb_flags & SB_SNDBYTE_CNT) {
39037602 2650 inp_decr_sndbytes_total(sb->sb_so, m->m_len);
0a7de745 2651 }
2652}
2653
2654/*
2655 * Set lock on sockbuf sb; sleep if lock is already held.
2656 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
2657 * Returns error without lock if sleep is interrupted.
2658 */
2659int
39236c6e 2660sblock(struct sockbuf *sb, uint32_t flags)
0b4e3aa0 2661{
2662 boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
2663 void *lr_saved = __builtin_return_address(0);
2664 struct socket *so = sb->sb_so;
2665 void * wchan;
6601e61a 2666 int error = 0;
fe8ab488 2667 thread_t tp = current_thread();
6601e61a 2668
39236c6e 2669 VERIFY((flags & SBL_VALID) == flags);
6601e61a 2670
2671 /* so_usecount may be 0 if we get here from sofreelastref() */
2672 if (so == NULL) {
2673 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2674 __func__, sb, sb->sb_flags, lr_saved);
2675 /* NOTREACHED */
2676 } else if (so->so_usecount < 0) {
2677 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2678 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2679 so->so_usecount, lr_saved, solockhistory_nr(so));
2680 /* NOTREACHED */
2681 }
2682
2683 /*
2684 * The content filter thread must hold the sockbuf lock
2685 */
2686 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2687 /*
2688 * Don't panic if we are defunct because SB_LOCK has
2689 * been cleared by sodefunct()
2690 */
0a7de745 2691 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
fe8ab488 2692 panic("%s: SB_LOCK not held for %p\n",
3e170ce0 2693 __func__, sb);
0a7de745 2694 }
2695
2696 /* Keep the sockbuf locked */
0a7de745 2697 return 0;
2698 }
2699
2700 if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
2701 return EWOULDBLOCK;
2702 }
2703 /*
2704 * We may get here from sorflush(), in which case "sb" may not
2705 * point to the real socket buffer. Use the actual socket buffer
2706 * address from the socket instead.
2707 */
2708 wchan = (sb->sb_flags & SB_RECV) ?
2709 &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
2710
2711 /*
2712 * A content filter thread has exclusive access to the sockbuf
2713	 * until it clears the sb_cfil_thread field.
2714 */
2715 while ((sb->sb_flags & SB_LOCK) ||
2716 ((so->so_flags & SOF_CONTENT_FILTER) &&
2717 sb->sb_cfil_thread != NULL)) {
2718 lck_mtx_t *mutex_held;
2719
2720 /*
2721 * XXX: This code should be moved up above outside of this loop;
2722 * however, we may get here as part of sofreelastref(), and
2723 * at that time pr_getlock() may no longer be able to return
2724		 * us the lock. This will be fixed in the future.
2725 */
0a7de745 2726 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 2727 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 2728 } else {
39236c6e 2729 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 2730 }
39236c6e 2731
5ba3f43e 2732 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2733
2734 sb->sb_wantlock++;
2735 VERIFY(sb->sb_wantlock != 0);
2736
2737 error = msleep(wchan, mutex_held,
2738 nointr ? PSOCK : PSOCK | PCATCH,
2739 nointr ? "sb_lock_nointr" : "sb_lock", NULL);
2740
2741 VERIFY(sb->sb_wantlock != 0);
2742 sb->sb_wantlock--;
2743
2744 if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
2745 !(flags & SBL_IGNDEFUNCT)) {
2746 error = EBADF;
39037602 2747 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
39236c6e 2748 "(%d)\n", __func__, proc_selfpid(),
39037602 2749 proc_best_name(current_proc()),
39236c6e 2750 (uint64_t)VM_KERNEL_ADDRPERM(so),
39037602 2751 SOCK_DOM(so), SOCK_TYPE(so), error);
2752 }
2753
2754 if (error != 0) {
2755 return error;
2756 }
2757 }
2758 sb->sb_flags |= SB_LOCK;
0a7de745 2759 return 0;
2760}
2761
2762/*
2763 * Release lock on sockbuf sb
2764 */
0b4e3aa0 2765void
39236c6e 2766sbunlock(struct sockbuf *sb, boolean_t keeplocked)
0b4e3aa0 2767{
39236c6e 2768 void *lr_saved = __builtin_return_address(0);
91447636 2769 struct socket *so = sb->sb_so;
fe8ab488 2770 thread_t tp = current_thread();
91447636 2771
2772 /* so_usecount may be 0 if we get here from sofreelastref() */
2773 if (so == NULL) {
2774 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2775 __func__, sb, sb->sb_flags, lr_saved);
2776 /* NOTREACHED */
2777 } else if (so->so_usecount < 0) {
2778 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2779 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2780 so->so_usecount, lr_saved, solockhistory_nr(so));
2781 /* NOTREACHED */
2782 }
91447636 2783
2784 /*
2785 * The content filter thread must hold the sockbuf lock
2786 */
2787 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
39236c6e 2788 /*
2789 * Don't panic if we are defunct because SB_LOCK has
2790 * been cleared by sodefunct()
39236c6e 2791 */
fe8ab488 2792 if (!(so->so_flags & SOF_DEFUNCT) &&
3e170ce0 2793 !(sb->sb_flags & SB_LOCK) &&
2794 !(so->so_state & SS_DEFUNCT) &&
2795 !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
2796 panic("%s: SB_LOCK not held for %p\n",
3e170ce0 2797 __func__, sb);
fe8ab488 2798 }
3e170ce0 2799 /* Keep the sockbuf locked and proceed */
2800 } else {
2801 VERIFY((sb->sb_flags & SB_LOCK) ||
2802 (so->so_state & SS_DEFUNCT) ||
2803 (so->so_flags1 & SOF1_DEFUNCTINPROG));
2804
2805 sb->sb_flags &= ~SB_LOCK;
2806
2807 if (sb->sb_wantlock > 0) {
2808 /*
2809 * We may get here from sorflush(), in which case "sb"
2810 * may not point to the real socket buffer. Use the
2811 * actual socket buffer address from the socket instead.
2812 */
2813 wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
2814 &so->so_snd.sb_flags);
2815 }
2d21ac55 2816 }
39236c6e 2817
0a7de745 2818 if (!keeplocked) { /* unlock on exit */
2819 if (so->so_flags & SOF_MP_SUBFLOW || SOCK_DOM(so) == PF_MULTIPATH) {
2820 (*so->so_proto->pr_unlock)(so, 1, lr_saved);
0a7de745 2821 } else {
cb323159 2822 lck_mtx_t *mutex_held;
b0d623f7 2823
2824 if (so->so_proto->pr_getlock != NULL) {
2825 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2826 } else {
2827 mutex_held = so->so_proto->pr_domain->dom_mtx;
2828 }
0c530ab8 2829
2830 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2831
2832 VERIFY(so->so_usecount > 0);
2833 so->so_usecount--;
2834 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2835 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2836 lck_mtx_unlock(mutex_held);
2837 }
91447636 2838 }
2839}
2840
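/*
 * Illustrative sketch (editor's addition): the canonical sblock()/sbunlock()
 * pairing around work on a socket buffer.  It assumes the caller already
 * holds the socket lock, as sblock() requires; the helper is hypothetical.
 */
#if 0 /* example only */
static int
example_locked_op(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	int error;

	/* without SBL_WAIT this would return EWOULDBLOCK instead of sleeping */
	error = sblock(sb, SBL_WAIT);
	if (error != 0) {
		return error;
	}
	/* ... operate on sb while SB_LOCK is held ... */
	sbunlock(sb, TRUE);	/* TRUE: keep the socket itself locked */
	return 0;
}
#endif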
2841void
2d21ac55 2842sorwakeup(struct socket *so)
0b4e3aa0 2843{
0a7de745 2844 if (sb_notify(&so->so_rcv)) {
cb323159 2845 sowakeup(so, &so->so_rcv, NULL);
0a7de745 2846 }
2847}
2848
2849void
2d21ac55 2850sowwakeup(struct socket *so)
0b4e3aa0 2851{
0a7de745 2852 if (sb_notify(&so->so_snd)) {
cb323159 2853 sowakeup(so, &so->so_snd, NULL);
0a7de745 2854 }
0b4e3aa0 2855}
2856
2857void
2858soevent(struct socket *so, long hint)
2859{
0a7de745 2860 if (so->so_flags & SOF_KNOTE) {
316670eb 2861 KNOTE(&so->so_klist, hint);
0a7de745 2862 }
2863
2864 soevupcall(so, hint);
2865
2866 /*
2867	 * Don't post an event if this is a subflow socket or
2868	 * the app has opted out of using the cellular interface
2869 */
3e170ce0 2870 if ((hint & SO_FILT_HINT_IFDENIED) &&
2871 !(so->so_flags & SOF_MP_SUBFLOW) &&
2872 !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
2873 !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE) &&
2874 !(so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
39236c6e 2875 soevent_ifdenied(so);
0a7de745 2876 }
2877}
2878
2879void
2880soevupcall(struct socket *so, u_int32_t hint)
2881{
fe8ab488 2882 if (so->so_event != NULL) {
2883 caddr_t so_eventarg = so->so_eventarg;
2884
2885 hint &= so->so_eventmask;
0a7de745 2886 if (hint != 0) {
39236c6e 2887 so->so_event(so, so_eventarg, hint);
0a7de745 2888 }
2889 }
2890}
2891
2892static void
2893soevent_ifdenied(struct socket *so)
2894{
2895 struct kev_netpolicy_ifdenied ev_ifdenied;
2896
0a7de745 2897 bzero(&ev_ifdenied, sizeof(ev_ifdenied));
2898 /*
2899	 * The event consumer is interested in the effective {upid,pid,uuid}
2900	 * info, which can be different from those related to the process
2901 * that recently performed a system call on the socket, i.e. when the
2902 * socket is delegated.
2903 */
2904 if (so->so_flags & SOF_DELEGATED) {
2905 ev_ifdenied.ev_data.eupid = so->e_upid;
2906 ev_ifdenied.ev_data.epid = so->e_pid;
2907 uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
2908 } else {
2909 ev_ifdenied.ev_data.eupid = so->last_upid;
2910 ev_ifdenied.ev_data.epid = so->last_pid;
2911 uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
2912 }
2913
2914 if (++so->so_ifdenied_notifies > 1) {
2915 /*
2916 * Allow for at most one kernel event to be generated per
2917 * socket; so_ifdenied_notifies is reset upon changes in
2918 * the UUID policy. See comments in inp_update_policy.
2919 */
2920 if (net_io_policy_log) {
2921 uuid_string_t buf;
2922
2923 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2924 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
2925 "euuid %s%s has %d redundant events supressed\n",
2926 __func__, so->last_pid,
2927 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2928 SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
2929 ((so->so_flags & SOF_DELEGATED) ?
2930 " [delegated]" : ""), so->so_ifdenied_notifies);
2931 }
2932 } else {
2933 if (net_io_policy_log) {
2934 uuid_string_t buf;
2935
2936 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2937 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
2938 "euuid %s%s event posted\n", __func__,
2939 so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so),
2940 SOCK_DOM(so), SOCK_TYPE(so),
2941 ev_ifdenied.ev_data.epid, buf,
2942 ((so->so_flags & SOF_DELEGATED) ?
2943 " [delegated]" : ""));
2944 }
2945 netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
0a7de745 2946 sizeof(ev_ifdenied));
2947 }
2948}
0b4e3aa0 2949
2950/*
2951 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
2952 */
2953struct sockaddr *
2d21ac55 2954dup_sockaddr(struct sockaddr *sa, int canwait)
2955{
2956 struct sockaddr *sa2;
2957
2958 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
2959 canwait ? M_WAITOK : M_NOWAIT);
0a7de745 2960 if (sa2) {
1c79356b 2961 bcopy(sa, sa2, sa->sa_len);
2962 }
2963 return sa2;
2964}
2965
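/*
 * Illustrative sketch (editor's addition): duplicating a peer address and
 * releasing the copy; M_SONAME is the matching malloc type.  The helper is
 * hypothetical and "sa" stands in for any valid sockaddr.
 */
#if 0 /* example only */
static void
example_use_copy(struct sockaddr *sa)
{
	struct sockaddr *sa2;

	sa2 = dup_sockaddr(sa, 1);	/* canwait != 0: may block for memory */
	if (sa2 != NULL) {
		/* ... use the private copy ... */
		FREE(sa2, M_SONAME);
	}
}
#endif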
2966/*
2967 * Create an external-format (``xsocket'') structure using the information
2968 * in the kernel-format socket structure pointed to by so. This is done
2969 * to reduce the spew of irrelevant information over this interface,
2970 * to isolate user code from changes in the kernel structure, and
2971 * potentially to provide information-hiding if we decide that
2972 * some of this information should be hidden from users.
2973 */
2974void
2975sotoxsocket(struct socket *so, struct xsocket *xso)
2976{
0a7de745 2977 xso->xso_len = sizeof(*xso);
316670eb 2978 xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
1c79356b 2979 xso->so_type = so->so_type;
316670eb 2980 xso->so_options = (short)(so->so_options & 0xffff);
2981 xso->so_linger = so->so_linger;
2982 xso->so_state = so->so_state;
316670eb 2983 xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
91447636 2984 if (so->so_proto) {
2985 xso->xso_protocol = SOCK_PROTO(so);
2986 xso->xso_family = SOCK_DOM(so);
2d21ac55 2987 } else {
91447636 2988 xso->xso_protocol = xso->xso_family = 0;
2d21ac55 2989 }
2990 xso->so_qlen = so->so_qlen;
2991 xso->so_incqlen = so->so_incqlen;
2992 xso->so_qlimit = so->so_qlimit;
2993 xso->so_timeo = so->so_timeo;
2994 xso->so_error = so->so_error;
2995 xso->so_pgid = so->so_pgid;
2996 xso->so_oobmark = so->so_oobmark;
2997 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2998 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
316670eb 2999 xso->so_uid = kauth_cred_getuid(so->so_cred);
3000}
3001
b0d623f7 3002
5ba3f43e 3003#if !CONFIG_EMBEDDED
3004
3005void
3006sotoxsocket64(struct socket *so, struct xsocket64 *xso)
3007{
0a7de745 3008 xso->xso_len = sizeof(*xso);
3009 xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
3010 xso->so_type = so->so_type;
3011 xso->so_options = (short)(so->so_options & 0xffff);
3012 xso->so_linger = so->so_linger;
3013 xso->so_state = so->so_state;
3014 xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
3015 if (so->so_proto) {
3016 xso->xso_protocol = SOCK_PROTO(so);
3017 xso->xso_family = SOCK_DOM(so);
3018 } else {
3019 xso->xso_protocol = xso->xso_family = 0;
3020 }
3021 xso->so_qlen = so->so_qlen;
3022 xso->so_incqlen = so->so_incqlen;
3023 xso->so_qlimit = so->so_qlimit;
3024 xso->so_timeo = so->so_timeo;
3025 xso->so_error = so->so_error;
3026 xso->so_pgid = so->so_pgid;
3027 xso->so_oobmark = so->so_oobmark;
3028 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
3029 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
3030 xso->so_uid = kauth_cred_getuid(so->so_cred);
3031}
3032
5ba3f43e 3033#endif /* !CONFIG_EMBEDDED */
b0d623f7 3034
3035/*
3036 * This does the same for sockbufs. Note that the xsockbuf structure,
3037 * since it is always embedded in a socket, does not include a self
3038 * pointer nor a length. We make this entry point public in case
3039 * some other mechanism needs it.
3040 */
3041void
3042sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
3043{
3044 xsb->sb_cc = sb->sb_cc;
3045 xsb->sb_hiwat = sb->sb_hiwat;
3046 xsb->sb_mbcnt = sb->sb_mbcnt;
3047 xsb->sb_mbmax = sb->sb_mbmax;
3048 xsb->sb_lowat = sb->sb_lowat;
3049 xsb->sb_flags = sb->sb_flags;
b0d623f7 3050 xsb->sb_timeo = (short)
2d21ac55 3051 (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
0a7de745 3052 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
91447636 3053 xsb->sb_timeo = 1;
0a7de745 3054 }
3055}
3056
3057/*
3058 * Based on the policy set by an all-knowing decision maker, throttle
3059 * sockets that have been marked as belonging to a "background" process.
3060 */
3e170ce0 3061inline int
316670eb 3062soisthrottled(struct socket *so)
d1ecb069 3063{
0a7de745 3064 return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
3065}
3066
3e170ce0 3067inline int
3068soisprivilegedtraffic(struct socket *so)
3069{
0a7de745 3070 return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
3071}
3072
3e170ce0 3073inline int
3074soissrcbackground(struct socket *so)
3075{
3076 return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
3077 IS_SO_TC_BACKGROUND(so->so_traffic_class);
316670eb 3078}
d41d1dae 3079
3e170ce0 3080inline int
3081soissrcrealtime(struct socket *so)
3082{
3083 return so->so_traffic_class >= SO_TC_AV &&
3084 so->so_traffic_class <= SO_TC_VO;
3085}
3086
3087inline int
3088soissrcbesteffort(struct socket *so)
3089{
3090 return so->so_traffic_class == SO_TC_BE ||
3091 so->so_traffic_class == SO_TC_RD ||
3092 so->so_traffic_class == SO_TC_OAM;
3093}
3094
3095void
3096soclearfastopen(struct socket *so)
3097{
0a7de745 3098 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
5ba3f43e 3099 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
0a7de745 3100 }
5ba3f43e 3101
0a7de745 3102 if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
5ba3f43e 3103 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
0a7de745 3104 }
3105}
3106
3107void
3108sonullevent(struct socket *so, void *arg, uint32_t hint)
3109{
3110#pragma unused(so, arg, hint)
3111}
3112
3113/*
3114 * Here is the definition of some of the basic objects in the kern.ipc
3115 * branch of the MIB.
3116 */
39236c6e 3117SYSCTL_NODE(_kern, KERN_IPC, ipc,
0a7de745 3118 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");
1c79356b 3119
3120/* Check that the maximum socket buffer size is within a range */
3121
3122static int
39236c6e 3123sysctl_sb_max SYSCTL_HANDLER_ARGS
b0d623f7 3124{
39236c6e 3125#pragma unused(oidp, arg1, arg2)
3126 u_int32_t new_value;
3127 int changed = 0;
0a7de745 3128 int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
39236c6e 3129 &new_value, &changed);
b0d623f7 3130 if (!error && changed) {
39236c6e 3131 if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
3132 sb_max = new_value;
3133 } else {
3134 error = ERANGE;
3135 }
3136 }
0a7de745 3137 return error;
3138}
3139
39236c6e 3140SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
3141 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3142 &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
39236c6e 3143
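/*
 * Illustrative sketch (editor's addition): reading the limit guarded by
 * sysctl_sb_max() from user space via the kern.ipc.maxsockbuf name
 * registered above.
 */
#if 0 /* example only, user-space code */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int32_t maxsockbuf;
	size_t len = sizeof(maxsockbuf);

	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsockbuf, &len,
	    NULL, 0) == 0) {
		printf("kern.ipc.maxsockbuf = %u\n", maxsockbuf);
	}
	return 0;
}
#endif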
39236c6e 3144SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
0a7de745 3145 CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");
3146
3147SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
0a7de745 3148 CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");
3149
3150SYSCTL_INT(_kern_ipc, OID_AUTO, njcl,
0a7de745 3151 CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
3152
3153SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
0a7de745 3154 CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
3155
3156SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
3157 CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
3158 "Enable socket queue limit compatibility");
1c79356b 3159
3160/*
3161 * Hack alert -- rdar://33572856
3162 * A loopback test we cannot change was failing because it sets
3163 * SO_SENDTIMEO to 5 seconds and that's also the value
3164 * of the minimum persist timer. Because of the persist timer,
3165 * the connection was not idle for 5 seconds and SO_SNDTIMEO
3166 * was not triggering at 5 seconds causing the test failure.
3167 * As a workaround we check the sysctl soqlencomp, which the test is
3168 * already setting, to disable auto tuning of the receive buffer.
3169 */
3170
3171extern u_int32_t tcp_do_autorcvbuf;
3172
3173static int
3174sysctl_soqlencomp SYSCTL_HANDLER_ARGS
3175{
3176#pragma unused(oidp, arg1, arg2)
3177 u_int32_t new_value;
3178 int changed = 0;
0a7de745 3179 int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
3180 &new_value, &changed);
3181 if (!error && changed) {
3182 soqlencomp = new_value;
3183 if (new_value != 0) {
3184 tcp_do_autorcvbuf = 0;
3185 tcptv_persmin_val = 6 * TCP_RETRANSHZ;
3186 }
3187 }
0a7de745 3188 return error;
3189}
3190SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
3191 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3192 &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");
316670eb 3193
39037602 3194SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 3195 &total_sbmb_cnt, 0, "");
39037602 3196SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_peak, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 3197 &total_sbmb_cnt_peak, 0, "");
39037602 3198SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_floor, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 3199 &total_sbmb_cnt_floor, 0, "");
39037602 3200SYSCTL_QUAD(_kern_ipc, OID_AUTO, sbmb_limreached, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 3201 &sbmb_limreached, "");
3202
3203
3204SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
3205
39236c6e 3206SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 3207 &net_io_policy_log, 0, "");
3208
3209#if CONFIG_PROC_UUID_POLICY
3210SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 3211 &net_io_policy_uuid, 0, "");
39236c6e 3212#endif /* CONFIG_PROC_UUID_POLICY */