]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_socket2.c
xnu-7195.50.7.100.1.tar.gz
[apple/xnu.git] / bsd / kern / uipc_socket2.c
CommitLineData
1c79356b 1/*
bca245ac 2 * Copyright (c) 1998-2020 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
1c79356b
A
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
62 */
2d21ac55
A
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
1c79356b
A
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/domain.h>
73#include <sys/kernel.h>
91447636
A
74#include <sys/proc_internal.h>
75#include <sys/kauth.h>
1c79356b
A
76#include <sys/malloc.h>
77#include <sys/mbuf.h>
316670eb 78#include <sys/mcache.h>
1c79356b
A
79#include <sys/protosw.h>
80#include <sys/stat.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/signalvar.h>
84#include <sys/sysctl.h>
39236c6e 85#include <sys/syslog.h>
cb323159 86#include <sys/unpcb.h>
1c79356b 87#include <sys/ev.h>
91447636
A
88#include <kern/locks.h>
89#include <net/route.h>
fe8ab488 90#include <net/content_filter.h>
91447636
A
91#include <netinet/in.h>
92#include <netinet/in_pcb.h>
5ba3f43e 93#include <netinet/tcp_var.h>
fa4905b1 94#include <sys/kdebug.h>
2d21ac55
A
95#include <libkern/OSAtomic.h>
96
97#if CONFIG_MACF
98#include <security/mac_framework.h>
99#endif
100
316670eb
A
101#include <mach/vm_param.h>
102
fe8ab488
A
103#if MPTCP
104#include <netinet/mptcp_var.h>
105#endif
fa4905b1 106
0a7de745
A
107#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
108#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
fa4905b1 109
3e170ce0
A
110SYSCTL_DECL(_kern_ipc);
111
112__private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
113SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
114 CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
115
2d21ac55
A
116static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
117static struct socket *sonewconn_internal(struct socket *, int);
2d21ac55
A
118static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
119 struct mbuf *);
39236c6e 120static void soevent_ifdenied(struct socket *);
fa4905b1 121
bca245ac
A
122static int sbappendrecord_common(struct sockbuf *sb, struct mbuf *m0, boolean_t nodrop);
123static int sbappend_common(struct sockbuf *sb, struct mbuf *m, boolean_t nodrop);
124
1c79356b
A
125/*
126 * Primitive routines for operating on sockets and socket buffers
127 */
2d21ac55
A
128static int soqlimitcompat = 1;
129static int soqlencomp = 0;
1c79356b 130
39236c6e
A
131/*
132 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
133 * get scaled up or down to suit that memory configuration. high_sb_max is a
134 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
b0d623f7
A
135 */
136
0a7de745
A
137u_int32_t sb_max = SB_MAX; /* XXX should be static */
138u_int32_t high_sb_max = SB_MAX;
1c79356b 139
0a7de745 140static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */
fe8ab488 141int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
39037602 142int32_t total_sbmb_cnt_floor __attribute__((aligned(8))) = 0;
fe8ab488
A
143int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
144int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
316670eb 145
0a7de745 146u_int32_t net_io_policy_log = 0; /* log socket policy changes */
39236c6e 147#if CONFIG_PROC_UUID_POLICY
0a7de745 148u_int32_t net_io_policy_uuid = 1; /* enable UUID socket policy */
39236c6e
A
149#endif /* CONFIG_PROC_UUID_POLICY */
150
1c79356b
A
151/*
152 * Procedures to manipulate state flags of socket
153 * and do appropriate wakeups. Normal sequence from the
154 * active (originating) side is that soisconnecting() is
155 * called during processing of connect() call,
156 * resulting in an eventual call to soisconnected() if/when the
157 * connection is established. When the connection is torn down
9bccf70c 158 * soisdisconnecting() is called during processing of disconnect() call,
1c79356b
A
159 * and soisdisconnected() is called when the connection to the peer
160 * is totally severed. The semantics of these routines are such that
161 * connectionless protocols can call soisconnected() and soisdisconnected()
162 * only, bypassing the in-progress calls when setting up a ``connection''
163 * takes no time.
164 *
165 * From the passive side, a socket is created with
e3027f41
A
166 * two queues of sockets: so_incomp for connections in progress
167 * and so_comp for connections already made and awaiting user acceptance.
9bccf70c 168 * As a protocol is preparing incoming connections, it creates a socket
e3027f41 169 * structure queued on so_incomp by calling sonewconn(). When the connection
1c79356b 170 * is established, soisconnected() is called, and transfers the
e3027f41 171 * socket structure to so_comp, making it available to accept().
1c79356b 172 *
9bccf70c 173 * If a socket is closed with sockets on either
e3027f41 174 * so_incomp or so_comp, these sockets are dropped.
9bccf70c 175 *
1c79356b
A
176 * If higher level protocols are implemented in
177 * the kernel, the wakeups done here will sometimes
178 * cause software-interrupt process scheduling.
179 */
1c79356b 180void
2d21ac55 181soisconnecting(struct socket *so)
1c79356b 182{
0a7de745 183 so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
1c79356b 184 so->so_state |= SS_ISCONNECTING;
2d21ac55 185
91447636 186 sflt_notify(so, sock_evt_connecting, NULL);
1c79356b
A
187}
188
189void
2d21ac55 190soisconnected(struct socket *so)
9bccf70c 191{
cb323159
A
192 /*
193 * If socket is subject to filter and is pending initial verdict,
194 * delay marking socket as connected and do not present the connected
195 * socket to user just yet.
196 */
197 if (cfil_sock_connected_pending_verdict(so)) {
198 return;
199 }
200
0a7de745 201 so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING);
1c79356b 202 so->so_state |= SS_ISCONNECTED;
2d21ac55 203
3e170ce0
A
204 soreserve_preconnect(so, 0);
205
91447636 206 sflt_notify(so, sock_evt_connected, NULL);
2d21ac55 207
d190cdc3
A
208 if (so->so_head != NULL && (so->so_state & SS_INCOMP)) {
209 struct socket *head = so->so_head;
210 int locked = 0;
0a7de745 211
d190cdc3
A
212 /*
213 * Enforce lock order when the protocol has per socket locks
214 */
ff6e181a 215 if (head->so_proto->pr_getlock != NULL) {
91447636 216 socket_lock(head, 1);
813fb2f6 217 so_acquire_accept_list(head, so);
d190cdc3 218 locked = 1;
ff6e181a 219 }
d190cdc3
A
220 if (so->so_head == head && (so->so_state & SS_INCOMP)) {
221 so->so_state &= ~SS_INCOMP;
222 so->so_state |= SS_COMP;
813fb2f6 223 TAILQ_REMOVE(&head->so_incomp, so, so_list);
d190cdc3 224 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
813fb2f6
A
225 head->so_incqlen--;
226
227 /*
228 * We have to release the accept list in
229 * case a socket callback calls sock_accept()
230 */
231 if (locked != 0) {
232 so_release_accept_list(head);
d190cdc3 233 socket_unlock(so, 0);
813fb2f6 234 }
813fb2f6
A
235 sorwakeup(head);
236 wakeup_one((caddr_t)&head->so_timeo);
d190cdc3 237
813fb2f6
A
238 if (locked != 0) {
239 socket_unlock(head, 1);
240 socket_lock(so, 0);
5ba3f43e 241 }
813fb2f6
A
242 } else if (locked != 0) {
243 so_release_accept_list(head);
d190cdc3 244 socket_unlock(head, 1);
813fb2f6 245 }
1c79356b 246 } else {
1c79356b
A
247 wakeup((caddr_t)&so->so_timeo);
248 sorwakeup(so);
249 sowwakeup(so);
39236c6e
A
250 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED |
251 SO_FILT_HINT_CONNINFO_UPDATED);
1c79356b
A
252 }
253}
254
3e170ce0
A
255boolean_t
256socanwrite(struct socket *so)
257{
0a7de745 258 return (so->so_state & SS_ISCONNECTED) ||
3e170ce0 259 !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
0a7de745 260 (so->so_flags1 & SOF1_PRECONNECT_DATA);
3e170ce0
A
261}
262
1c79356b 263void
2d21ac55 264soisdisconnecting(struct socket *so)
9bccf70c 265{
1c79356b 266 so->so_state &= ~SS_ISCONNECTING;
0a7de745 267 so->so_state |= (SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE);
316670eb 268 soevent(so, SO_FILT_HINT_LOCKED);
91447636 269 sflt_notify(so, sock_evt_disconnecting, NULL);
1c79356b
A
270 wakeup((caddr_t)&so->so_timeo);
271 sowwakeup(so);
272 sorwakeup(so);
273}
274
275void
2d21ac55 276soisdisconnected(struct socket *so)
9bccf70c 277{
0a7de745
A
278 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
279 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
39236c6e
A
280 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
281 SO_FILT_HINT_CONNINFO_UPDATED);
91447636 282 sflt_notify(so, sock_evt_disconnected, NULL);
1c79356b
A
283 wakeup((caddr_t)&so->so_timeo);
284 sowwakeup(so);
285 sorwakeup(so);
fe8ab488
A
286
287#if CONTENT_FILTER
288 /* Notify content filters as soon as we cannot send/receive data */
289 cfil_sock_notify_shutdown(so, SHUT_RDWR);
290#endif /* CONTENT_FILTER */
1c79356b
A
291}
292
39236c6e
A
293/*
294 * This function will issue a wakeup like soisdisconnected but it will not
6d2010ae
A
295 * notify the socket filters. This will avoid unlocking the socket
296 * in the midst of closing it.
297 */
298void
299sodisconnectwakeup(struct socket *so)
300{
0a7de745
A
301 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
302 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
39236c6e
A
303 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
304 SO_FILT_HINT_CONNINFO_UPDATED);
6d2010ae
A
305 wakeup((caddr_t)&so->so_timeo);
306 sowwakeup(so);
307 sorwakeup(so);
fe8ab488
A
308
309#if CONTENT_FILTER
310 /* Notify content filters as soon as we cannot send/receive data */
311 cfil_sock_notify_shutdown(so, SHUT_RDWR);
312#endif /* CONTENT_FILTER */
6d2010ae
A
313}
314
1c79356b
A
315/*
316 * When an attempt at a new connection is noted on a socket
317 * which accepts connections, sonewconn is called. If the
318 * connection is possible (subject to space constraints, etc.)
319 * then we allocate a new structure, properly linked into the
320 * data structure of the original socket, and return this.
321 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
322 */
91447636 323static struct socket *
2d21ac55 324sonewconn_internal(struct socket *head, int connstatus)
9bccf70c 325{
2d21ac55
A
326 int so_qlen, error = 0;
327 struct socket *so;
91447636
A
328 lck_mtx_t *mutex_held;
329
0a7de745 330 if (head->so_proto->pr_getlock != NULL) {
91447636 331 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
0a7de745 332 } else {
91447636 333 mutex_held = head->so_proto->pr_domain->dom_mtx;
0a7de745 334 }
5ba3f43e 335 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1c79356b 336
2d21ac55
A
337 if (!soqlencomp) {
338 /*
339 * This is the default case; so_qlen represents the
340 * sum of both incomplete and completed queues.
341 */
342 so_qlen = head->so_qlen;
343 } else {
344 /*
345 * When kern.ipc.soqlencomp is set to 1, so_qlen
346 * represents only the completed queue. Since we
347 * cannot let the incomplete queue goes unbounded
348 * (in case of SYN flood), we cap the incomplete
349 * queue length to at most somaxconn, and use that
350 * as so_qlen so that we fail immediately below.
351 */
352 so_qlen = head->so_qlen - head->so_incqlen;
0a7de745 353 if (head->so_incqlen > somaxconn) {
2d21ac55 354 so_qlen = somaxconn;
0a7de745 355 }
2d21ac55
A
356 }
357
358 if (so_qlen >=
0a7de745
A
359 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) {
360 return (struct socket *)0;
361 }
39236c6e 362 so = soalloc(1, SOCK_DOM(head), head->so_type);
0a7de745
A
363 if (so == NULL) {
364 return (struct socket *)0;
365 }
9bccf70c
A
366 /* check if head was closed during the soalloc */
367 if (head->so_proto == NULL) {
2d21ac55 368 sodealloc(so);
0a7de745 369 return (struct socket *)0;
1c79356b
A
370 }
371
1c79356b 372 so->so_type = head->so_type;
0a7de745 373 so->so_options = head->so_options & ~SO_ACCEPTCONN;
1c79356b
A
374 so->so_linger = head->so_linger;
375 so->so_state = head->so_state | SS_NOFDREF;
376 so->so_proto = head->so_proto;
377 so->so_timeo = head->so_timeo;
378 so->so_pgid = head->so_pgid;
316670eb
A
379 kauth_cred_ref(head->so_cred);
380 so->so_cred = head->so_cred;
381 so->last_pid = head->last_pid;
382 so->last_upid = head->last_upid;
0a7de745 383 memcpy(so->last_uuid, head->last_uuid, sizeof(so->last_uuid));
39236c6e
A
384 if (head->so_flags & SOF_DELEGATED) {
385 so->e_pid = head->e_pid;
386 so->e_upid = head->e_upid;
0a7de745 387 memcpy(so->e_uuid, head->e_uuid, sizeof(so->e_uuid));
39236c6e 388 }
b0d623f7 389 /* inherit socket options stored in so_flags */
39236c6e
A
390 so->so_flags = head->so_flags &
391 (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID |
392 SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT |
0a7de745 393 SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS | SOF_NOTSENT_LOWAT |
f427ee49 394 SOF_DELEGATED);
cb323159 395 so->so_flags1 |= SOF1_INBOUND;
91447636 396 so->so_usecount = 1;
0c530ab8
A
397 so->next_lock_lr = 0;
398 so->next_unlock_lr = 0;
1c79356b 399
0a7de745 400 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
13fec989 401 so->so_rcv.sb_so = so->so_snd.sb_so = so;
2d21ac55 402
d1ecb069 403 /* inherit traffic management properties of listener */
39037602 404 so->so_flags1 |=
cb323159
A
405 head->so_flags1 & (SOF1_TRAFFIC_MGT_SO_BACKGROUND | SOF1_TC_NET_SERV_TYPE |
406 SOF1_QOSMARKING_ALLOWED | SOF1_QOSMARKING_POLICY_OVERRIDE);
d1ecb069 407 so->so_background_thread = head->so_background_thread;
d41d1dae 408 so->so_traffic_class = head->so_traffic_class;
cb323159 409 so->so_netsvctype = head->so_netsvctype;
d1ecb069 410
91447636 411 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
9bccf70c 412 sodealloc(so);
0a7de745 413 return (struct socket *)0;
9bccf70c 414 }
316670eb
A
415 so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
416 so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
9bccf70c 417
91447636 418 /*
2d21ac55
A
419 * Must be done with head unlocked to avoid deadlock
420 * for protocol with per socket mutexes.
91447636 421 */
0a7de745 422 if (head->so_proto->pr_unlock) {
37839358 423 socket_unlock(head, 0);
0a7de745 424 }
2d21ac55
A
425 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
426 error) {
1c79356b 427 sodealloc(so);
0a7de745 428 if (head->so_proto->pr_unlock) {
37839358 429 socket_lock(head, 0);
0a7de745
A
430 }
431 return (struct socket *)0;
1c79356b 432 }
6d2010ae 433 if (head->so_proto->pr_unlock) {
37839358 434 socket_lock(head, 0);
39236c6e
A
435 /*
436 * Radar 7385998 Recheck that the head is still accepting
6d2010ae
A
437 * to avoid race condition when head is getting closed.
438 */
439 if ((head->so_options & SO_ACCEPTCONN) == 0) {
440 so->so_state &= ~SS_NOFDREF;
441 soclose(so);
0a7de745 442 return (struct socket *)0;
6d2010ae
A
443 }
444 }
445
cb323159
A
446 if (so->so_proto->pr_copy_last_owner != NULL) {
447 (*so->so_proto->pr_copy_last_owner)(so, head);
448 }
39236c6e
A
449 atomic_add_32(&so->so_proto->pr_domain->dom_refs, 1);
450
6d2010ae 451 /* Insert in head appropriate lists */
813fb2f6
A
452 so_acquire_accept_list(head, NULL);
453
6d2010ae
A
454 so->so_head = head;
455
39236c6e
A
456 /*
457 * Since this socket is going to be inserted into the incomp
458 * queue, it can be picked up by another thread in
459 * tcp_dropdropablreq to get dropped before it is setup..
6d2010ae
A
460 * To prevent this race, set in-progress flag which can be
461 * cleared later
462 */
463 so->so_flags |= SOF_INCOMP_INPROGRESS;
1c79356b
A
464
465 if (connstatus) {
466 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
467 so->so_state |= SS_COMP;
468 } else {
469 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
470 so->so_state |= SS_INCOMP;
471 head->so_incqlen++;
472 }
473 head->so_qlen++;
91447636 474
813fb2f6
A
475 so_release_accept_list(head);
476
0c530ab8
A
477 /* Attach socket filters for this protocol */
478 sflt_initsock(so);
2d21ac55 479
91447636
A
480 if (connstatus) {
481 so->so_state |= connstatus;
482 sorwakeup(head);
483 wakeup((caddr_t)&head->so_timeo);
484 }
0a7de745 485 return so;
1c79356b
A
486}
487
91447636
A
488
489struct socket *
2d21ac55 490sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
91447636 491{
6d2010ae 492 int error = sflt_connectin(head, from);
91447636 493 if (error) {
0a7de745 494 return NULL;
91447636 495 }
2d21ac55 496
0a7de745 497 return sonewconn_internal(head, connstatus);
91447636
A
498}
499
1c79356b
A
500/*
501 * Socantsendmore indicates that no more data will be sent on the
502 * socket; it would normally be applied to a socket when the user
503 * informs the system that no more data is to be sent, by the protocol
504 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
505 * will be received, and will normally be applied to the socket by a
506 * protocol when it detects that the peer will send no more data.
507 * Data queued for reading in the socket may yet be read.
508 */
509
510void
2d21ac55 511socantsendmore(struct socket *so)
9bccf70c 512{
1c79356b 513 so->so_state |= SS_CANTSENDMORE;
39236c6e 514 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE);
91447636 515 sflt_notify(so, sock_evt_cantsendmore, NULL);
1c79356b
A
516 sowwakeup(so);
517}
518
519void
2d21ac55 520socantrcvmore(struct socket *so)
9bccf70c 521{
1c79356b 522 so->so_state |= SS_CANTRCVMORE;
39236c6e 523 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
91447636 524 sflt_notify(so, sock_evt_cantrecvmore, NULL);
1c79356b
A
525 sorwakeup(so);
526}
527
528/*
529 * Wait for data to arrive at/drain from a socket buffer.
530 */
531int
2d21ac55 532sbwait(struct sockbuf *sb)
1c79356b 533{
39236c6e
A
534 boolean_t nointr = (sb->sb_flags & SB_NOINTR);
535 void *lr_saved = __builtin_return_address(0);
91447636
A
536 struct socket *so = sb->sb_so;
537 lck_mtx_t *mutex_held;
538 struct timespec ts;
39236c6e 539 int error = 0;
91447636 540
39236c6e
A
541 if (so == NULL) {
542 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
543 __func__, sb, sb->sb_flags, lr_saved);
544 /* NOTREACHED */
545 } else if (so->so_usecount < 1) {
546 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
547 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
548 so->so_usecount, lr_saved, solockhistory_nr(so));
549 /* NOTREACHED */
550 }
2d21ac55 551
39037602
A
552 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
553 error = EBADF;
554 if (so->so_flags & SOF_DEFUNCT) {
555 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
556 "(%d)\n", __func__, proc_selfpid(),
557 proc_best_name(current_proc()),
558 (uint64_t)VM_KERNEL_ADDRPERM(so),
559 SOCK_DOM(so), SOCK_TYPE(so), error);
560 }
0a7de745 561 return error;
39037602
A
562 }
563
0a7de745 564 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 565 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 566 } else {
91447636 567 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 568 }
1c79356b 569
5ba3f43e 570 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
91447636 571
91447636
A
572 ts.tv_sec = sb->sb_timeo.tv_sec;
573 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
39236c6e
A
574
575 sb->sb_waiters++;
576 VERIFY(sb->sb_waiters != 0);
577
91447636 578 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
39236c6e
A
579 nointr ? PSOCK : PSOCK | PCATCH,
580 nointr ? "sbwait_nointr" : "sbwait", &ts);
91447636 581
39236c6e
A
582 VERIFY(sb->sb_waiters != 0);
583 sb->sb_waiters--;
91447636 584
39236c6e
A
585 if (so->so_usecount < 1) {
586 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
587 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
588 so->so_usecount, lr_saved, solockhistory_nr(so));
589 /* NOTREACHED */
590 }
91447636 591
6d2010ae 592 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
91447636 593 error = EBADF;
6d2010ae 594 if (so->so_flags & SOF_DEFUNCT) {
39037602 595 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
39236c6e 596 "(%d)\n", __func__, proc_selfpid(),
39037602 597 proc_best_name(current_proc()),
39236c6e 598 (uint64_t)VM_KERNEL_ADDRPERM(so),
39037602 599 SOCK_DOM(so), SOCK_TYPE(so), error);
6d2010ae 600 }
91447636
A
601 }
602
0a7de745 603 return error;
1c79356b
A
604}
605
6d2010ae
A
606void
607sbwakeup(struct sockbuf *sb)
608{
0a7de745 609 if (sb->sb_waiters > 0) {
6d2010ae 610 wakeup((caddr_t)&sb->sb_cc);
0a7de745 611 }
6d2010ae
A
612}
613
1c79356b
A
614/*
615 * Wakeup processes waiting on a socket buffer.
616 * Do asynchronous notification via SIGIO
617 * if the socket has the SS_ASYNC flag set.
618 */
619void
cb323159 620sowakeup(struct socket *so, struct sockbuf *sb, struct socket *so2)
1c79356b 621{
6d2010ae 622 if (so->so_flags & SOF_DEFUNCT) {
39037602 623 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] si 0x%x, "
39236c6e 624 "fl 0x%x [%s]\n", __func__, proc_selfpid(),
39037602 625 proc_best_name(current_proc()),
39236c6e
A
626 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
627 SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags,
39037602 628 (sb->sb_flags & SB_RECV) ? "rcv" : "snd");
6d2010ae
A
629 }
630
0b4e3aa0 631 sb->sb_flags &= ~SB_SEL;
1c79356b 632 selwakeup(&sb->sb_sel);
6d2010ae 633 sbwakeup(sb);
1c79356b 634 if (so->so_state & SS_ASYNC) {
0a7de745 635 if (so->so_pgid < 0) {
1c79356b 636 gsignal(-so->so_pgid, SIGIO);
0a7de745 637 } else if (so->so_pgid > 0) {
2d21ac55 638 proc_signal(so->so_pgid, SIGIO);
0a7de745 639 }
1c79356b 640 }
91447636
A
641 if (sb->sb_flags & SB_KNOTE) {
642 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
643 }
644 if (sb->sb_flags & SB_UPCALL) {
39236c6e
A
645 void (*sb_upcall)(struct socket *, void *, int);
646 caddr_t sb_upcallarg;
5ba3f43e 647 int lock = !(sb->sb_flags & SB_UPCALL_LOCK);
2d21ac55 648
39236c6e
A
649 sb_upcall = sb->sb_upcall;
650 sb_upcallarg = sb->sb_upcallarg;
2d21ac55 651 /* Let close know that we're about to do an upcall */
316670eb 652 so->so_upcallusecount++;
2d21ac55 653
0a7de745 654 if (lock) {
cb323159
A
655 if (so2) {
656 struct unpcb *unp = sotounpcb(so2);
657 unp->unp_flags |= UNP_DONTDISCONNECT;
658 unp->rw_thrcount++;
659
660 socket_unlock(so2, 0);
661 }
5ba3f43e 662 socket_unlock(so, 0);
0a7de745 663 }
39236c6e 664 (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT);
0a7de745 665 if (lock) {
cb323159
A
666 if (so2 && so > so2) {
667 struct unpcb *unp;
668 socket_lock(so2, 0);
669
670 unp = sotounpcb(so2);
671 unp->rw_thrcount--;
672 if (unp->rw_thrcount == 0) {
673 unp->unp_flags &= ~UNP_DONTDISCONNECT;
674 wakeup(unp);
675 }
676 }
677
5ba3f43e 678 socket_lock(so, 0);
cb323159
A
679
680 if (so2 && so < so2) {
681 struct unpcb *unp;
682 socket_lock(so2, 0);
683
684 unp = sotounpcb(so2);
685 unp->rw_thrcount--;
686 if (unp->rw_thrcount == 0) {
687 unp->unp_flags &= ~UNP_DONTDISCONNECT;
688 wakeup(unp);
689 }
690 }
0a7de745 691 }
2d21ac55 692
316670eb 693 so->so_upcallusecount--;
2d21ac55 694 /* Tell close that it's safe to proceed */
39236c6e 695 if ((so->so_flags & SOF_CLOSEWAIT) &&
0a7de745 696 so->so_upcallusecount == 0) {
39236c6e 697 wakeup((caddr_t)&so->so_upcallusecount);
0a7de745 698 }
91447636 699 }
fe8ab488
A
700#if CONTENT_FILTER
701 /*
702 * Trap disconnection events for content filters
703 */
704 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
705 if ((sb->sb_flags & SB_RECV)) {
0a7de745 706 if (so->so_state & (SS_CANTRCVMORE)) {
fe8ab488 707 cfil_sock_notify_shutdown(so, SHUT_RD);
0a7de745 708 }
fe8ab488 709 } else {
0a7de745 710 if (so->so_state & (SS_CANTSENDMORE)) {
fe8ab488 711 cfil_sock_notify_shutdown(so, SHUT_WR);
0a7de745 712 }
fe8ab488
A
713 }
714 }
715#endif /* CONTENT_FILTER */
1c79356b
A
716}
717
718/*
719 * Socket buffer (struct sockbuf) utility routines.
720 *
721 * Each socket contains two socket buffers: one for sending data and
722 * one for receiving data. Each buffer contains a queue of mbufs,
723 * information about the number of mbufs and amount of data in the
724 * queue, and other fields allowing select() statements and notification
725 * on data availability to be implemented.
726 *
727 * Data stored in a socket buffer is maintained as a list of records.
728 * Each record is a list of mbufs chained together with the m_next
729 * field. Records are chained together with the m_nextpkt field. The upper
730 * level routine soreceive() expects the following conventions to be
731 * observed when placing information in the receive buffer:
732 *
733 * 1. If the protocol requires each message be preceded by the sender's
734 * name, then a record containing that name must be present before
735 * any associated data (mbuf's must be of type MT_SONAME).
736 * 2. If the protocol supports the exchange of ``access rights'' (really
737 * just additional data associated with the message), and there are
738 * ``rights'' to be received, then a record containing this data
739 * should be present (mbuf's must be of type MT_RIGHTS).
740 * 3. If a name or rights record exists, then it must be followed by
741 * a data record, perhaps of zero length.
742 *
743 * Before using a new socket structure it is first necessary to reserve
744 * buffer space to the socket, by calling sbreserve(). This should commit
745 * some of the available buffer space in the system buffer pool for the
746 * socket (currently, it does nothing but enforce limits). The space
747 * should be released by calling sbrelease() when the socket is destroyed.
748 */
749
2d21ac55
A
750/*
751 * Returns: 0 Success
752 * ENOBUFS
753 */
1c79356b 754int
f427ee49 755soreserve(struct socket *so, uint32_t sndcc, uint32_t rcvcc)
1c79356b 756{
0a7de745
A
757 /*
758 * We do not want to fail the creation of a socket
759 * when kern.ipc.maxsockbuf is less than the
760 * default socket buffer socket size of the protocol
761 * so force the buffer sizes to be at most the
762 * limit enforced by sbreserve()
763 */
764 uint64_t maxcc = (uint64_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
765 if (sndcc > maxcc) {
f427ee49 766 sndcc = (uint32_t)maxcc;
0a7de745
A
767 }
768 if (rcvcc > maxcc) {
f427ee49 769 rcvcc = (uint32_t)maxcc;
0a7de745
A
770 }
771 if (sbreserve(&so->so_snd, sndcc) == 0) {
1c79356b 772 goto bad;
0a7de745 773 } else {
316670eb 774 so->so_snd.sb_idealsize = sndcc;
0a7de745 775 }
316670eb 776
0a7de745 777 if (sbreserve(&so->so_rcv, rcvcc) == 0) {
1c79356b 778 goto bad2;
0a7de745 779 } else {
316670eb 780 so->so_rcv.sb_idealsize = rcvcc;
0a7de745 781 }
316670eb 782
0a7de745 783 if (so->so_rcv.sb_lowat == 0) {
1c79356b 784 so->so_rcv.sb_lowat = 1;
0a7de745
A
785 }
786 if (so->so_snd.sb_lowat == 0) {
1c79356b 787 so->so_snd.sb_lowat = MCLBYTES;
0a7de745
A
788 }
789 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) {
1c79356b 790 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
0a7de745
A
791 }
792 return 0;
1c79356b 793bad2:
39236c6e 794 so->so_snd.sb_flags &= ~SB_SEL;
0b4e3aa0 795 selthreadclear(&so->so_snd.sb_sel);
1c79356b
A
796 sbrelease(&so->so_snd);
797bad:
0a7de745 798 return ENOBUFS;
1c79356b
A
799}
800
3e170ce0
A
801void
802soreserve_preconnect(struct socket *so, unsigned int pre_cc)
803{
804 /* As of now, same bytes for both preconnect read and write */
805 so->so_snd.sb_preconn_hiwat = pre_cc;
806 so->so_rcv.sb_preconn_hiwat = pre_cc;
807}
808
1c79356b
A
809/*
810 * Allot mbufs to a sockbuf.
811 * Attempt to scale mbmax so that mbcnt doesn't become limiting
812 * if buffering efficiency is near the normal case.
813 */
814int
b0d623f7 815sbreserve(struct sockbuf *sb, u_int32_t cc)
1c79356b 816{
f427ee49
A
817 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES) ||
818 (cc > sb->sb_hiwat && (sb->sb_flags & SB_LIMITED))) {
0a7de745
A
819 return 0;
820 }
1c79356b
A
821 sb->sb_hiwat = cc;
822 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
0a7de745 823 if (sb->sb_lowat > sb->sb_hiwat) {
1c79356b 824 sb->sb_lowat = sb->sb_hiwat;
0a7de745
A
825 }
826 return 1;
1c79356b
A
827}
828
829/*
830 * Free mbufs held by a socket, and reserved mbuf space.
831 */
2d21ac55 832/* WARNING needs to do selthreadclear() before calling this */
1c79356b 833void
2d21ac55 834sbrelease(struct sockbuf *sb)
1c79356b 835{
1c79356b 836 sbflush(sb);
9bccf70c
A
837 sb->sb_hiwat = 0;
838 sb->sb_mbmax = 0;
1c79356b
A
839}
840
841/*
842 * Routines to add and remove
843 * data from an mbuf queue.
844 *
845 * The routines sbappend() or sbappendrecord() are normally called to
846 * append new mbufs to a socket buffer, after checking that adequate
847 * space is available, comparing the function sbspace() with the amount
848 * of data to be added. sbappendrecord() differs from sbappend() in
849 * that data supplied is treated as the beginning of a new record.
850 * To place a sender's address, optional access rights, and data in a
851 * socket receive buffer, sbappendaddr() should be used. To place
852 * access rights and data in a socket receive buffer, sbappendrights()
853 * should be used. In either case, the new data begins a new record.
854 * Note that unlike sbappend() and sbappendrecord(), these routines check
855 * for the caller that there will be enough space to store the data.
856 * Each fails if there is not enough space, or if it cannot find mbufs
857 * to store additional information in.
858 *
859 * Reliable protocols may use the socket send buffer to hold data
860 * awaiting acknowledgement. Data is normally copied from a socket
861 * send buffer in a protocol with m_copy for output to a peer,
862 * and then removing the data from the socket buffer with sbdrop()
863 * or sbdroprecord() when the data is acknowledged by the peer.
864 */
865
866/*
867 * Append mbuf chain m to the last record in the
868 * socket buffer sb. The additional space associated
869 * the mbuf chain is recorded in sb. Empty mbufs are
870 * discarded and mbufs are compacted where possible.
871 */
bca245ac
A
872static int
873sbappend_common(struct sockbuf *sb, struct mbuf *m, boolean_t nodrop)
9bccf70c 874{
2d21ac55 875 struct socket *so = sb->sb_so;
1c79356b 876
2d21ac55 877 if (m == NULL || (sb->sb_flags & SB_DROP)) {
bca245ac 878 if (m != NULL && !nodrop) {
2d21ac55 879 m_freem(m);
0a7de745
A
880 }
881 return 0;
2d21ac55 882 }
fa4905b1 883
2d21ac55 884 SBLASTRECORDCHK(sb, "sbappend 1");
fa4905b1 885
0a7de745 886 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) {
bca245ac 887 return sbappendrecord_common(sb, m, nodrop);
0a7de745 888 }
2d21ac55 889
bca245ac
A
890 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
891 ASSERT(nodrop == FALSE);
892 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
893 int error = sflt_data_in(so, NULL, &m, NULL, 0);
894 SBLASTRECORDCHK(sb, "sbappend 2");
fe8ab488
A
895
896#if CONTENT_FILTER
bca245ac
A
897 if (error == 0) {
898 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
899 }
fe8ab488
A
900#endif /* CONTENT_FILTER */
901
bca245ac
A
902 if (error != 0) {
903 if (error != EJUSTRETURN) {
904 m_freem(m);
905 }
906 return 0;
0a7de745 907 }
bca245ac
A
908 } else if (m) {
909 m->m_flags &= ~M_SKIPCFIL;
91447636 910 }
91447636
A
911 }
912
2d21ac55 913 /* If this is the first record, it's also the last record */
0a7de745 914 if (sb->sb_lastrecord == NULL) {
2d21ac55 915 sb->sb_lastrecord = m;
0a7de745 916 }
fa4905b1 917
2d21ac55
A
918 sbcompress(sb, m, sb->sb_mbtail);
919 SBLASTRECORDCHK(sb, "sbappend 3");
0a7de745 920 return 1;
2d21ac55
A
921}
922
bca245ac
A
923int
924sbappend(struct sockbuf *sb, struct mbuf *m)
925{
926 return sbappend_common(sb, m, FALSE);
927}
928
929int
930sbappend_nodrop(struct sockbuf *sb, struct mbuf *m)
931{
932 return sbappend_common(sb, m, TRUE);
933}
934
2d21ac55
A
935/*
936 * Similar to sbappend, except that this is optimized for stream sockets.
937 */
938int
939sbappendstream(struct sockbuf *sb, struct mbuf *m)
940{
941 struct socket *so = sb->sb_so;
942
2d21ac55 943 if (m == NULL || (sb->sb_flags & SB_DROP)) {
0a7de745 944 if (m != NULL) {
2d21ac55 945 m_freem(m);
0a7de745
A
946 }
947 return 0;
2d21ac55
A
948 }
949
39236c6e
A
950 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
951 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
952 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
953 /* NOTREACHED */
954 }
955
956 SBLASTMBUFCHK(sb, __func__);
957
bca245ac
A
958 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
959 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
960 int error = sflt_data_in(so, NULL, &m, NULL, 0);
961 SBLASTRECORDCHK(sb, "sbappendstream 1");
fe8ab488
A
962
963#if CONTENT_FILTER
bca245ac
A
964 if (error == 0) {
965 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
966 }
fe8ab488
A
967#endif /* CONTENT_FILTER */
968
bca245ac
A
969 if (error != 0) {
970 if (error != EJUSTRETURN) {
971 m_freem(m);
972 }
973 return 0;
0a7de745 974 }
bca245ac
A
975 } else if (m) {
976 m->m_flags &= ~M_SKIPCFIL;
2d21ac55
A
977 }
978 }
979
980 sbcompress(sb, m, sb->sb_mbtail);
981 sb->sb_lastrecord = sb->sb_mb;
982 SBLASTRECORDCHK(sb, "sbappendstream 2");
0a7de745 983 return 1;
1c79356b
A
984}
985
#ifdef SOCKBUF_DEBUG
/*
 * Debug-only consistency check of a socket buffer.
 *
 * Asserts that the mutex covering the socket is owned, then (when
 * sbchecking is enabled) walks every record and every mbuf, summing
 * the data bytes and the mbuf storage, and panics if the totals do not
 * match sb_cc and sb_mbcnt.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	struct mbuf *n = NULL;
	u_int32_t len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	/* Use the protocol's own lock if it has one, else the domain's. */
	if (sb->sb_so->so_proto->pr_getlock != NULL) {
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	} else {
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
	}

	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0) {
		return;
	}

	for (m = sb->sb_mb; m != NULL; m = n) {
		n = m->m_nextpkt;
		for (; m != NULL; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			/* XXX pretty sure this is bogus */
			if (m->m_flags & M_EXT) {
				mbcnt += m->m_ext.ext_size;
			}
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		/*
		 * The counters are 32-bit quantities; print them with %u
		 * rather than %ld so the arguments match the format.
		 */
		panic("cc %u != %u || mbcnt %u != %u\n",
		    (unsigned int)len, (unsigned int)sb->sb_cc,
		    (unsigned int)mbcnt, (unsigned int)sb->sb_mbcnt);
	}
}
#endif
1024
2d21ac55
A
1025void
1026sblastrecordchk(struct sockbuf *sb, const char *where)
1027{
1028 struct mbuf *m = sb->sb_mb;
1029
0a7de745 1030 while (m && m->m_nextpkt) {
2d21ac55 1031 m = m->m_nextpkt;
0a7de745 1032 }
2d21ac55
A
1033
1034 if (m != sb->sb_lastrecord) {
fe8ab488
A
1035 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
1036 "last 0x%llx\n",
1037 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1038 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
1039 (uint64_t)VM_KERNEL_ADDRPERM(m));
2d21ac55 1040 printf("packet chain:\n");
0a7de745 1041 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
fe8ab488 1042 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
0a7de745 1043 }
2d21ac55
A
1044 panic("sblastrecordchk from %s", where);
1045 }
1046}
1047
1048void
1049sblastmbufchk(struct sockbuf *sb, const char *where)
1050{
1051 struct mbuf *m = sb->sb_mb;
1052 struct mbuf *n;
1053
0a7de745 1054 while (m && m->m_nextpkt) {
2d21ac55 1055 m = m->m_nextpkt;
0a7de745 1056 }
2d21ac55 1057
0a7de745 1058 while (m && m->m_next) {
2d21ac55 1059 m = m->m_next;
0a7de745 1060 }
2d21ac55
A
1061
1062 if (m != sb->sb_mbtail) {
fe8ab488
A
1063 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1064 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1065 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
1066 (uint64_t)VM_KERNEL_ADDRPERM(m));
2d21ac55
A
1067 printf("packet tree:\n");
1068 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1069 printf("\t");
0a7de745 1070 for (n = m; n != NULL; n = n->m_next) {
3e170ce0
A
1071 printf("0x%llx ",
1072 (uint64_t)VM_KERNEL_ADDRPERM(n));
0a7de745 1073 }
2d21ac55
A
1074 printf("\n");
1075 }
1076 panic("sblastmbufchk from %s", where);
1077 }
1078}
1079
1c79356b 1080/*
2d21ac55 1081 * Similar to sbappend, except the mbuf chain begins a new record.
1c79356b 1082 */
bca245ac
A
1083static int
1084sbappendrecord_common(struct sockbuf *sb, struct mbuf *m0, boolean_t nodrop)
1c79356b 1085{
2d21ac55
A
1086 struct mbuf *m;
1087 int space = 0;
9bccf70c 1088
2d21ac55 1089 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
bca245ac 1090 if (m0 != NULL && nodrop == FALSE) {
2d21ac55 1091 m_freem(m0);
0a7de745
A
1092 }
1093 return 0;
2d21ac55
A
1094 }
1095
0a7de745 1096 for (m = m0; m != NULL; m = m->m_next) {
2d21ac55 1097 space += m->m_len;
0a7de745 1098 }
2d21ac55
A
1099
1100 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
bca245ac
A
1101 if (nodrop == FALSE) {
1102 m_freem(m0);
1103 }
0a7de745 1104 return 0;
2d21ac55
A
1105 }
1106
bca245ac
A
1107 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1108 ASSERT(nodrop == FALSE);
1109 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1110 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
1111 sock_data_filt_flag_record);
fe8ab488
A
1112
1113#if CONTENT_FILTER
bca245ac
A
1114 if (error == 0) {
1115 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
1116 }
fe8ab488
A
1117#endif /* CONTENT_FILTER */
1118
bca245ac
A
1119 if (error != 0) {
1120 SBLASTRECORDCHK(sb, "sbappendrecord 1");
1121 if (error != EJUSTRETURN) {
1122 m_freem(m0);
1123 }
1124 return 0;
0a7de745 1125 }
bca245ac
A
1126 } else if (m0) {
1127 m0->m_flags &= ~M_SKIPCFIL;
1c79356b 1128 }
1c79356b 1129 }
2d21ac55 1130
1c79356b 1131 /*
1c79356b
A
1132 * Note this permits zero length records.
1133 */
1134 sballoc(sb, m0);
2d21ac55
A
1135 SBLASTRECORDCHK(sb, "sbappendrecord 2");
1136 if (sb->sb_lastrecord != NULL) {
1137 sb->sb_lastrecord->m_nextpkt = m0;
39236c6e 1138 } else {
1c79356b 1139 sb->sb_mb = m0;
2d21ac55
A
1140 }
1141 sb->sb_lastrecord = m0;
4a3eedf9 1142 sb->sb_mbtail = m0;
2d21ac55 1143
1c79356b
A
1144 m = m0->m_next;
1145 m0->m_next = 0;
1146 if (m && (m0->m_flags & M_EOR)) {
1147 m0->m_flags &= ~M_EOR;
1148 m->m_flags |= M_EOR;
1149 }
2d21ac55
A
1150 sbcompress(sb, m, m0);
1151 SBLASTRECORDCHK(sb, "sbappendrecord 3");
0a7de745 1152 return 1;
1c79356b
A
1153}
1154
bca245ac
A
1155int
1156sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1157{
1158 return sbappendrecord_common(sb, m0, FALSE);
1159}
1160
1161int
1162sbappendrecord_nodrop(struct sockbuf *sb, struct mbuf *m0)
1163{
1164 return sbappendrecord_common(sb, m0, TRUE);
1165}
1166
1c79356b 1167/*
d9a64523
A
1168 * Concatenate address (optional), control (optional) and data into one
1169 * single mbuf chain. If sockbuf *sb is passed in, space check will be
1170 * performed.
2d21ac55 1171 *
d9a64523 1172 * Returns: mbuf chain pointer if succeeded, NULL if failed
1c79356b 1173 */
d9a64523
A
1174struct mbuf *
1175sbconcat_mbufs(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)
1c79356b 1176{
d9a64523
A
1177 struct mbuf *m = NULL, *n = NULL;
1178 int space = 0;
1c79356b 1179
0a7de745 1180 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
d9a64523 1181 panic("sbconcat_mbufs");
0a7de745 1182 }
1c79356b 1183
0a7de745 1184 if (m0) {
1c79356b 1185 space += m0->m_pkthdr.len;
0a7de745 1186 }
1c79356b
A
1187 for (n = control; n; n = n->m_next) {
1188 space += n->m_len;
0a7de745 1189 if (n->m_next == 0) { /* keep pointer to last control buf */
1c79356b 1190 break;
0a7de745 1191 }
1c79356b 1192 }
d9a64523
A
1193
1194 if (asa != NULL) {
0a7de745
A
1195 if (asa->sa_len > MLEN) {
1196 return NULL;
1197 }
d9a64523
A
1198 space += asa->sa_len;
1199 }
1200
0a7de745
A
1201 if (sb != NULL && space > sbspace(sb)) {
1202 return NULL;
1203 }
d9a64523 1204
0a7de745
A
1205 if (n) {
1206 n->m_next = m0; /* concatenate data to control */
1207 } else {
1c79356b 1208 control = m0;
0a7de745 1209 }
2d21ac55 1210
d9a64523
A
1211 if (asa != NULL) {
1212 MGET(m, M_DONTWAIT, MT_SONAME);
1213 if (m == 0) {
1214 if (n) {
1215 /* unchain control and data if necessary */
1216 n->m_next = NULL;
1217 }
0a7de745 1218 return NULL;
d9a64523
A
1219 }
1220 m->m_len = asa->sa_len;
1221 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
1222
1223 m->m_next = control;
1224 } else {
1225 m = control;
1226 }
1227
0a7de745 1228 return m;
d9a64523
A
1229}
1230
1231/*
1232 * Queue mbuf chain to the receive queue of a socket.
1233 * Parameter space is the total len of the mbuf chain.
1234 * If passed in, sockbuf space will be checked.
1235 *
1236 * Returns: 0 Invalid mbuf chain
1237 * 1 Success
1238 */
1239int
1240sbappendchain(struct sockbuf *sb, struct mbuf *m, int space)
1241{
1242 struct mbuf *n, *nlast;
1243
0a7de745
A
1244 if (m == NULL) {
1245 return 0;
1246 }
d9a64523 1247
0a7de745
A
1248 if (space != 0 && space > sbspace(sb)) {
1249 return 0;
1250 }
2d21ac55 1251
0a7de745 1252 for (n = m; n->m_next != NULL; n = n->m_next) {
1c79356b 1253 sballoc(sb, n);
0a7de745 1254 }
2d21ac55
A
1255 sballoc(sb, n);
1256 nlast = n;
1257
1258 if (sb->sb_lastrecord != NULL) {
1259 sb->sb_lastrecord->m_nextpkt = m;
1260 } else {
1c79356b 1261 sb->sb_mb = m;
2d21ac55
A
1262 }
1263 sb->sb_lastrecord = m;
1264 sb->sb_mbtail = nlast;
1265
1266 SBLASTMBUFCHK(sb, __func__);
1267 SBLASTRECORDCHK(sb, "sbappendadddr 2");
0a7de745 1268 return 1;
1c79356b
A
1269}
1270
2d21ac55
A
1271/*
1272 * Returns: 0 Error: No space/out of mbufs/etc.
1273 * 1 Success
1274 *
1275 * Imputed: (*error_out) errno for error
1276 * ENOBUFS
1277 * sflt_data_in:??? [whatever a filter author chooses]
1278 */
1c79356b 1279int
2d21ac55
A
1280sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
1281 struct mbuf *control, int *error_out)
91447636
A
1282{
1283 int result = 0;
2d21ac55 1284 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
d9a64523 1285 struct mbuf *mbuf_chain = NULL;
2d21ac55 1286
0a7de745 1287 if (error_out) {
2d21ac55 1288 *error_out = 0;
0a7de745 1289 }
2d21ac55 1290
0a7de745 1291 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
91447636 1292 panic("sbappendaddrorfree");
0a7de745 1293 }
2d21ac55
A
1294
1295 if (sb->sb_flags & SB_DROP) {
0a7de745 1296 if (m0 != NULL) {
2d21ac55 1297 m_freem(m0);
0a7de745
A
1298 }
1299 if (control != NULL && !sb_unix) {
2d21ac55 1300 m_freem(control);
0a7de745
A
1301 }
1302 if (error_out != NULL) {
2d21ac55 1303 *error_out = EINVAL;
0a7de745
A
1304 }
1305 return 0;
2d21ac55
A
1306 }
1307
bca245ac
A
1308 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1309 /* Call socket data in filters */
1310 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1311 int error;
1312 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
1313 SBLASTRECORDCHK(sb, __func__);
fe8ab488
A
1314
1315#if CONTENT_FILTER
bca245ac
A
1316 if (error == 0) {
1317 error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
1318 0);
1319 }
fe8ab488
A
1320#endif /* CONTENT_FILTER */
1321
bca245ac
A
1322 if (error) {
1323 if (error != EJUSTRETURN) {
1324 if (m0) {
1325 m_freem(m0);
1326 }
1327 if (control != NULL && !sb_unix) {
1328 m_freem(control);
1329 }
1330 if (error_out) {
1331 *error_out = error;
1332 }
0a7de745 1333 }
bca245ac 1334 return 0;
91447636 1335 }
bca245ac
A
1336 } else if (m0) {
1337 m0->m_flags &= ~M_SKIPCFIL;
91447636
A
1338 }
1339 }
2d21ac55 1340
d9a64523
A
1341 mbuf_chain = sbconcat_mbufs(sb, asa, m0, control);
1342 SBLASTRECORDCHK(sb, "sbappendadddr 1");
1343 result = sbappendchain(sb, mbuf_chain, 0);
91447636 1344 if (result == 0) {
0a7de745 1345 if (m0) {
2d21ac55 1346 m_freem(m0);
0a7de745
A
1347 }
1348 if (control != NULL && !sb_unix) {
2d21ac55 1349 m_freem(control);
0a7de745
A
1350 }
1351 if (error_out) {
2d21ac55 1352 *error_out = ENOBUFS;
0a7de745 1353 }
91447636 1354 }
2d21ac55 1355
0a7de745
A
1356 return result;
1357}
1358
1359inline boolean_t
1360is_cmsg_valid(struct mbuf *control, struct cmsghdr *cmsg)
1361{
1362 if (cmsg == NULL) {
1363 return FALSE;
1364 }
1365
1366 if (cmsg->cmsg_len < sizeof(struct cmsghdr)) {
1367 return FALSE;
1368 }
1369
1370 if ((uint8_t *)control->m_data >= (uint8_t *)cmsg + cmsg->cmsg_len) {
1371 return FALSE;
1372 }
1373
1374 if ((uint8_t *)control->m_data + control->m_len <
1375 (uint8_t *)cmsg + cmsg->cmsg_len) {
1376 return FALSE;
1377 }
1378
1379 return TRUE;
91447636
A
1380}
1381
1382static int
2d21ac55
A
1383sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1384 struct mbuf *control)
1c79356b 1385{
2d21ac55 1386 struct mbuf *m, *mlast, *n;
1c79356b 1387 int space = 0;
1c79356b 1388
0a7de745 1389 if (control == 0) {
1c79356b 1390 panic("sbappendcontrol");
0a7de745 1391 }
1c79356b 1392
0a7de745 1393 for (m = control;; m = m->m_next) {
1c79356b 1394 space += m->m_len;
0a7de745 1395 if (m->m_next == 0) {
1c79356b 1396 break;
0a7de745 1397 }
1c79356b 1398 }
0a7de745
A
1399 n = m; /* save pointer to last control buffer */
1400 for (m = m0; m; m = m->m_next) {
1c79356b 1401 space += m->m_len;
0a7de745
A
1402 }
1403 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1404 return 0;
1405 }
1406 n->m_next = m0; /* concatenate data to control */
2d21ac55
A
1407 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1408
0a7de745 1409 for (m = control; m->m_next != NULL; m = m->m_next) {
1c79356b 1410 sballoc(sb, m);
0a7de745 1411 }
2d21ac55
A
1412 sballoc(sb, m);
1413 mlast = m;
1414
1415 if (sb->sb_lastrecord != NULL) {
1416 sb->sb_lastrecord->m_nextpkt = control;
1417 } else {
1c79356b 1418 sb->sb_mb = control;
2d21ac55
A
1419 }
1420 sb->sb_lastrecord = control;
1421 sb->sb_mbtail = mlast;
1422
1423 SBLASTMBUFCHK(sb, __func__);
1424 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
0a7de745 1425 return 1;
1c79356b
A
1426}
1427
91447636 1428int
0a7de745 1429sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
2d21ac55 1430 int *error_out)
91447636
A
1431{
1432 int result = 0;
2d21ac55
A
1433 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1434
0a7de745 1435 if (error_out) {
2d21ac55 1436 *error_out = 0;
0a7de745 1437 }
2d21ac55
A
1438
1439 if (sb->sb_flags & SB_DROP) {
0a7de745 1440 if (m0 != NULL) {
2d21ac55 1441 m_freem(m0);
0a7de745
A
1442 }
1443 if (control != NULL && !sb_unix) {
2d21ac55 1444 m_freem(control);
0a7de745
A
1445 }
1446 if (error_out != NULL) {
2d21ac55 1447 *error_out = EINVAL;
0a7de745
A
1448 }
1449 return 0;
2d21ac55
A
1450 }
1451
bca245ac
A
1452 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1453 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1454 int error;
2d21ac55 1455
bca245ac
A
1456 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
1457 SBLASTRECORDCHK(sb, __func__);
fe8ab488
A
1458
1459#if CONTENT_FILTER
bca245ac
A
1460 if (error == 0) {
1461 error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
1462 0);
1463 }
fe8ab488
A
1464#endif /* CONTENT_FILTER */
1465
bca245ac
A
1466 if (error) {
1467 if (error != EJUSTRETURN) {
1468 if (m0) {
1469 m_freem(m0);
1470 }
1471 if (control != NULL && !sb_unix) {
1472 m_freem(control);
1473 }
1474 if (error_out) {
1475 *error_out = error;
1476 }
0a7de745 1477 }
bca245ac 1478 return 0;
91447636 1479 }
bca245ac
A
1480 } else if (m0) {
1481 m0->m_flags &= ~M_SKIPCFIL;
91447636
A
1482 }
1483 }
2d21ac55 1484
91447636
A
1485 result = sbappendcontrol_internal(sb, m0, control);
1486 if (result == 0) {
0a7de745 1487 if (m0) {
2d21ac55 1488 m_freem(m0);
0a7de745
A
1489 }
1490 if (control != NULL && !sb_unix) {
2d21ac55 1491 m_freem(control);
0a7de745
A
1492 }
1493 if (error_out) {
2d21ac55 1494 *error_out = ENOBUFS;
0a7de745 1495 }
91447636 1496 }
2d21ac55 1497
0a7de745 1498 return result;
91447636
A
1499}
1500
39236c6e 1501/*
f427ee49 1502 * TCP streams have Multipath TCP support or are regular TCP sockets.
39236c6e
A
1503 */
1504int
f427ee49 1505sbappendstream_rcvdemux(struct socket *so, struct mbuf *m)
39236c6e
A
1506{
1507 int ret = 0;
1508
5c9f4661
A
1509 if ((m != NULL) &&
1510 m_pktlen(m) <= 0 &&
1511 !((so->so_flags & SOF_MP_SUBFLOW) &&
0a7de745
A
1512 (m->m_flags & M_PKTHDR) &&
1513 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
39236c6e 1514 m_freem(m);
0a7de745 1515 return ret;
39236c6e
A
1516 }
1517
39236c6e 1518#if MPTCP
f427ee49
A
1519 if (so->so_flags & SOF_MP_SUBFLOW) {
1520 return sbappendmptcpstream_rcv(&so->so_rcv, m);
1521 } else
39236c6e 1522#endif /* MPTCP */
f427ee49
A
1523 {
1524 return sbappendstream(&so->so_rcv, m);
39236c6e 1525 }
39236c6e
A
1526}
1527
#if MPTCP
/*
 * Append a packet-header mbuf chain to the receive buffer of an MPTCP
 * subflow socket.  The buffer must be a receive buffer with
 * SB_NOCOMPRESS set.
 *
 * Returns 1 if the data was queued, or if a zero-length DATA_FIN packet
 * was handed to mptcp_input(); returns 0 if the chain was NULL or was
 * freed (empty packet, SB_DROP buffer, or SS_CANTRCVMORE socket).
 */
int
sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m)
{
	struct socket *so = sb->sb_so;

	VERIFY(m == NULL || (m->m_flags & M_PKTHDR));
	/* SB_NOCOMPRESS must be set to prevent loss of M_PKTHDR data */
	VERIFY((sb->sb_flags & (SB_RECV | SB_NOCOMPRESS)) ==
	    (SB_RECV | SB_NOCOMPRESS));

	if (m == NULL) {
		return 0;
	}

	if (m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) ||
	    (so->so_state & SS_CANTRCVMORE)) {
		/* An empty DATA_FIN packet still goes to MPTCP input. */
		if ((m->m_flags & M_PKTHDR) &&
		    m_pktlen(m) == 0 &&
		    (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
			mptcp_input(tptomptp(sototcpcb(so))->mpt_mpte, m);
			return 1;
		}
		m_freem(m);
		return 0;
	}

	/* the socket is not closed, so SOF_MP_SUBFLOW must be set */
	VERIFY(so->so_flags & SOF_MP_SUBFLOW);

	if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
		panic("%s: nexpkt %p || mb %p != lastrecord %p\n", __func__,
		    m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
		/* NOTREACHED */
	}

	SBLASTMBUFCHK(sb, __func__);

	/* No filter support (SB_RECV) on mptcp subflow sockets */

	sbcompress(sb, m, sb->sb_mbtail);
	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
	return 1;
}
#endif /* MPTCP */
1570
1c79356b
A
1571/*
1572 * Compress mbuf chain m into the socket
1573 * buffer sb following mbuf n. If n
1574 * is null, the buffer is presumed empty.
1575 */
2d21ac55
A
/*
 * sb: socket buffer being appended to.
 * m:  chain to append; may be NULL, in which case only sb_mbtail is
 *     refreshed by walking forward from n (n is dereferenced on that
 *     path, so it must not be NULL when m is NULL).
 * n:  mbuf after which the chain is linked; NULL means the chain
 *     becomes the head of the buffer (sb_mb).
 */
1576static inline void
1577sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1578{
/* eor accumulates M_EOR seen on the chain; compress is off under SB_NOCOMPRESS */
39236c6e 1579 int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS));
2d21ac55
A
1580 struct mbuf *o;
1581
1582 if (m == NULL) {
1583 /* There is nothing to compress; just update the tail */
0a7de745 1584 for (; n->m_next != NULL; n = n->m_next) {
2d21ac55 1585 ;
0a7de745 1586 }
2d21ac55
A
1587 sb->sb_mbtail = n;
1588 goto done;
1589 }
1c79356b 1590
39236c6e 1591 while (m != NULL) {
1c79356b 1592 eor |= m->m_flags & M_EOR;
/*
 * Case 1: free a zero-length mbuf.  Once an M_EOR has been seen this
 * is only done when a neighbour (the next mbuf, else n) exists and has
 * the same m_type.  sb_lastrecord is moved off the mbuf being freed.
 */
39236c6e 1593 if (compress && m->m_len == 0 && (eor == 0 ||
2d21ac55 1594 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
0a7de745 1595 if (sb->sb_lastrecord == m) {
2d21ac55 1596 sb->sb_lastrecord = m->m_next;
0a7de745 1597 }
1c79356b
A
1598 m = m_free(m);
1599 continue;
1600 }
/*
 * Case 2: copy a small mbuf into the trailing space of n, provided n
 * does not end a record and both mbufs have the same type; the source
 * mbuf is then freed and the byte counters are adjusted by hand.
 */
39236c6e 1601 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
9bccf70c
A
1602#ifndef __APPLE__
1603 M_WRITABLE(n) &&
1604#endif
1605 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1606 m->m_len <= M_TRAILINGSPACE(n) &&
1c79356b
A
1607 n->m_type == m->m_type) {
1608 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1609 (unsigned)m->m_len);
1610 n->m_len += m->m_len;
1611 sb->sb_cc += m->m_len;
2d21ac55 1612 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
39236c6e
A
1613 m->m_type != MT_OOBDATA) {
1614 /* XXX: Probably don't need */
2d21ac55 1615 sb->sb_ctl += m->m_len;
39236c6e 1616 }
39037602
A
1617
1618 /* update send byte count */
1619 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1620 inp_incr_sndbytes_total(sb->sb_so,
1621 m->m_len);
1622 inp_incr_sndbytes_unsent(sb->sb_so,
1623 m->m_len);
1624 }
1c79356b
A
1625 m = m_free(m);
1626 continue;
1627 }
/*
 * Case 3: link m itself after n (or make it the buffer head), charge
 * it with sballoc(), and advance.  M_EOR is cleared on every linked
 * mbuf; it is re-applied to the final mbuf after the loop.
 */
0a7de745 1628 if (n != NULL) {
1c79356b 1629 n->m_next = m;
0a7de745 1630 } else {
1c79356b 1631 sb->sb_mb = m;
0a7de745 1632 }
2d21ac55 1633 sb->sb_mbtail = m;
1c79356b
A
1634 sballoc(sb, m);
1635 n = m;
1636 m->m_flags &= ~M_EOR;
1637 m = m->m_next;
39236c6e 1638 n->m_next = NULL;
1c79356b 1639 }
/* Put the accumulated end-of-record mark on the last mbuf, if there is one. */
39236c6e 1640 if (eor != 0) {
0a7de745 1641 if (n != NULL) {
1c79356b 1642 n->m_flags |= eor;
0a7de745 1643 } else {
1c79356b 1644 printf("semi-panic: sbcompress\n");
0a7de745 1645 }
1c79356b 1646 }
2d21ac55
A
1647done:
1648 SBLASTMBUFCHK(sb, __func__);
2d21ac55
A
1649}
1650
1651void
1652sb_empty_assert(struct sockbuf *sb, const char *where)
1653{
1654 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1655 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
b0d623f7 1656 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
2d21ac55 1657 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
39236c6e
A
1658 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail,
1659 sb->sb_lastrecord);
2d21ac55
A
1660 /* NOTREACHED */
1661 }
1c79356b
A
1662}
1663
1664/*
1665 * Free all mbufs in a sockbuf.
1666 * Check that all resources are reclaimed.
1667 */
1668void
2d21ac55 1669sbflush(struct sockbuf *sb)
1c79356b 1670{
39236c6e
A
1671 void *lr_saved = __builtin_return_address(0);
1672 struct socket *so = sb->sb_so;
39236c6e
A
1673
1674 /* so_usecount may be 0 if we get here from sofreelastref() */
1675 if (so == NULL) {
1676 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
1677 __func__, sb, sb->sb_flags, lr_saved);
1678 /* NOTREACHED */
1679 } else if (so->so_usecount < 0) {
1680 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1681 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
1682 so->so_usecount, lr_saved, solockhistory_nr(so));
1683 /* NOTREACHED */
1684 }
39236c6e
A
1685
1686 /*
1687 * Obtain lock on the socket buffer (SB_LOCK). This is required
1688 * to prevent the socket buffer from being unexpectedly altered
1689 * while it is used by another thread in socket send/receive.
1690 *
1691 * sblock() must not fail here, hence the assertion.
1692 */
1693 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
1694 VERIFY(sb->sb_flags & SB_LOCK);
1695
1696 while (sb->sb_mbcnt > 0) {
9bccf70c
A
1697 /*
1698 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1699 * we would loop forever. Panic instead.
1700 */
0a7de745 1701 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) {
9bccf70c 1702 break;
0a7de745 1703 }
1c79356b 1704 sbdrop(sb, (int)sb->sb_cc);
9bccf70c 1705 }
39236c6e 1706
2d21ac55 1707 sb_empty_assert(sb, __func__);
0a7de745 1708 sbunlock(sb, TRUE); /* keep socket locked */
1c79356b
A
1709}
1710
1711/*
1712 * Drop data from (the front of) a sockbuf.
9bccf70c
A
1713 * use m_freem_list to free the mbuf structures
1714 * under a single lock... this is done by pruning
1715 * the top of the tree from the body by keeping track
1716 * of where we get to in the tree and then zeroing the
1717 * two pertinent pointers m_nextpkt and m_next
1718 * the socket buffer is then updated to point at the new
1719 * top of the tree and the pruned area is released via
1720 * m_freem_list.
1c79356b
A
1721 */
/*
 * sb:  socket buffer to trim.
 * len: number of bytes to remove from the front; values <= 0 skip the
 *      byte-dropping loop (leading zero-length mbufs are still freed).
 */
1722void
2d21ac55 1723sbdrop(struct sockbuf *sb, int len)
1c79356b 1724{
2d21ac55 1725 struct mbuf *m, *free_list, *ml;
fa4905b1 1726 struct mbuf *next, *last;
1c79356b 1727
39236c6e
A
/* m = first mbuf of the first record; next = the following record, if any */
1728 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
1729#if MPTCP
/* MPTCP hooks: only invoked for non-SB_RECV buffers when data is actually dropped */
5ba3f43e 1730 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
39236c6e 1731 ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
0a7de745
A
1732 (SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH) &&
1733 SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))) &&
5ba3f43e 1734 !(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
490019cf 1735 mptcp_preproc_sbdrop(sb->sb_so, m, (unsigned int)len);
39236c6e 1736 }
5ba3f43e
A
1737 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
1738 (sb->sb_so->so_flags & SOF_MP_SUBFLOW) &&
1739 (sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
1740 mptcp_fallback_sbdrop(sb->sb_so, m, len);
1741 }
39236c6e 1742#endif /* MPTCP */
fa4905b1
A
1743 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1744
fa4905b1
A
/*
 * free_list: head of everything that will be released in one call.
 * last:      first mbuf of the record currently being consumed.
 * ml:        most recent mbuf that was fully consumed (NULL if none).
 */
1745 free_list = last = m;
1746 ml = (struct mbuf *)0;
1747
1c79356b 1748 while (len > 0) {
39037602
A
/* current record exhausted: move on to the next record, or give up */
1749 if (m == NULL) {
1750 if (next == NULL) {
2d21ac55
A
1751 /*
1752 * temporarily replacing this panic with printf
1753 * because it occurs occasionally when closing
1754 * a socket when there is no harm in ignoring
1755 * it. This problem will be investigated
1756 * further.
1757 */
1758 /* panic("sbdrop"); */
1759 printf("sbdrop - count not zero\n");
1760 len = 0;
1761 /*
1762 * zero the counts. if we have no mbufs,
1763 * we have no data (PR-2986815)
1764 */
1765 sb->sb_cc = 0;
1766 sb->sb_mbcnt = 0;
1767 break;
1768 }
1769 m = last = next;
1770 next = m->m_nextpkt;
1771 continue;
1c79356b
A
1772 }
/* partial drop: trim the front of this mbuf in place and stop */
1773 if (m->m_len > len) {
1774 m->m_len -= len;
1775 m->m_data += len;
1776 sb->sb_cc -= len;
39037602 1777 /* update the send byte count */
0a7de745
A
1778 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1779 inp_decr_sndbytes_total(sb->sb_so, len);
1780 }
2d21ac55 1781 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
0a7de745 1782 m->m_type != MT_OOBDATA) {
2d21ac55 1783 sb->sb_ctl -= len;
0a7de745 1784 }
1c79356b
A
1785 break;
1786 }
/* whole mbuf consumed: un-account it and remember it as the cut point */
1787 len -= m->m_len;
1788 sbfree(sb, m);
fa4905b1
A
1789
1790 ml = m;
1791 m = m->m_next;
1c79356b
A
1792 }
/* also consume any zero-length mbufs that now lead the record */
1793 while (m && m->m_len == 0) {
1794 sbfree(sb, m);
fa4905b1
A
1795
1796 ml = m;
1797 m = m->m_next;
1798 }
/*
 * Detach the consumed part: cut after the last consumed mbuf and after
 * the record being consumed, then free the whole detached list at once.
 */
1799 if (ml) {
2d21ac55 1800 ml->m_next = (struct mbuf *)0;
fa4905b1 1801 last->m_nextpkt = (struct mbuf *)0;
2d21ac55 1802 m_freem_list(free_list);
1c79356b
A
1803 }
/* the survivor (rest of the current record, else the next record) becomes the head */
1804 if (m) {
1805 sb->sb_mb = m;
1806 m->m_nextpkt = next;
2d21ac55 1807 } else {
1c79356b 1808 sb->sb_mb = next;
2d21ac55
A
1809 }
1810
1811 /*
1812 * First part is an inline SB_EMPTY_FIXUP(). Second part
1813 * makes sure sb_lastrecord is up-to-date if we dropped
1814 * part of the last record.
1815 */
1816 m = sb->sb_mb;
1817 if (m == NULL) {
1818 sb->sb_mbtail = NULL;
1819 sb->sb_lastrecord = NULL;
1820 } else if (m->m_nextpkt == NULL) {
1821 sb->sb_lastrecord = m;
1822 }
fa4905b1 1823
fe8ab488
A
1824#if CONTENT_FILTER
1825 cfil_sock_buf_update(sb);
1826#endif /* CONTENT_FILTER */
1827
fa4905b1 1828 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1c79356b
A
1829}
1830
1831/*
1832 * Drop a record off the front of a sockbuf
1833 * and move the next record to the front.
1834 */
1835void
2d21ac55 1836sbdroprecord(struct sockbuf *sb)
1c79356b 1837{
2d21ac55 1838 struct mbuf *m, *mn;
1c79356b
A
1839
1840 m = sb->sb_mb;
1841 if (m) {
1842 sb->sb_mb = m->m_nextpkt;
1843 do {
1844 sbfree(sb, m);
1845 MFREE(m, mn);
9bccf70c
A
1846 m = mn;
1847 } while (m);
1c79356b 1848 }
2d21ac55 1849 SB_EMPTY_FIXUP(sb);
1c79356b
A
1850}
1851
1852/*
1853 * Create a "control" mbuf containing the specified data
1854 * with the specified type for presentation on a socket buffer.
1855 */
1856struct mbuf *
2d21ac55 1857sbcreatecontrol(caddr_t p, int size, int type, int level)
1c79356b 1858{
2d21ac55 1859 struct cmsghdr *cp;
1c79356b
A
1860 struct mbuf *m;
1861
0a7de745
A
1862 if (CMSG_SPACE((u_int)size) > MLEN) {
1863 return (struct mbuf *)NULL;
1864 }
1865 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) {
1866 return (struct mbuf *)NULL;
1867 }
1c79356b 1868 cp = mtod(m, struct cmsghdr *);
0a7de745 1869 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1c79356b 1870 /* XXX check size? */
2d21ac55 1871 (void) memcpy(CMSG_DATA(cp), p, size);
f427ee49 1872 m->m_len = (int32_t)CMSG_SPACE(size);
9bccf70c 1873 cp->cmsg_len = CMSG_LEN(size);
1c79356b
A
1874 cp->cmsg_level = level;
1875 cp->cmsg_type = type;
0a7de745 1876 return m;
1c79356b
A
1877}
1878
39236c6e
A
1879struct mbuf **
1880sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp)
6d2010ae 1881{
39236c6e 1882 struct mbuf *m;
6d2010ae
A
1883 struct cmsghdr *cp;
1884
39236c6e 1885 if (*mp == NULL) {
6d2010ae 1886 *mp = sbcreatecontrol(p, size, type, level);
0a7de745 1887 return mp;
6d2010ae 1888 }
316670eb 1889
39236c6e 1890 if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
6d2010ae
A
1891 mp = &(*mp)->m_next;
1892 *mp = sbcreatecontrol(p, size, type, level);
0a7de745 1893 return mp;
6d2010ae 1894 }
316670eb 1895
6d2010ae 1896 m = *mp;
316670eb
A
1897
1898 cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
1899 /* CMSG_SPACE ensures 32-bit alignment */
0a7de745 1900 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
f427ee49 1901 m->m_len += (int32_t)CMSG_SPACE(size);
316670eb 1902
6d2010ae
A
1903 /* XXX check size? */
1904 (void) memcpy(CMSG_DATA(cp), p, size);
1905 cp->cmsg_len = CMSG_LEN(size);
1906 cp->cmsg_level = level;
1907 cp->cmsg_type = type;
316670eb 1908
0a7de745 1909 return mp;
6d2010ae
A
1910}
1911
1912
1c79356b
A
1913/*
1914 * Some routines that return EOPNOTSUPP for entry points that are not
1915 * supported by a protocol. Fill in as needed.
1916 */
/* Default pru_abort handler: the operation is not supported. */
int
pru_abort_notsupp(struct socket *so)
{
#pragma unused(so)
	return EOPNOTSUPP;
}
1923
/* Default pru_accept handler: the operation is not supported. */
int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
#pragma unused(so, nam)
	return EOPNOTSUPP;
}
1930
/* Default pru_attach handler: the operation is not supported. */
int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
#pragma unused(so, proto, p)
	return EOPNOTSUPP;
}
1937
/* Default pru_bind handler: the operation is not supported. */
int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
#pragma unused(so, nam, p)
	return EOPNOTSUPP;
}
1944
/* Default pru_connect handler: the operation is not supported. */
int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
#pragma unused(so, nam, p)
	return EOPNOTSUPP;
}
1951
/* Default pru_connect2 handler: the operation is not supported. */
int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
#pragma unused(so1, so2)
	return EOPNOTSUPP;
}
1958
1959int
813fb2f6
A
1960pru_connectx_notsupp(struct socket *so, struct sockaddr *src,
1961 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
1962 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
1963 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
1c79356b 1964{
813fb2f6 1965#pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
0a7de745 1966 return EOPNOTSUPP;
1c79356b
A
1967}
1968
1969int
39236c6e
A
1970pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
1971 struct ifnet *ifp, struct proc *p)
1c79356b 1972{
39236c6e 1973#pragma unused(so, cmd, data, ifp, p)
0a7de745 1974 return EOPNOTSUPP;
1c79356b
A
1975}
1976
/* Default pru_detach handler: the operation is not supported. */
int
pru_detach_notsupp(struct socket *so)
{
#pragma unused(so)
	return EOPNOTSUPP;
}
1983
/* Default pru_disconnect handler: the operation is not supported. */
int
pru_disconnect_notsupp(struct socket *so)
{
#pragma unused(so)
	return EOPNOTSUPP;
}
1990
1991int
3e170ce0 1992pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1c79356b 1993{
39236c6e 1994#pragma unused(so, aid, cid)
0a7de745 1995 return EOPNOTSUPP;
1c79356b
A
1996}
1997
/* Default pru_listen handler: the operation is not supported. */
int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
#pragma unused(so, p)
	return EOPNOTSUPP;
}
2004
/* Default pru_peeraddr handler: the operation is not supported. */
int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
#pragma unused(so, nam)
	return EOPNOTSUPP;
}
2011
/* Default pru_rcvd handler: the operation is not supported. */
int
pru_rcvd_notsupp(struct socket *so, int flags)
{
#pragma unused(so, flags)
	return EOPNOTSUPP;
}
1c79356b 2018
39236c6e
A
/* Default pru_rcvoob handler: the operation is not supported. */
int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
#pragma unused(so, m, flags)
	return EOPNOTSUPP;
}
2025
39236c6e
A
/* Default pru_send handler: the operation is not supported. */
int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct proc *p)
{
#pragma unused(so, flags, m, addr, control, p)
	return EOPNOTSUPP;
}
1c79356b 2033
fe8ab488
A
/* Default pru_send_list handler: the operation is not supported. */
int
pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct proc *p)
{
#pragma unused(so, flags, m, addr, control, p)
	return EOPNOTSUPP;
}
2041
1c79356b
A
2042/*
2043 * This isn't really a ``null'' operation, but it's the default one
2044 * and doesn't do anything destructive.
2045 */
2046int
2d21ac55 2047pru_sense_null(struct socket *so, void *ub, int isstat64)
1c79356b 2048{
2d21ac55
A
2049 if (isstat64 != 0) {
2050 struct stat64 *sb64;
1c79356b 2051
2d21ac55
A
2052 sb64 = (struct stat64 *)ub;
2053 sb64->st_blksize = so->so_snd.sb_hiwat;
2054 } else {
2055 struct stat *sb;
1c79356b 2056
2d21ac55
A
2057 sb = (struct stat *)ub;
2058 sb->st_blksize = so->so_snd.sb_hiwat;
2059 }
1c79356b 2060
0a7de745 2061 return 0;
1c79356b
A
2062}
2063
1c79356b
A
2064
/* Default pru_sosend handler: the operation is not supported. */
int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
#pragma unused(so, addr, uio, top, control, flags)
	return EOPNOTSUPP;
}
2072
fe8ab488 2073int
3e170ce0
A
2074pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
2075 u_int uiocnt, int flags)
fe8ab488 2076{
3e170ce0 2077#pragma unused(so, uio, uiocnt, flags)
0a7de745 2078 return EOPNOTSUPP;
fe8ab488
A
2079}
2080
/* Default pru_soreceive handler: the operation is not supported. */
int
pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(so, paddr, uio, mp0, controlp, flagsp)
	return EOPNOTSUPP;
}
2088
fe8ab488 2089int
39037602 2090pru_soreceive_list_notsupp(struct socket *so,
3e170ce0 2091 struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp)
fe8ab488 2092{
3e170ce0 2093#pragma unused(so, recv_msg_array, uiocnt, flagsp)
0a7de745 2094 return EOPNOTSUPP;
fe8ab488
A
2095}
2096
/* Default pru_shutdown handler: the operation is not supported. */
int
pru_shutdown_notsupp(struct socket *so)
{
#pragma unused(so)
	return EOPNOTSUPP;
}
2103
/* Default pru_sockaddr handler: the operation is not supported. */
int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
#pragma unused(so, nam)
	return EOPNOTSUPP;
}
2110
91447636 2111int
39236c6e 2112pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql)
1c79356b 2113{
39236c6e 2114#pragma unused(so, events, cred, wql)
0a7de745 2115 return EOPNOTSUPP;
1c79356b
A
2116}
2117
39236c6e
A
/*
 * Default pru_socheckopt handler: every option is allowed for both
 * set and get, so this always returns 0.
 */
int
pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
{
#pragma unused(so, sopt)
	return 0;
}
2127
3e170ce0
A
/* Default pru_preconnect handler: nothing to do, always returns 0. */
static int
pru_preconnect_null(struct socket *so)
{
#pragma unused(so)
	return 0;
}
2134
39236c6e
A
2135void
2136pru_sanitize(struct pr_usrreqs *pru)
2137{
0a7de745 2138#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
39236c6e
A
2139 DEFAULT(pru->pru_abort, pru_abort_notsupp);
2140 DEFAULT(pru->pru_accept, pru_accept_notsupp);
2141 DEFAULT(pru->pru_attach, pru_attach_notsupp);
2142 DEFAULT(pru->pru_bind, pru_bind_notsupp);
2143 DEFAULT(pru->pru_connect, pru_connect_notsupp);
2144 DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
2145 DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
2146 DEFAULT(pru->pru_control, pru_control_notsupp);
2147 DEFAULT(pru->pru_detach, pru_detach_notsupp);
2148 DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
2149 DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
2150 DEFAULT(pru->pru_listen, pru_listen_notsupp);
39236c6e
A
2151 DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
2152 DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
2153 DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
2154 DEFAULT(pru->pru_send, pru_send_notsupp);
fe8ab488 2155 DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
39236c6e
A
2156 DEFAULT(pru->pru_sense, pru_sense_null);
2157 DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
2158 DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
2159 DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
2160 DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
fe8ab488 2161 DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
39236c6e 2162 DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
fe8ab488 2163 DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
39236c6e 2164 DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
3e170ce0 2165 DEFAULT(pru->pru_preconnect, pru_preconnect_null);
39236c6e
A
2166#undef DEFAULT
2167}
1c79356b 2168
9bccf70c
A
2169/*
2170 * The following are macros on BSD and functions on Darwin
2171 */
1c79356b 2172
0b4e3aa0
A
2173/*
2174 * Do we need to notify the other side when I/O is possible?
2175 */
2176
2d21ac55 2177int
0b4e3aa0
A
2178sb_notify(struct sockbuf *sb)
2179{
0a7de745
A
2180 return sb->sb_waiters > 0 ||
2181 (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
0b4e3aa0
A
2182}
2183
2184/*
2185 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
2186 * This is problematical if the fields are unsigned, as the space might
2187 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
39236c6e 2188 * overflow and return 0.
0b4e3aa0 2189 */
b0d623f7 2190int
0b4e3aa0
A
2191sbspace(struct sockbuf *sb)
2192{
fe8ab488 2193 int pending = 0;
39236c6e
A
2194 int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
2195 (int)(sb->sb_mbmax - sb->sb_mbcnt));
3e170ce0 2196
0a7de745 2197 if (sb->sb_preconn_hiwat != 0) {
3e170ce0 2198 space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
0a7de745 2199 }
3e170ce0 2200
0a7de745 2201 if (space < 0) {
b0d623f7 2202 space = 0;
0a7de745 2203 }
b0d623f7 2204
fe8ab488
A
2205 /* Compensate for data being processed by content filters */
2206#if CONTENT_FILTER
2207 pending = cfil_sock_data_space(sb);
2208#endif /* CONTENT_FILTER */
0a7de745 2209 if (pending > space) {
fe8ab488 2210 space = 0;
0a7de745 2211 } else {
fe8ab488 2212 space -= pending;
0a7de745 2213 }
fe8ab488 2214
0a7de745 2215 return space;
39236c6e
A
2216}
2217
0b4e3aa0
A
2218/* do we have to send all at once on a socket? */
2219int
2220sosendallatonce(struct socket *so)
2221{
0a7de745 2222 return so->so_proto->pr_flags & PR_ATOMIC;
0b4e3aa0
A
2223}
2224
2225/* can we read something from so? */
2226int
2227soreadable(struct socket *so)
2228{
0a7de745
A
2229 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
2230 ((so->so_state & SS_CANTRCVMORE)
fe8ab488 2231#if CONTENT_FILTER
0a7de745 2232 && cfil_sock_data_pending(&so->so_rcv) == 0
fe8ab488 2233#endif /* CONTENT_FILTER */
0a7de745
A
2234 ) ||
2235 so->so_comp.tqh_first || so->so_error;
0b4e3aa0
A
2236}
2237
2238/* can we write something to so? */
2239
2240int
2241sowriteable(struct socket *so)
2242{
fe8ab488 2243 if ((so->so_state & SS_CANTSENDMORE) ||
0a7de745
A
2244 so->so_error > 0) {
2245 return 1;
2246 }
2247 if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
2248 return 0;
2249 }
2250 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2251 return 1;
2252 }
fe8ab488 2253
3e170ce0 2254 if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
fe8ab488 2255 if (so->so_flags & SOF_NOTSENT_LOWAT) {
3e170ce0
A
2256 if ((SOCK_DOM(so) == PF_INET6 ||
2257 SOCK_DOM(so) == PF_INET) &&
2258 so->so_type == SOCK_STREAM) {
0a7de745 2259 return tcp_notsent_lowat_check(so);
fe8ab488
A
2260 }
2261#if MPTCP
2262 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
2263 (SOCK_PROTO(so) == IPPROTO_TCP)) {
0a7de745 2264 return mptcp_notsent_lowat_check(so);
fe8ab488
A
2265 }
2266#endif
2267 else {
0a7de745 2268 return 1;
fe8ab488
A
2269 }
2270 } else {
0a7de745 2271 return 1;
fe8ab488
A
2272 }
2273 }
0a7de745 2274 return 0;
0b4e3aa0
A
2275}
2276
2277/* adjust counters in sb reflecting allocation of m */
2278
2279void
2280sballoc(struct sockbuf *sb, struct mbuf *m)
2281{
316670eb 2282 u_int32_t cnt = 1;
39236c6e
A
2283 sb->sb_cc += m->m_len;
2284 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
0a7de745 2285 m->m_type != MT_OOBDATA) {
2d21ac55 2286 sb->sb_ctl += m->m_len;
0a7de745 2287 }
39236c6e
A
2288 sb->sb_mbcnt += MSIZE;
2289
2d21ac55 2290 if (m->m_flags & M_EXT) {
39236c6e
A
2291 sb->sb_mbcnt += m->m_ext.ext_size;
2292 cnt += (m->m_ext.ext_size >> MSIZESHIFT);
2d21ac55 2293 }
316670eb
A
2294 OSAddAtomic(cnt, &total_sbmb_cnt);
2295 VERIFY(total_sbmb_cnt > 0);
0a7de745 2296 if (total_sbmb_cnt > total_sbmb_cnt_peak) {
fe8ab488 2297 total_sbmb_cnt_peak = total_sbmb_cnt;
0a7de745 2298 }
3e170ce0
A
2299
2300 /*
39037602 2301 * If data is being added to the send socket buffer,
3e170ce0
A
2302 * update the send byte count
2303 */
39037602
A
2304 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2305 inp_incr_sndbytes_total(sb->sb_so, m->m_len);
2306 inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
2307 }
0b4e3aa0
A
2308}
2309
2310/* adjust counters in sb reflecting freeing of m */
2311void
2312sbfree(struct sockbuf *sb, struct mbuf *m)
2313{
2d21ac55 2314 int cnt = -1;
316670eb 2315
2d21ac55 2316 sb->sb_cc -= m->m_len;
39236c6e 2317 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
0a7de745 2318 m->m_type != MT_OOBDATA) {
2d21ac55 2319 sb->sb_ctl -= m->m_len;
0a7de745 2320 }
39236c6e 2321 sb->sb_mbcnt -= MSIZE;
2d21ac55 2322 if (m->m_flags & M_EXT) {
39236c6e
A
2323 sb->sb_mbcnt -= m->m_ext.ext_size;
2324 cnt -= (m->m_ext.ext_size >> MSIZESHIFT);
2d21ac55 2325 }
316670eb
A
2326 OSAddAtomic(cnt, &total_sbmb_cnt);
2327 VERIFY(total_sbmb_cnt >= 0);
0a7de745 2328 if (total_sbmb_cnt < total_sbmb_cnt_floor) {
39037602 2329 total_sbmb_cnt_floor = total_sbmb_cnt;
0a7de745 2330 }
3e170ce0
A
2331
2332 /*
2333 * If data is being removed from the send socket buffer,
2334 * update the send byte count
2335 */
0a7de745 2336 if (sb->sb_flags & SB_SNDBYTE_CNT) {
39037602 2337 inp_decr_sndbytes_total(sb->sb_so, m->m_len);
0a7de745 2338 }
0b4e3aa0
A
2339}
2340
2341/*
2342 * Set lock on sockbuf sb; sleep if lock is already held.
2343 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
2344 * Returns error without lock if sleep is interrupted.
2345 */
2346int
39236c6e 2347sblock(struct sockbuf *sb, uint32_t flags)
0b4e3aa0 2348{
39236c6e
A
2349 boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
2350 void *lr_saved = __builtin_return_address(0);
2351 struct socket *so = sb->sb_so;
2352 void * wchan;
6601e61a 2353 int error = 0;
fe8ab488 2354 thread_t tp = current_thread();
6601e61a 2355
39236c6e 2356 VERIFY((flags & SBL_VALID) == flags);
6601e61a 2357
39236c6e
A
2358 /* so_usecount may be 0 if we get here from sofreelastref() */
2359 if (so == NULL) {
2360 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2361 __func__, sb, sb->sb_flags, lr_saved);
2362 /* NOTREACHED */
2363 } else if (so->so_usecount < 0) {
2364 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2365 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2366 so->so_usecount, lr_saved, solockhistory_nr(so));
2367 /* NOTREACHED */
2368 }
2369
fe8ab488
A
2370 /*
2371 * The content filter thread must hold the sockbuf lock
2372 */
2373 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2374 /*
2375 * Don't panic if we are defunct because SB_LOCK has
2376 * been cleared by sodefunct()
2377 */
0a7de745 2378 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
fe8ab488 2379 panic("%s: SB_LOCK not held for %p\n",
3e170ce0 2380 __func__, sb);
0a7de745 2381 }
fe8ab488
A
2382
2383 /* Keep the sockbuf locked */
0a7de745 2384 return 0;
fe8ab488
A
2385 }
2386
0a7de745
A
2387 if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
2388 return EWOULDBLOCK;
2389 }
39236c6e
A
2390 /*
2391 * We may get here from sorflush(), in which case "sb" may not
2392 * point to the real socket buffer. Use the actual socket buffer
2393 * address from the socket instead.
2394 */
2395 wchan = (sb->sb_flags & SB_RECV) ?
2396 &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
2397
fe8ab488
A
2398 /*
2399 * A content filter thread has exclusive access to the sockbuf
2400 * until it clears the
2401 */
2402 while ((sb->sb_flags & SB_LOCK) ||
0a7de745
A
2403 ((so->so_flags & SOF_CONTENT_FILTER) &&
2404 sb->sb_cfil_thread != NULL)) {
39236c6e
A
2405 lck_mtx_t *mutex_held;
2406
2407 /*
2408 * XXX: This code should be moved up above outside of this loop;
2409 * however, we may get here as part of sofreelastref(), and
2410 * at that time pr_getlock() may no longer be able to return
2411 * us the lock. This will be fixed in future.
2412 */
0a7de745 2413 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 2414 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 2415 } else {
39236c6e 2416 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 2417 }
39236c6e 2418
5ba3f43e 2419 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
39236c6e
A
2420
2421 sb->sb_wantlock++;
2422 VERIFY(sb->sb_wantlock != 0);
2423
2424 error = msleep(wchan, mutex_held,
2425 nointr ? PSOCK : PSOCK | PCATCH,
2426 nointr ? "sb_lock_nointr" : "sb_lock", NULL);
2427
2428 VERIFY(sb->sb_wantlock != 0);
2429 sb->sb_wantlock--;
2430
2431 if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
2432 !(flags & SBL_IGNDEFUNCT)) {
2433 error = EBADF;
39037602 2434 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
39236c6e 2435 "(%d)\n", __func__, proc_selfpid(),
39037602 2436 proc_best_name(current_proc()),
39236c6e 2437 (uint64_t)VM_KERNEL_ADDRPERM(so),
39037602 2438 SOCK_DOM(so), SOCK_TYPE(so), error);
39236c6e
A
2439 }
2440
0a7de745
A
2441 if (error != 0) {
2442 return error;
2443 }
39236c6e
A
2444 }
2445 sb->sb_flags |= SB_LOCK;
0a7de745 2446 return 0;
0b4e3aa0
A
2447}
2448
39236c6e
A
2449/*
2450 * Release lock on sockbuf sb
2451 */
0b4e3aa0 2452void
39236c6e 2453sbunlock(struct sockbuf *sb, boolean_t keeplocked)
0b4e3aa0 2454{
39236c6e 2455 void *lr_saved = __builtin_return_address(0);
91447636 2456 struct socket *so = sb->sb_so;
fe8ab488 2457 thread_t tp = current_thread();
91447636 2458
39236c6e
A
2459 /* so_usecount may be 0 if we get here from sofreelastref() */
2460 if (so == NULL) {
2461 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2462 __func__, sb, sb->sb_flags, lr_saved);
2463 /* NOTREACHED */
2464 } else if (so->so_usecount < 0) {
2465 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2466 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2467 so->so_usecount, lr_saved, solockhistory_nr(so));
2468 /* NOTREACHED */
2469 }
91447636 2470
fe8ab488
A
2471 /*
2472 * The content filter thread must hold the sockbuf lock
2473 */
2474 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
39236c6e 2475 /*
fe8ab488
A
2476 * Don't panic if we are defunct because SB_LOCK has
2477 * been cleared by sodefunct()
39236c6e 2478 */
fe8ab488 2479 if (!(so->so_flags & SOF_DEFUNCT) &&
3e170ce0 2480 !(sb->sb_flags & SB_LOCK) &&
fe8ab488
A
2481 !(so->so_state & SS_DEFUNCT) &&
2482 !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
2483 panic("%s: SB_LOCK not held for %p\n",
3e170ce0 2484 __func__, sb);
fe8ab488 2485 }
3e170ce0 2486 /* Keep the sockbuf locked and proceed */
fe8ab488
A
2487 } else {
2488 VERIFY((sb->sb_flags & SB_LOCK) ||
3e170ce0
A
2489 (so->so_state & SS_DEFUNCT) ||
2490 (so->so_flags1 & SOF1_DEFUNCTINPROG));
fe8ab488
A
2491
2492 sb->sb_flags &= ~SB_LOCK;
2493
2494 if (sb->sb_wantlock > 0) {
2495 /*
3e170ce0
A
2496 * We may get here from sorflush(), in which case "sb"
2497 * may not point to the real socket buffer. Use the
2498 * actual socket buffer address from the socket instead.
fe8ab488
A
2499 */
2500 wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
2501 &so->so_snd.sb_flags);
2502 }
2d21ac55 2503 }
39236c6e 2504
0a7de745 2505 if (!keeplocked) { /* unlock on exit */
cb323159
A
2506 if (so->so_flags & SOF_MP_SUBFLOW || SOCK_DOM(so) == PF_MULTIPATH) {
2507 (*so->so_proto->pr_unlock)(so, 1, lr_saved);
0a7de745 2508 } else {
cb323159 2509 lck_mtx_t *mutex_held;
b0d623f7 2510
cb323159
A
2511 if (so->so_proto->pr_getlock != NULL) {
2512 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2513 } else {
2514 mutex_held = so->so_proto->pr_domain->dom_mtx;
2515 }
0c530ab8 2516
cb323159
A
2517 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2518
2519 VERIFY(so->so_usecount > 0);
2520 so->so_usecount--;
2521 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2522 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2523 lck_mtx_unlock(mutex_held);
2524 }
91447636 2525 }
0b4e3aa0
A
2526}
2527
2528void
2d21ac55 2529sorwakeup(struct socket *so)
0b4e3aa0 2530{
0a7de745 2531 if (sb_notify(&so->so_rcv)) {
cb323159 2532 sowakeup(so, &so->so_rcv, NULL);
0a7de745 2533 }
0b4e3aa0
A
2534}
2535
2536void
2d21ac55 2537sowwakeup(struct socket *so)
0b4e3aa0 2538{
0a7de745 2539 if (sb_notify(&so->so_snd)) {
cb323159 2540 sowakeup(so, &so->so_snd, NULL);
0a7de745 2541 }
0b4e3aa0 2542}
316670eb
A
2543
2544void
2545soevent(struct socket *so, long hint)
2546{
0a7de745 2547 if (so->so_flags & SOF_KNOTE) {
316670eb 2548 KNOTE(&so->so_klist, hint);
0a7de745 2549 }
39236c6e
A
2550
2551 soevupcall(so, hint);
2552
fe8ab488
A
2553 /*
2554 * Don't post an event if this a subflow socket or
2555 * the app has opted out of using cellular interface
2556 */
3e170ce0 2557 if ((hint & SO_FILT_HINT_IFDENIED) &&
fe8ab488
A
2558 !(so->so_flags & SOF_MP_SUBFLOW) &&
2559 !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
cb323159
A
2560 !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE) &&
2561 !(so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
39236c6e 2562 soevent_ifdenied(so);
0a7de745 2563 }
316670eb
A
2564}
2565
39236c6e 2566void
f427ee49 2567soevupcall(struct socket *so, long hint)
39236c6e 2568{
fe8ab488 2569 if (so->so_event != NULL) {
39236c6e
A
2570 caddr_t so_eventarg = so->so_eventarg;
2571
2572 hint &= so->so_eventmask;
0a7de745 2573 if (hint != 0) {
39236c6e 2574 so->so_event(so, so_eventarg, hint);
0a7de745 2575 }
39236c6e
A
2576 }
2577}
2578
2579static void
2580soevent_ifdenied(struct socket *so)
2581{
2582 struct kev_netpolicy_ifdenied ev_ifdenied;
2583
0a7de745 2584 bzero(&ev_ifdenied, sizeof(ev_ifdenied));
39236c6e
A
2585 /*
2586 * The event consumer is interested about the effective {upid,pid,uuid}
2587 * info which can be different than the those related to the process
2588 * that recently performed a system call on the socket, i.e. when the
2589 * socket is delegated.
2590 */
2591 if (so->so_flags & SOF_DELEGATED) {
2592 ev_ifdenied.ev_data.eupid = so->e_upid;
2593 ev_ifdenied.ev_data.epid = so->e_pid;
2594 uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
2595 } else {
2596 ev_ifdenied.ev_data.eupid = so->last_upid;
2597 ev_ifdenied.ev_data.epid = so->last_pid;
2598 uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
2599 }
2600
2601 if (++so->so_ifdenied_notifies > 1) {
2602 /*
2603 * Allow for at most one kernel event to be generated per
2604 * socket; so_ifdenied_notifies is reset upon changes in
2605 * the UUID policy. See comments in inp_update_policy.
2606 */
2607 if (net_io_policy_log) {
2608 uuid_string_t buf;
2609
2610 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
f427ee49 2611 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %llu "
39236c6e
A
2612 "euuid %s%s has %d redundant events supressed\n",
2613 __func__, so->last_pid,
2614 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2615 SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
2616 ((so->so_flags & SOF_DELEGATED) ?
2617 " [delegated]" : ""), so->so_ifdenied_notifies);
2618 }
2619 } else {
2620 if (net_io_policy_log) {
2621 uuid_string_t buf;
2622
2623 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
f427ee49 2624 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %llu "
39236c6e
A
2625 "euuid %s%s event posted\n", __func__,
2626 so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so),
2627 SOCK_DOM(so), SOCK_TYPE(so),
2628 ev_ifdenied.ev_data.epid, buf,
2629 ((so->so_flags & SOF_DELEGATED) ?
2630 " [delegated]" : ""));
2631 }
2632 netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
0a7de745 2633 sizeof(ev_ifdenied));
39236c6e
A
2634 }
2635}
0b4e3aa0 2636
1c79356b
A
2637/*
2638 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
2639 */
2640struct sockaddr *
2d21ac55 2641dup_sockaddr(struct sockaddr *sa, int canwait)
1c79356b
A
2642{
2643 struct sockaddr *sa2;
2644
2d21ac55
A
2645 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
2646 canwait ? M_WAITOK : M_NOWAIT);
0a7de745 2647 if (sa2) {
1c79356b 2648 bcopy(sa, sa2, sa->sa_len);
0a7de745
A
2649 }
2650 return sa2;
1c79356b
A
2651}
2652
2653/*
2654 * Create an external-format (``xsocket'') structure using the information
2655 * in the kernel-format socket structure pointed to by so. This is done
2656 * to reduce the spew of irrelevant information over this interface,
2657 * to isolate user code from changes in the kernel structure, and
2658 * potentially to provide information-hiding if we decide that
2659 * some of this information should be hidden from users.
2660 */
2661void
2662sotoxsocket(struct socket *so, struct xsocket *xso)
2663{
0a7de745 2664 xso->xso_len = sizeof(*xso);
316670eb 2665 xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
1c79356b 2666 xso->so_type = so->so_type;
316670eb 2667 xso->so_options = (short)(so->so_options & 0xffff);
1c79356b
A
2668 xso->so_linger = so->so_linger;
2669 xso->so_state = so->so_state;
316670eb 2670 xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
91447636 2671 if (so->so_proto) {
39236c6e
A
2672 xso->xso_protocol = SOCK_PROTO(so);
2673 xso->xso_family = SOCK_DOM(so);
2d21ac55 2674 } else {
91447636 2675 xso->xso_protocol = xso->xso_family = 0;
2d21ac55 2676 }
1c79356b
A
2677 xso->so_qlen = so->so_qlen;
2678 xso->so_incqlen = so->so_incqlen;
2679 xso->so_qlimit = so->so_qlimit;
2680 xso->so_timeo = so->so_timeo;
2681 xso->so_error = so->so_error;
2682 xso->so_pgid = so->so_pgid;
2683 xso->so_oobmark = so->so_oobmark;
2684 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2685 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
316670eb 2686 xso->so_uid = kauth_cred_getuid(so->so_cred);
1c79356b
A
2687}
2688
b0d623f7 2689
f427ee49 2690#if XNU_TARGET_OS_OSX
b0d623f7
A
2691
2692void
2693sotoxsocket64(struct socket *so, struct xsocket64 *xso)
2694{
0a7de745 2695 xso->xso_len = sizeof(*xso);
39236c6e
A
2696 xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
2697 xso->so_type = so->so_type;
2698 xso->so_options = (short)(so->so_options & 0xffff);
2699 xso->so_linger = so->so_linger;
2700 xso->so_state = so->so_state;
2701 xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
2702 if (so->so_proto) {
2703 xso->xso_protocol = SOCK_PROTO(so);
2704 xso->xso_family = SOCK_DOM(so);
2705 } else {
2706 xso->xso_protocol = xso->xso_family = 0;
2707 }
2708 xso->so_qlen = so->so_qlen;
2709 xso->so_incqlen = so->so_incqlen;
2710 xso->so_qlimit = so->so_qlimit;
2711 xso->so_timeo = so->so_timeo;
2712 xso->so_error = so->so_error;
2713 xso->so_pgid = so->so_pgid;
2714 xso->so_oobmark = so->so_oobmark;
2715 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2716 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
2717 xso->so_uid = kauth_cred_getuid(so->so_cred);
2718}
2719
f427ee49 2720#endif /* XNU_TARGET_OS_OSX */
b0d623f7 2721
1c79356b
A
2722/*
2723 * This does the same for sockbufs. Note that the xsockbuf structure,
2724 * since it is always embedded in a socket, does not include a self
2725 * pointer nor a length. We make this entry point public in case
2726 * some other mechanism needs it.
2727 */
2728void
2729sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
2730{
2731 xsb->sb_cc = sb->sb_cc;
2732 xsb->sb_hiwat = sb->sb_hiwat;
2733 xsb->sb_mbcnt = sb->sb_mbcnt;
2734 xsb->sb_mbmax = sb->sb_mbmax;
2735 xsb->sb_lowat = sb->sb_lowat;
f427ee49 2736 xsb->sb_flags = (short)sb->sb_flags;
b0d623f7 2737 xsb->sb_timeo = (short)
f427ee49 2738 ((sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick);
0a7de745 2739 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
91447636 2740 xsb->sb_timeo = 1;
0a7de745 2741 }
1c79356b
A
2742}
2743
316670eb
A
2744/*
2745 * Based on the policy set by an all knowing decison maker, throttle sockets
2746 * that either have been marked as belonging to "background" process.
2747 */
3e170ce0 2748inline int
316670eb 2749soisthrottled(struct socket *so)
d1ecb069 2750{
0a7de745 2751 return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
d1ecb069
A
2752}
2753
3e170ce0 2754inline int
316670eb
A
2755soisprivilegedtraffic(struct socket *so)
2756{
0a7de745 2757 return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
39236c6e
A
2758}
2759
3e170ce0 2760inline int
39236c6e
A
2761soissrcbackground(struct socket *so)
2762{
0a7de745
A
2763 return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
2764 IS_SO_TC_BACKGROUND(so->so_traffic_class);
316670eb 2765}
d41d1dae 2766
3e170ce0 2767inline int
fe8ab488
A
2768soissrcrealtime(struct socket *so)
2769{
0a7de745
A
2770 return so->so_traffic_class >= SO_TC_AV &&
2771 so->so_traffic_class <= SO_TC_VO;
3e170ce0
A
2772}
2773
2774inline int
2775soissrcbesteffort(struct socket *so)
2776{
0a7de745
A
2777 return so->so_traffic_class == SO_TC_BE ||
2778 so->so_traffic_class == SO_TC_RD ||
2779 so->so_traffic_class == SO_TC_OAM;
fe8ab488
A
2780}
2781
5ba3f43e
A
2782void
2783soclearfastopen(struct socket *so)
2784{
0a7de745 2785 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
5ba3f43e 2786 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
0a7de745 2787 }
5ba3f43e 2788
0a7de745 2789 if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
5ba3f43e 2790 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
0a7de745 2791 }
5ba3f43e
A
2792}
2793
/* Socket event callback that deliberately does nothing. */
void
sonullevent(struct socket *so, void *arg, long hint)
{
#pragma unused(so, arg, hint)
}
2799
1c79356b
A
2800/*
2801 * Here is the definition of some of the basic objects in the kern.ipc
2802 * branch of the MIB.
2803 */
39236c6e 2804SYSCTL_NODE(_kern, KERN_IPC, ipc,
0a7de745 2805 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");
1c79356b 2806
b0d623f7
A
2807/* Check that the maximum socket buffer size is within a range */
2808
2809static int
39236c6e 2810sysctl_sb_max SYSCTL_HANDLER_ARGS
b0d623f7 2811{
39236c6e 2812#pragma unused(oidp, arg1, arg2)
b0d623f7
A
2813 u_int32_t new_value;
2814 int changed = 0;
0a7de745 2815 int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
39236c6e 2816 &new_value, &changed);
b0d623f7 2817 if (!error && changed) {
39236c6e 2818 if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
b0d623f7
A
2819 sb_max = new_value;
2820 } else {
2821 error = ERANGE;
2822 }
2823 }
0a7de745 2824 return error;
b0d623f7
A
2825}
2826
39236c6e 2827SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
0a7de745
A
2828 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2829 &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
39236c6e 2830
39236c6e 2831SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
0a7de745 2832 CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");
39236c6e
A
2833
2834SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
0a7de745 2835 CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");
39236c6e
A
2836
2837SYSCTL_INT(_kern_ipc, OID_AUTO, njcl,
0a7de745 2838 CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
39236c6e
A
2839
2840SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
0a7de745 2841 CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
39236c6e
A
2842
2843SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
0a7de745
A
2844 CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
2845 "Enable socket queue limit compatibility");
1c79356b 2846
5ba3f43e
A
/*
 * Hack alert -- rdar://33572856
 * A loopback test we cannot change was failing because it sets
 * SO_SNDTIMEO to 5 seconds and that's also the value
 * of the minimum persist timer. Because of the persist timer,
 * the connection was not idle for 5 seconds and SO_SNDTIMEO
 * was not triggering at 5 seconds, causing the test failure.
 * As a workaround, we hook the soqlencomp sysctl that the test is
 * already setting: when it is set to a non-zero value we also disable
 * auto tuning of the receive buffer and raise the minimum persist timer.
 */
2857
2858extern u_int32_t tcp_do_autorcvbuf;
2859
2860static int
2861sysctl_soqlencomp SYSCTL_HANDLER_ARGS
2862{
2863#pragma unused(oidp, arg1, arg2)
2864 u_int32_t new_value;
2865 int changed = 0;
0a7de745 2866 int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
5ba3f43e
A
2867 &new_value, &changed);
2868 if (!error && changed) {
2869 soqlencomp = new_value;
2870 if (new_value != 0) {
2871 tcp_do_autorcvbuf = 0;
2872 tcptv_persmin_val = 6 * TCP_RETRANSHZ;
2873 }
2874 }
0a7de745 2875 return error;
5ba3f43e
A
2876}
2877SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
0a7de745
A
2878 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2879 &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");
316670eb 2880
39037602 2881SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 2882 &total_sbmb_cnt, 0, "");
39037602 2883SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_peak, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 2884 &total_sbmb_cnt_peak, 0, "");
39037602 2885SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_floor, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 2886 &total_sbmb_cnt_floor, 0, "");
39037602 2887SYSCTL_QUAD(_kern_ipc, OID_AUTO, sbmb_limreached, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745 2888 &sbmb_limreached, "");
39037602
A
2889
2890
316670eb
A
2891SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
2892
39236c6e 2893SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2894 &net_io_policy_log, 0, "");
39236c6e
A
2895
2896#if CONFIG_PROC_UUID_POLICY
2897SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2898 &net_io_policy_uuid, 0, "");
39236c6e 2899#endif /* CONFIG_PROC_UUID_POLICY */