1 /*
2 * Copyright (c) 1998-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/domain.h>
73 #include <sys/kernel.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 #include <sys/stat.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/syslog.h>
86 #include <sys/unpcb.h>
87 #include <sys/ev.h>
88 #include <kern/locks.h>
89 #include <net/route.h>
90 #include <net/content_filter.h>
91 #include <netinet/in.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/tcp_var.h>
94 #include <sys/kdebug.h>
95 #include <libkern/OSAtomic.h>
96
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif
100
101 #include <mach/vm_param.h>
102
103 #if MPTCP
104 #include <netinet/mptcp_var.h>
105 #endif
106
107 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
108 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
109
110 SYSCTL_DECL(_kern_ipc);
111
112 __private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
113 SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
114 CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
115
116 static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
117 static struct socket *sonewconn_internal(struct socket *, int);
118 static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
119 struct mbuf *);
120 static void soevent_ifdenied(struct socket *);
121
122 static int sbappendrecord_common(struct sockbuf *sb, struct mbuf *m0, boolean_t nodrop);
123 static int sbappend_common(struct sockbuf *sb, struct mbuf *m, boolean_t nodrop);
124
125 /*
126 * Primitive routines for operating on sockets and socket buffers
127 */
128 static int soqlimitcompat = 1;
129 static int soqlencomp = 0;
130
131 /*
132 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
133 * get scaled up or down to suit that memory configuration. high_sb_max is a
134 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
135 */
136
137 u_int32_t sb_max = SB_MAX; /* XXX should be static */
138 u_int32_t high_sb_max = SB_MAX;
139
140 static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */
141 int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
142 int32_t total_sbmb_cnt_floor __attribute__((aligned(8))) = 0;
143 int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
144 int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
145
146 u_int32_t net_io_policy_log = 0; /* log socket policy changes */
147 #if CONFIG_PROC_UUID_POLICY
148 u_int32_t net_io_policy_uuid = 1; /* enable UUID socket policy */
149 #endif /* CONFIG_PROC_UUID_POLICY */
150
151 /*
152 * Procedures to manipulate state flags of socket
153 * and do appropriate wakeups. Normal sequence from the
154 * active (originating) side is that soisconnecting() is
155 * called during processing of connect() call,
156 * resulting in an eventual call to soisconnected() if/when the
157 * connection is established. When the connection is torn down
158 * soisdisconnecting() is called during processing of disconnect() call,
159 * and soisdisconnected() is called when the connection to the peer
160 * is totally severed. The semantics of these routines are such that
161 * connectionless protocols can call soisconnected() and soisdisconnected()
162 * only, bypassing the in-progress calls when setting up a ``connection''
163 * takes no time.
164 *
165 * From the passive side, a socket is created with
166 * two queues of sockets: so_incomp for connections in progress
167 * and so_comp for connections already made and awaiting user acceptance.
168 * As a protocol is preparing incoming connections, it creates a socket
169 * structure queued on so_incomp by calling sonewconn(). When the connection
170 * is established, soisconnected() is called, and transfers the
171 * socket structure to so_comp, making it available to accept().
172 *
173 * If a socket is closed with sockets on either
174 * so_incomp or so_comp, these sockets are dropped.
175 *
176 * If higher level protocols are implemented in
177 * the kernel, the wakeups done here will sometimes
178 * cause software-interrupt process scheduling.
179 */
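/*
 * Illustrative sketch (not part of the original file): the transitions
 * described above correspond to the familiar calls made from user space.
 * A passive peer that ultimately drives sonewconn()/soisconnected() here
 * looks roughly like:
 *
 *	int lfd = socket(AF_INET, SOCK_STREAM, 0);
 *	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(lfd, 128);                    // SO_ACCEPTCONN set; so_incomp/so_comp in use
 *	int cfd = accept(lfd, NULL, NULL);   // dequeues a socket from so_comp
 *
 * while the active peer's connect() drives soisconnecting() and, once the
 * handshake completes, soisconnected().
 */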
180 void
181 soisconnecting(struct socket *so)
182 {
183 so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
184 so->so_state |= SS_ISCONNECTING;
185
186 sflt_notify(so, sock_evt_connecting, NULL);
187 }
188
189 void
190 soisconnected(struct socket *so)
191 {
192 /*
193 * If socket is subject to filter and is pending initial verdict,
194 * delay marking socket as connected and do not present the connected
195 * socket to user just yet.
196 */
197 if (cfil_sock_connected_pending_verdict(so)) {
198 return;
199 }
200
201 so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING);
202 so->so_state |= SS_ISCONNECTED;
203
204 soreserve_preconnect(so, 0);
205
206 sflt_notify(so, sock_evt_connected, NULL);
207
208 if (so->so_head != NULL && (so->so_state & SS_INCOMP)) {
209 struct socket *head = so->so_head;
210 int locked = 0;
211
212 /*
213 * Enforce lock order when the protocol has per socket locks
214 */
215 if (head->so_proto->pr_getlock != NULL) {
216 socket_lock(head, 1);
217 so_acquire_accept_list(head, so);
218 locked = 1;
219 }
220 if (so->so_head == head && (so->so_state & SS_INCOMP)) {
221 so->so_state &= ~SS_INCOMP;
222 so->so_state |= SS_COMP;
223 TAILQ_REMOVE(&head->so_incomp, so, so_list);
224 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
225 head->so_incqlen--;
226
227 /*
228 * We have to release the accept list in
229 * case a socket callback calls sock_accept()
230 */
231 if (locked != 0) {
232 so_release_accept_list(head);
233 socket_unlock(so, 0);
234 }
235 sorwakeup(head);
236 wakeup_one((caddr_t)&head->so_timeo);
237
238 if (locked != 0) {
239 socket_unlock(head, 1);
240 socket_lock(so, 0);
241 }
242 } else if (locked != 0) {
243 so_release_accept_list(head);
244 socket_unlock(head, 1);
245 }
246 } else {
247 wakeup((caddr_t)&so->so_timeo);
248 sorwakeup(so);
249 sowwakeup(so);
250 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED |
251 SO_FILT_HINT_CONNINFO_UPDATED);
252 }
253 }
254
255 boolean_t
256 socanwrite(struct socket *so)
257 {
258 return (so->so_state & SS_ISCONNECTED) ||
259 !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
260 (so->so_flags1 & SOF1_PRECONNECT_DATA);
261 }
262
263 void
264 soisdisconnecting(struct socket *so)
265 {
266 so->so_state &= ~SS_ISCONNECTING;
267 so->so_state |= (SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE);
268 soevent(so, SO_FILT_HINT_LOCKED);
269 sflt_notify(so, sock_evt_disconnecting, NULL);
270 wakeup((caddr_t)&so->so_timeo);
271 sowwakeup(so);
272 sorwakeup(so);
273 }
274
275 void
276 soisdisconnected(struct socket *so)
277 {
278 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
279 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
280 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
281 SO_FILT_HINT_CONNINFO_UPDATED);
282 sflt_notify(so, sock_evt_disconnected, NULL);
283 wakeup((caddr_t)&so->so_timeo);
284 sowwakeup(so);
285 sorwakeup(so);
286
287 #if CONTENT_FILTER
288 /* Notify content filters as soon as we cannot send/receive data */
289 cfil_sock_notify_shutdown(so, SHUT_RDWR);
290 #endif /* CONTENT_FILTER */
291 }
292
293 /*
294 * This function will issue a wakeup like soisdisconnected but it will not
295 * notify the socket filters. This will avoid unlocking the socket
296 * in the midst of closing it.
297 */
298 void
299 sodisconnectwakeup(struct socket *so)
300 {
301 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
302 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
303 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
304 SO_FILT_HINT_CONNINFO_UPDATED);
305 wakeup((caddr_t)&so->so_timeo);
306 sowwakeup(so);
307 sorwakeup(so);
308
309 #if CONTENT_FILTER
310 /* Notify content filters as soon as we cannot send/receive data */
311 cfil_sock_notify_shutdown(so, SHUT_RDWR);
312 #endif /* CONTENT_FILTER */
313 }
314
315 /*
316 * When an attempt at a new connection is noted on a socket
317 * which accepts connections, sonewconn is called. If the
318 * connection is possible (subject to space constraints, etc.)
319  * then we allocate a new structure, properly linked into the
320 * data structure of the original socket, and return this.
321 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
322 */
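/*
 * Illustrative sketch (assumption about a typical caller, not a quote of
 * any protocol): an input path accepting a new passive connection would
 * use the routine below roughly as follows.
 *
 *	struct socket *so;
 *
 *	so = sonewconn(head, 0, (const struct sockaddr *)from);
 *	if (so == NULL) {
 *		// listen queue full, filter rejected, or allocation failed
 *		return;
 *	}
 *	// ... set up protocol state; a later soisconnected(so) moves the
 *	// socket from so_incomp to so_comp, where accept() can find it.
 */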
323 static struct socket *
324 sonewconn_internal(struct socket *head, int connstatus)
325 {
326 int so_qlen, error = 0;
327 struct socket *so;
328 lck_mtx_t *mutex_held;
329
330 if (head->so_proto->pr_getlock != NULL) {
331 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
332 } else {
333 mutex_held = head->so_proto->pr_domain->dom_mtx;
334 }
335 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
336
337 if (!soqlencomp) {
338 /*
339 * This is the default case; so_qlen represents the
340 * sum of both incomplete and completed queues.
341 */
342 so_qlen = head->so_qlen;
343 } else {
344 /*
345 * When kern.ipc.soqlencomp is set to 1, so_qlen
346 * represents only the completed queue. Since we
347  * cannot let the incomplete queue go unbounded
348 * (in case of SYN flood), we cap the incomplete
349 * queue length to at most somaxconn, and use that
350 * as so_qlen so that we fail immediately below.
351 */
352 so_qlen = head->so_qlen - head->so_incqlen;
353 if (head->so_incqlen > somaxconn) {
354 so_qlen = somaxconn;
355 }
356 }
357
358 if (so_qlen >=
359 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) {
360 return (struct socket *)0;
361 }
362 so = soalloc(1, SOCK_DOM(head), head->so_type);
363 if (so == NULL) {
364 return (struct socket *)0;
365 }
366 /* check if head was closed during the soalloc */
367 if (head->so_proto == NULL) {
368 sodealloc(so);
369 return (struct socket *)0;
370 }
371
372 so->so_type = head->so_type;
373 so->so_options = head->so_options & ~SO_ACCEPTCONN;
374 so->so_linger = head->so_linger;
375 so->so_state = head->so_state | SS_NOFDREF;
376 so->so_proto = head->so_proto;
377 so->so_timeo = head->so_timeo;
378 so->so_pgid = head->so_pgid;
379 kauth_cred_ref(head->so_cred);
380 so->so_cred = head->so_cred;
381 so->last_pid = head->last_pid;
382 so->last_upid = head->last_upid;
383 memcpy(so->last_uuid, head->last_uuid, sizeof(so->last_uuid));
384 if (head->so_flags & SOF_DELEGATED) {
385 so->e_pid = head->e_pid;
386 so->e_upid = head->e_upid;
387 memcpy(so->e_uuid, head->e_uuid, sizeof(so->e_uuid));
388 }
389 /* inherit socket options stored in so_flags */
390 so->so_flags = head->so_flags &
391 (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID |
392 SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT |
393 SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS | SOF_NOTSENT_LOWAT |
394 SOF_DELEGATED);
395 so->so_flags1 |= SOF1_INBOUND;
396 so->so_usecount = 1;
397 so->next_lock_lr = 0;
398 so->next_unlock_lr = 0;
399
400 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
401 so->so_rcv.sb_so = so->so_snd.sb_so = so;
402
403 /* inherit traffic management properties of listener */
404 so->so_flags1 |=
405 head->so_flags1 & (SOF1_TRAFFIC_MGT_SO_BACKGROUND | SOF1_TC_NET_SERV_TYPE |
406 SOF1_QOSMARKING_ALLOWED | SOF1_QOSMARKING_POLICY_OVERRIDE);
407 so->so_background_thread = head->so_background_thread;
408 so->so_traffic_class = head->so_traffic_class;
409 so->so_netsvctype = head->so_netsvctype;
410
411 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
412 sodealloc(so);
413 return (struct socket *)0;
414 }
415 so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
416 so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
417
418 /*
419 * Must be done with head unlocked to avoid deadlock
420  * for protocols with per-socket mutexes.
421 */
422 if (head->so_proto->pr_unlock) {
423 socket_unlock(head, 0);
424 }
425 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
426 error) {
427 sodealloc(so);
428 if (head->so_proto->pr_unlock) {
429 socket_lock(head, 0);
430 }
431 return (struct socket *)0;
432 }
433 if (head->so_proto->pr_unlock) {
434 socket_lock(head, 0);
435 /*
436  * Radar 7385998: Recheck that the head is still accepting
437  * to avoid a race condition when the head is getting closed.
438 */
439 if ((head->so_options & SO_ACCEPTCONN) == 0) {
440 so->so_state &= ~SS_NOFDREF;
441 soclose(so);
442 return (struct socket *)0;
443 }
444 }
445
446 if (so->so_proto->pr_copy_last_owner != NULL) {
447 (*so->so_proto->pr_copy_last_owner)(so, head);
448 }
449 atomic_add_32(&so->so_proto->pr_domain->dom_refs, 1);
450
451 /* Insert in head appropriate lists */
452 so_acquire_accept_list(head, NULL);
453
454 so->so_head = head;
455
456 /*
457 * Since this socket is going to be inserted into the incomp
458 * queue, it can be picked up by another thread in
459  * tcp_dropdropablreq to get dropped before it is set up.
460  * To prevent this race, set the in-progress flag, which can be
461  * cleared later.
462 */
463 so->so_flags |= SOF_INCOMP_INPROGRESS;
464
465 if (connstatus) {
466 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
467 so->so_state |= SS_COMP;
468 } else {
469 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
470 so->so_state |= SS_INCOMP;
471 head->so_incqlen++;
472 }
473 head->so_qlen++;
474
475 so_release_accept_list(head);
476
477 /* Attach socket filters for this protocol */
478 sflt_initsock(so);
479
480 if (connstatus) {
481 so->so_state |= connstatus;
482 sorwakeup(head);
483 wakeup((caddr_t)&head->so_timeo);
484 }
485 return so;
486 }
487
488
489 struct socket *
490 sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
491 {
492 int error = sflt_connectin(head, from);
493 if (error) {
494 return NULL;
495 }
496
497 return sonewconn_internal(head, connstatus);
498 }
499
500 /*
501 * Socantsendmore indicates that no more data will be sent on the
502 * socket; it would normally be applied to a socket when the user
503 * informs the system that no more data is to be sent, by the protocol
504 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
505 * will be received, and will normally be applied to the socket by a
506 * protocol when it detects that the peer will send no more data.
507 * Data queued for reading in the socket may yet be read.
508 */
509
510 void
511 socantsendmore(struct socket *so)
512 {
513 so->so_state |= SS_CANTSENDMORE;
514 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE);
515 sflt_notify(so, sock_evt_cantsendmore, NULL);
516 sowwakeup(so);
517 }
518
519 void
520 socantrcvmore(struct socket *so)
521 {
522 so->so_state |= SS_CANTRCVMORE;
523 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
524 sflt_notify(so, sock_evt_cantrecvmore, NULL);
525 sorwakeup(so);
526 }
527
528 /*
529 * Wait for data to arrive at/drain from a socket buffer.
530 */
531 int
532 sbwait(struct sockbuf *sb)
533 {
534 boolean_t nointr = (sb->sb_flags & SB_NOINTR);
535 void *lr_saved = __builtin_return_address(0);
536 struct socket *so = sb->sb_so;
537 lck_mtx_t *mutex_held;
538 struct timespec ts;
539 int error = 0;
540
541 if (so == NULL) {
542 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
543 __func__, sb, sb->sb_flags, lr_saved);
544 /* NOTREACHED */
545 } else if (so->so_usecount < 1) {
546 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
547 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
548 so->so_usecount, lr_saved, solockhistory_nr(so));
549 /* NOTREACHED */
550 }
551
552 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
553 error = EBADF;
554 if (so->so_flags & SOF_DEFUNCT) {
555 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
556 "(%d)\n", __func__, proc_selfpid(),
557 proc_best_name(current_proc()),
558 (uint64_t)VM_KERNEL_ADDRPERM(so),
559 SOCK_DOM(so), SOCK_TYPE(so), error);
560 }
561 return error;
562 }
563
564 if (so->so_proto->pr_getlock != NULL) {
565 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
566 } else {
567 mutex_held = so->so_proto->pr_domain->dom_mtx;
568 }
569
570 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
571
572 ts.tv_sec = sb->sb_timeo.tv_sec;
573 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
574
575 sb->sb_waiters++;
576 VERIFY(sb->sb_waiters != 0);
577
578 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
579 nointr ? PSOCK : PSOCK | PCATCH,
580 nointr ? "sbwait_nointr" : "sbwait", &ts);
581
582 VERIFY(sb->sb_waiters != 0);
583 sb->sb_waiters--;
584
585 if (so->so_usecount < 1) {
586 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
587 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
588 so->so_usecount, lr_saved, solockhistory_nr(so));
589 /* NOTREACHED */
590 }
591
592 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
593 error = EBADF;
594 if (so->so_flags & SOF_DEFUNCT) {
595 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
596 "(%d)\n", __func__, proc_selfpid(),
597 proc_best_name(current_proc()),
598 (uint64_t)VM_KERNEL_ADDRPERM(so),
599 SOCK_DOM(so), SOCK_TYPE(so), error);
600 }
601 }
602
603 return error;
604 }
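/*
 * Illustrative sketch (assumption, not a quote of any caller): receive
 * paths typically loop on the buffer state and treat a non-zero return
 * from sbwait() as a reason to bail out, e.g.:
 *
 *	while (sb->sb_cc == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
 *		error = sbwait(sb);
 *		if (error != 0) {
 *			break;	// EBADF (defunct/draining), EINTR, or EWOULDBLOCK on timeout
 *		}
 *	}
 */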
605
606 void
607 sbwakeup(struct sockbuf *sb)
608 {
609 if (sb->sb_waiters > 0) {
610 wakeup((caddr_t)&sb->sb_cc);
611 }
612 }
613
614 /*
615  * Wake up processes waiting on a socket buffer.
616 * Do asynchronous notification via SIGIO
617 * if the socket has the SS_ASYNC flag set.
618 */
619 void
620 sowakeup(struct socket *so, struct sockbuf *sb, struct socket *so2)
621 {
622 if (so->so_flags & SOF_DEFUNCT) {
623 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] si 0x%x, "
624 "fl 0x%x [%s]\n", __func__, proc_selfpid(),
625 proc_best_name(current_proc()),
626 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
627 SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags,
628 (sb->sb_flags & SB_RECV) ? "rcv" : "snd");
629 }
630
631 sb->sb_flags &= ~SB_SEL;
632 selwakeup(&sb->sb_sel);
633 sbwakeup(sb);
634 if (so->so_state & SS_ASYNC) {
635 if (so->so_pgid < 0) {
636 gsignal(-so->so_pgid, SIGIO);
637 } else if (so->so_pgid > 0) {
638 proc_signal(so->so_pgid, SIGIO);
639 }
640 }
641 if (sb->sb_flags & SB_KNOTE) {
642 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
643 }
644 if (sb->sb_flags & SB_UPCALL) {
645 void (*sb_upcall)(struct socket *, void *, int);
646 caddr_t sb_upcallarg;
647 int lock = !(sb->sb_flags & SB_UPCALL_LOCK);
648
649 sb_upcall = sb->sb_upcall;
650 sb_upcallarg = sb->sb_upcallarg;
651 /* Let close know that we're about to do an upcall */
652 so->so_upcallusecount++;
653
654 if (lock) {
655 if (so2) {
656 struct unpcb *unp = sotounpcb(so2);
657 unp->unp_flags |= UNP_DONTDISCONNECT;
658 unp->rw_thrcount++;
659
660 socket_unlock(so2, 0);
661 }
662 socket_unlock(so, 0);
663 }
664 (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT);
665 if (lock) {
666 if (so2 && so > so2) {
667 struct unpcb *unp;
668 socket_lock(so2, 0);
669
670 unp = sotounpcb(so2);
671 unp->rw_thrcount--;
672 if (unp->rw_thrcount == 0) {
673 unp->unp_flags &= ~UNP_DONTDISCONNECT;
674 wakeup(unp);
675 }
676 }
677
678 socket_lock(so, 0);
679
680 if (so2 && so < so2) {
681 struct unpcb *unp;
682 socket_lock(so2, 0);
683
684 unp = sotounpcb(so2);
685 unp->rw_thrcount--;
686 if (unp->rw_thrcount == 0) {
687 unp->unp_flags &= ~UNP_DONTDISCONNECT;
688 wakeup(unp);
689 }
690 }
691 }
692
693 so->so_upcallusecount--;
694 /* Tell close that it's safe to proceed */
695 if ((so->so_flags & SOF_CLOSEWAIT) &&
696 so->so_upcallusecount == 0) {
697 wakeup((caddr_t)&so->so_upcallusecount);
698 }
699 }
700 #if CONTENT_FILTER
701 /*
702 * Trap disconnection events for content filters
703 */
704 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
705 if ((sb->sb_flags & SB_RECV)) {
706 if (so->so_state & (SS_CANTRCVMORE)) {
707 cfil_sock_notify_shutdown(so, SHUT_RD);
708 }
709 } else {
710 if (so->so_state & (SS_CANTSENDMORE)) {
711 cfil_sock_notify_shutdown(so, SHUT_WR);
712 }
713 }
714 }
715 #endif /* CONTENT_FILTER */
716 }
717
718 /*
719 * Socket buffer (struct sockbuf) utility routines.
720 *
721 * Each socket contains two socket buffers: one for sending data and
722 * one for receiving data. Each buffer contains a queue of mbufs,
723 * information about the number of mbufs and amount of data in the
724 * queue, and other fields allowing select() statements and notification
725 * on data availability to be implemented.
726 *
727 * Data stored in a socket buffer is maintained as a list of records.
728 * Each record is a list of mbufs chained together with the m_next
729 * field. Records are chained together with the m_nextpkt field. The upper
730 * level routine soreceive() expects the following conventions to be
731 * observed when placing information in the receive buffer:
732 *
733 * 1. If the protocol requires each message be preceded by the sender's
734 * name, then a record containing that name must be present before
735 * any associated data (mbuf's must be of type MT_SONAME).
736 * 2. If the protocol supports the exchange of ``access rights'' (really
737 * just additional data associated with the message), and there are
738 * ``rights'' to be received, then a record containing this data
739 * should be present (mbuf's must be of type MT_RIGHTS).
740 * 3. If a name or rights record exists, then it must be followed by
741 * a data record, perhaps of zero length.
742 *
743 * Before using a new socket structure it is first necessary to reserve
744 * buffer space to the socket, by calling sbreserve(). This should commit
745 * some of the available buffer space in the system buffer pool for the
746 * socket (currently, it does nothing but enforce limits). The space
747 * should be released by calling sbrelease() when the socket is destroyed.
748 */
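/*
 * Illustrative layout of the record structure described above (sketch):
 *
 *	sb_mb -> [MT_SONAME] -> [control] -> [data] -> [data]     record 1
 *	             |
 *	         m_nextpkt
 *	             v
 *	         [MT_SONAME] -> [data]                             record 2
 *	         ^                  ^
 *	   sb_lastrecord        sb_mbtail
 *
 * Within a record, mbufs are linked with m_next; records are linked with
 * m_nextpkt off the first mbuf of each record, sb_lastrecord points at the
 * first mbuf of the last record, and sb_mbtail at the very last mbuf.
 */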
749
750 /*
751 * Returns: 0 Success
752 * ENOBUFS
753 */
754 int
755 soreserve(struct socket *so, uint32_t sndcc, uint32_t rcvcc)
756 {
757 /*
758 * We do not want to fail the creation of a socket
759 * when kern.ipc.maxsockbuf is less than the
760  * default socket buffer size of the protocol,
761  * so force the buffer sizes to be at most the
762 * limit enforced by sbreserve()
763 */
764 uint64_t maxcc = (uint64_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
765 if (sndcc > maxcc) {
766 sndcc = (uint32_t)maxcc;
767 }
768 if (rcvcc > maxcc) {
769 rcvcc = (uint32_t)maxcc;
770 }
771 if (sbreserve(&so->so_snd, sndcc) == 0) {
772 goto bad;
773 } else {
774 so->so_snd.sb_idealsize = sndcc;
775 }
776
777 if (sbreserve(&so->so_rcv, rcvcc) == 0) {
778 goto bad2;
779 } else {
780 so->so_rcv.sb_idealsize = rcvcc;
781 }
782
783 if (so->so_rcv.sb_lowat == 0) {
784 so->so_rcv.sb_lowat = 1;
785 }
786 if (so->so_snd.sb_lowat == 0) {
787 so->so_snd.sb_lowat = MCLBYTES;
788 }
789 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) {
790 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
791 }
792 return 0;
793 bad2:
794 so->so_snd.sb_flags &= ~SB_SEL;
795 selthreadclear(&so->so_snd.sb_sel);
796 sbrelease(&so->so_snd);
797 bad:
798 return ENOBUFS;
799 }
800
801 void
802 soreserve_preconnect(struct socket *so, unsigned int pre_cc)
803 {
804 /* As of now, same bytes for both preconnect read and write */
805 so->so_snd.sb_preconn_hiwat = pre_cc;
806 so->so_rcv.sb_preconn_hiwat = pre_cc;
807 }
808
809 /*
810 * Allot mbufs to a sockbuf.
811 * Attempt to scale mbmax so that mbcnt doesn't become limiting
812 * if buffering efficiency is near the normal case.
813 */
814 int
815 sbreserve(struct sockbuf *sb, u_int32_t cc)
816 {
817 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES) ||
818 (cc > sb->sb_hiwat && (sb->sb_flags & SB_LIMITED))) {
819 return 0;
820 }
821 sb->sb_hiwat = cc;
822 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
823 if (sb->sb_lowat > sb->sb_hiwat) {
824 sb->sb_lowat = sb->sb_hiwat;
825 }
826 return 1;
827 }
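/*
 * Worked example (sketch; MSIZE and MCLBYTES are the usual mbuf constants
 * and may differ per configuration): with MSIZE = 256 and MCLBYTES = 2048,
 * the cap checked above is sb_max * 2048 / 2304, i.e. roughly 8/9 of
 * sb_max. A successful reservation of cc bytes then sets
 * sb_mbmax = min(cc * 8, sb_max) with the default sb_efficiency of 8, so
 * mbuf accounting (mbcnt) only becomes the limit when buffering is far
 * less efficient than the normal case.
 */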
828
829 /*
830 * Free mbufs held by a socket, and reserved mbuf space.
831 */
832 /* WARNING needs to do selthreadclear() before calling this */
833 void
834 sbrelease(struct sockbuf *sb)
835 {
836 sbflush(sb);
837 sb->sb_hiwat = 0;
838 sb->sb_mbmax = 0;
839 }
840
841 /*
842 * Routines to add and remove
843 * data from an mbuf queue.
844 *
845 * The routines sbappend() or sbappendrecord() are normally called to
846 * append new mbufs to a socket buffer, after checking that adequate
847 * space is available, comparing the function sbspace() with the amount
848 * of data to be added. sbappendrecord() differs from sbappend() in
849 * that data supplied is treated as the beginning of a new record.
850 * To place a sender's address, optional access rights, and data in a
851 * socket receive buffer, sbappendaddr() should be used. To place
852 * access rights and data in a socket receive buffer, sbappendrights()
853 * should be used. In either case, the new data begins a new record.
854 * Note that unlike sbappend() and sbappendrecord(), these routines check
855 * for the caller that there will be enough space to store the data.
856 * Each fails if there is not enough space, or if it cannot find mbufs
857 * to store additional information in.
858 *
859 * Reliable protocols may use the socket send buffer to hold data
860 * awaiting acknowledgement. Data is normally copied from a socket
861 * send buffer in a protocol with m_copy for output to a peer,
862 * and then removing the data from the socket buffer with sbdrop()
863 * or sbdroprecord() when the data is acknowledged by the peer.
864 */
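/*
 * Illustrative sketch (assumption about a typical caller, not a quote): a
 * datagram protocol delivering to the receive buffer usually checks the
 * result and relies on the routine to free the chain on failure, e.g.:
 *
 *	int err = 0;
 *
 *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from, m, opts, &err)) {
 *		sorwakeup(so);
 *	}
 *	// on failure the mbufs have already been freed (modulo the SB_UNIX
 *	// handling of control data) and err holds the errno, e.g. ENOBUFS
 *
 * A stream protocol instead appends with sbappendstream(&so->so_rcv, m),
 * and acknowledged send data is later released with sbdrop(&so->so_snd, n).
 */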
865
866 /*
867 * Append mbuf chain m to the last record in the
868 * socket buffer sb. The additional space associated
869 * the mbuf chain is recorded in sb. Empty mbufs are
870 * discarded and mbufs are compacted where possible.
871 */
872 static int
873 sbappend_common(struct sockbuf *sb, struct mbuf *m, boolean_t nodrop)
874 {
875 struct socket *so = sb->sb_so;
876
877 if (m == NULL || (sb->sb_flags & SB_DROP)) {
878 if (m != NULL && !nodrop) {
879 m_freem(m);
880 }
881 return 0;
882 }
883
884 SBLASTRECORDCHK(sb, "sbappend 1");
885
886 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) {
887 return sbappendrecord_common(sb, m, nodrop);
888 }
889
890 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
891 ASSERT(nodrop == FALSE);
892 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
893 int error = sflt_data_in(so, NULL, &m, NULL, 0);
894 SBLASTRECORDCHK(sb, "sbappend 2");
895
896 #if CONTENT_FILTER
897 if (error == 0) {
898 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
899 }
900 #endif /* CONTENT_FILTER */
901
902 if (error != 0) {
903 if (error != EJUSTRETURN) {
904 m_freem(m);
905 }
906 return 0;
907 }
908 } else if (m) {
909 m->m_flags &= ~M_SKIPCFIL;
910 }
911 }
912
913 /* If this is the first record, it's also the last record */
914 if (sb->sb_lastrecord == NULL) {
915 sb->sb_lastrecord = m;
916 }
917
918 sbcompress(sb, m, sb->sb_mbtail);
919 SBLASTRECORDCHK(sb, "sbappend 3");
920 return 1;
921 }
922
923 int
924 sbappend(struct sockbuf *sb, struct mbuf *m)
925 {
926 return sbappend_common(sb, m, FALSE);
927 }
928
929 int
930 sbappend_nodrop(struct sockbuf *sb, struct mbuf *m)
931 {
932 return sbappend_common(sb, m, TRUE);
933 }
934
935 /*
936 * Similar to sbappend, except that this is optimized for stream sockets.
937 */
938 int
939 sbappendstream(struct sockbuf *sb, struct mbuf *m)
940 {
941 struct socket *so = sb->sb_so;
942
943 if (m == NULL || (sb->sb_flags & SB_DROP)) {
944 if (m != NULL) {
945 m_freem(m);
946 }
947 return 0;
948 }
949
950 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
951 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
952 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
953 /* NOTREACHED */
954 }
955
956 SBLASTMBUFCHK(sb, __func__);
957
958 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
959 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
960 int error = sflt_data_in(so, NULL, &m, NULL, 0);
961 SBLASTRECORDCHK(sb, "sbappendstream 1");
962
963 #if CONTENT_FILTER
964 if (error == 0) {
965 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
966 }
967 #endif /* CONTENT_FILTER */
968
969 if (error != 0) {
970 if (error != EJUSTRETURN) {
971 m_freem(m);
972 }
973 return 0;
974 }
975 } else if (m) {
976 m->m_flags &= ~M_SKIPCFIL;
977 }
978 }
979
980 sbcompress(sb, m, sb->sb_mbtail);
981 sb->sb_lastrecord = sb->sb_mb;
982 SBLASTRECORDCHK(sb, "sbappendstream 2");
983 return 1;
984 }
985
986 #ifdef SOCKBUF_DEBUG
987 void
988 sbcheck(struct sockbuf *sb)
989 {
990 struct mbuf *m;
991 struct mbuf *n = 0;
992 u_int32_t len = 0, mbcnt = 0;
993 lck_mtx_t *mutex_held;
994
995 if (sb->sb_so->so_proto->pr_getlock != NULL) {
996 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
997 } else {
998 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
999 }
1000
1001 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1002
1003 if (sbchecking == 0) {
1004 return;
1005 }
1006
1007 for (m = sb->sb_mb; m; m = n) {
1008 n = m->m_nextpkt;
1009 for (; m; m = m->m_next) {
1010 len += m->m_len;
1011 mbcnt += MSIZE;
1012 /* XXX pretty sure this is bogus */
1013 if (m->m_flags & M_EXT) {
1014 mbcnt += m->m_ext.ext_size;
1015 }
1016 }
1017 }
1018 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1019 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
1020 mbcnt, sb->sb_mbcnt);
1021 }
1022 }
1023 #endif
1024
1025 void
1026 sblastrecordchk(struct sockbuf *sb, const char *where)
1027 {
1028 struct mbuf *m = sb->sb_mb;
1029
1030 while (m && m->m_nextpkt) {
1031 m = m->m_nextpkt;
1032 }
1033
1034 if (m != sb->sb_lastrecord) {
1035 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
1036 "last 0x%llx\n",
1037 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1038 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
1039 (uint64_t)VM_KERNEL_ADDRPERM(m));
1040 printf("packet chain:\n");
1041 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1042 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
1043 }
1044 panic("sblastrecordchk from %s", where);
1045 }
1046 }
1047
1048 void
1049 sblastmbufchk(struct sockbuf *sb, const char *where)
1050 {
1051 struct mbuf *m = sb->sb_mb;
1052 struct mbuf *n;
1053
1054 while (m && m->m_nextpkt) {
1055 m = m->m_nextpkt;
1056 }
1057
1058 while (m && m->m_next) {
1059 m = m->m_next;
1060 }
1061
1062 if (m != sb->sb_mbtail) {
1063 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1064 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1065 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
1066 (uint64_t)VM_KERNEL_ADDRPERM(m));
1067 printf("packet tree:\n");
1068 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1069 printf("\t");
1070 for (n = m; n != NULL; n = n->m_next) {
1071 printf("0x%llx ",
1072 (uint64_t)VM_KERNEL_ADDRPERM(n));
1073 }
1074 printf("\n");
1075 }
1076 panic("sblastmbufchk from %s", where);
1077 }
1078 }
1079
1080 /*
1081 * Similar to sbappend, except the mbuf chain begins a new record.
1082 */
1083 static int
1084 sbappendrecord_common(struct sockbuf *sb, struct mbuf *m0, boolean_t nodrop)
1085 {
1086 struct mbuf *m;
1087 int space = 0;
1088
1089 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
1090 if (m0 != NULL && nodrop == FALSE) {
1091 m_freem(m0);
1092 }
1093 return 0;
1094 }
1095
1096 for (m = m0; m != NULL; m = m->m_next) {
1097 space += m->m_len;
1098 }
1099
1100 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1101 if (nodrop == FALSE) {
1102 m_freem(m0);
1103 }
1104 return 0;
1105 }
1106
1107 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1108 ASSERT(nodrop == FALSE);
1109 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1110 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
1111 sock_data_filt_flag_record);
1112
1113 #if CONTENT_FILTER
1114 if (error == 0) {
1115 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
1116 }
1117 #endif /* CONTENT_FILTER */
1118
1119 if (error != 0) {
1120 SBLASTRECORDCHK(sb, "sbappendrecord 1");
1121 if (error != EJUSTRETURN) {
1122 m_freem(m0);
1123 }
1124 return 0;
1125 }
1126 } else if (m0) {
1127 m0->m_flags &= ~M_SKIPCFIL;
1128 }
1129 }
1130
1131 /*
1132 * Note this permits zero length records.
1133 */
1134 sballoc(sb, m0);
1135 SBLASTRECORDCHK(sb, "sbappendrecord 2");
1136 if (sb->sb_lastrecord != NULL) {
1137 sb->sb_lastrecord->m_nextpkt = m0;
1138 } else {
1139 sb->sb_mb = m0;
1140 }
1141 sb->sb_lastrecord = m0;
1142 sb->sb_mbtail = m0;
1143
1144 m = m0->m_next;
1145 m0->m_next = 0;
1146 if (m && (m0->m_flags & M_EOR)) {
1147 m0->m_flags &= ~M_EOR;
1148 m->m_flags |= M_EOR;
1149 }
1150 sbcompress(sb, m, m0);
1151 SBLASTRECORDCHK(sb, "sbappendrecord 3");
1152 return 1;
1153 }
1154
1155 int
1156 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1157 {
1158 return sbappendrecord_common(sb, m0, FALSE);
1159 }
1160
1161 int
1162 sbappendrecord_nodrop(struct sockbuf *sb, struct mbuf *m0)
1163 {
1164 return sbappendrecord_common(sb, m0, TRUE);
1165 }
1166
1167 /*
1168 * Concatenate address (optional), control (optional) and data into one
1169  * single mbuf chain. If sockbuf *sb is passed in, a space check will be
1170 * performed.
1171 *
1172 * Returns: mbuf chain pointer if succeeded, NULL if failed
1173 */
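/*
 * Resulting chain when all three pieces are present (sketch):
 *
 *	[MT_SONAME: copy of *asa] -> [control ... last control] -> [m0 data ...]
 *
 * i.e. the address mbuf (if any) heads the record, the control chain
 * follows, and the data chain is hung off the last control mbuf.
 */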
1174 struct mbuf *
1175 sbconcat_mbufs(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)
1176 {
1177 struct mbuf *m = NULL, *n = NULL;
1178 int space = 0;
1179
1180 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1181 panic("sbconcat_mbufs");
1182 }
1183
1184 if (m0) {
1185 space += m0->m_pkthdr.len;
1186 }
1187 for (n = control; n; n = n->m_next) {
1188 space += n->m_len;
1189 if (n->m_next == 0) { /* keep pointer to last control buf */
1190 break;
1191 }
1192 }
1193
1194 if (asa != NULL) {
1195 if (asa->sa_len > MLEN) {
1196 return NULL;
1197 }
1198 space += asa->sa_len;
1199 }
1200
1201 if (sb != NULL && space > sbspace(sb)) {
1202 return NULL;
1203 }
1204
1205 if (n) {
1206 n->m_next = m0; /* concatenate data to control */
1207 } else {
1208 control = m0;
1209 }
1210
1211 if (asa != NULL) {
1212 MGET(m, M_DONTWAIT, MT_SONAME);
1213 if (m == 0) {
1214 if (n) {
1215 /* unchain control and data if necessary */
1216 n->m_next = NULL;
1217 }
1218 return NULL;
1219 }
1220 m->m_len = asa->sa_len;
1221 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
1222
1223 m->m_next = control;
1224 } else {
1225 m = control;
1226 }
1227
1228 return m;
1229 }
1230
1231 /*
1232 * Queue mbuf chain to the receive queue of a socket.
1233 * Parameter space is the total len of the mbuf chain.
1234 * If passed in, sockbuf space will be checked.
1235 *
1236 * Returns: 0 Invalid mbuf chain
1237 * 1 Success
1238 */
1239 int
1240 sbappendchain(struct sockbuf *sb, struct mbuf *m, int space)
1241 {
1242 struct mbuf *n, *nlast;
1243
1244 if (m == NULL) {
1245 return 0;
1246 }
1247
1248 if (space != 0 && space > sbspace(sb)) {
1249 return 0;
1250 }
1251
1252 for (n = m; n->m_next != NULL; n = n->m_next) {
1253 sballoc(sb, n);
1254 }
1255 sballoc(sb, n);
1256 nlast = n;
1257
1258 if (sb->sb_lastrecord != NULL) {
1259 sb->sb_lastrecord->m_nextpkt = m;
1260 } else {
1261 sb->sb_mb = m;
1262 }
1263 sb->sb_lastrecord = m;
1264 sb->sb_mbtail = nlast;
1265
1266 SBLASTMBUFCHK(sb, __func__);
1267 SBLASTRECORDCHK(sb, "sbappendadddr 2");
1268 return 1;
1269 }
1270
1271 /*
1272 * Returns: 0 Error: No space/out of mbufs/etc.
1273 * 1 Success
1274 *
1275 * Imputed: (*error_out) errno for error
1276 * ENOBUFS
1277 * sflt_data_in:??? [whatever a filter author chooses]
1278 */
1279 int
1280 sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
1281 struct mbuf *control, int *error_out)
1282 {
1283 int result = 0;
1284 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1285 struct mbuf *mbuf_chain = NULL;
1286
1287 if (error_out) {
1288 *error_out = 0;
1289 }
1290
1291 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1292 panic("sbappendaddrorfree");
1293 }
1294
1295 if (sb->sb_flags & SB_DROP) {
1296 if (m0 != NULL) {
1297 m_freem(m0);
1298 }
1299 if (control != NULL && !sb_unix) {
1300 m_freem(control);
1301 }
1302 if (error_out != NULL) {
1303 *error_out = EINVAL;
1304 }
1305 return 0;
1306 }
1307
1308 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1309 /* Call socket data in filters */
1310 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1311 int error;
1312 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
1313 SBLASTRECORDCHK(sb, __func__);
1314
1315 #if CONTENT_FILTER
1316 if (error == 0) {
1317 error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
1318 0);
1319 }
1320 #endif /* CONTENT_FILTER */
1321
1322 if (error) {
1323 if (error != EJUSTRETURN) {
1324 if (m0) {
1325 m_freem(m0);
1326 }
1327 if (control != NULL && !sb_unix) {
1328 m_freem(control);
1329 }
1330 if (error_out) {
1331 *error_out = error;
1332 }
1333 }
1334 return 0;
1335 }
1336 } else if (m0) {
1337 m0->m_flags &= ~M_SKIPCFIL;
1338 }
1339 }
1340
1341 mbuf_chain = sbconcat_mbufs(sb, asa, m0, control);
1342 SBLASTRECORDCHK(sb, "sbappendadddr 1");
1343 result = sbappendchain(sb, mbuf_chain, 0);
1344 if (result == 0) {
1345 if (m0) {
1346 m_freem(m0);
1347 }
1348 if (control != NULL && !sb_unix) {
1349 m_freem(control);
1350 }
1351 if (error_out) {
1352 *error_out = ENOBUFS;
1353 }
1354 }
1355
1356 return result;
1357 }
1358
1359 inline boolean_t
1360 is_cmsg_valid(struct mbuf *control, struct cmsghdr *cmsg)
1361 {
1362 if (cmsg == NULL) {
1363 return FALSE;
1364 }
1365
1366 if (cmsg->cmsg_len < sizeof(struct cmsghdr)) {
1367 return FALSE;
1368 }
1369
1370 if ((uint8_t *)control->m_data >= (uint8_t *)cmsg + cmsg->cmsg_len) {
1371 return FALSE;
1372 }
1373
1374 if ((uint8_t *)control->m_data + control->m_len <
1375 (uint8_t *)cmsg + cmsg->cmsg_len) {
1376 return FALSE;
1377 }
1378
1379 return TRUE;
1380 }
1381
1382 static int
1383 sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1384 struct mbuf *control)
1385 {
1386 struct mbuf *m, *mlast, *n;
1387 int space = 0;
1388
1389 if (control == 0) {
1390 panic("sbappendcontrol");
1391 }
1392
1393 for (m = control;; m = m->m_next) {
1394 space += m->m_len;
1395 if (m->m_next == 0) {
1396 break;
1397 }
1398 }
1399 n = m; /* save pointer to last control buffer */
1400 for (m = m0; m; m = m->m_next) {
1401 space += m->m_len;
1402 }
1403 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1404 return 0;
1405 }
1406 n->m_next = m0; /* concatenate data to control */
1407 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1408
1409 for (m = control; m->m_next != NULL; m = m->m_next) {
1410 sballoc(sb, m);
1411 }
1412 sballoc(sb, m);
1413 mlast = m;
1414
1415 if (sb->sb_lastrecord != NULL) {
1416 sb->sb_lastrecord->m_nextpkt = control;
1417 } else {
1418 sb->sb_mb = control;
1419 }
1420 sb->sb_lastrecord = control;
1421 sb->sb_mbtail = mlast;
1422
1423 SBLASTMBUFCHK(sb, __func__);
1424 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1425 return 1;
1426 }
1427
1428 int
1429 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
1430 int *error_out)
1431 {
1432 int result = 0;
1433 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1434
1435 if (error_out) {
1436 *error_out = 0;
1437 }
1438
1439 if (sb->sb_flags & SB_DROP) {
1440 if (m0 != NULL) {
1441 m_freem(m0);
1442 }
1443 if (control != NULL && !sb_unix) {
1444 m_freem(control);
1445 }
1446 if (error_out != NULL) {
1447 *error_out = EINVAL;
1448 }
1449 return 0;
1450 }
1451
1452 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1453 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1454 int error;
1455
1456 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
1457 SBLASTRECORDCHK(sb, __func__);
1458
1459 #if CONTENT_FILTER
1460 if (error == 0) {
1461 error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
1462 0);
1463 }
1464 #endif /* CONTENT_FILTER */
1465
1466 if (error) {
1467 if (error != EJUSTRETURN) {
1468 if (m0) {
1469 m_freem(m0);
1470 }
1471 if (control != NULL && !sb_unix) {
1472 m_freem(control);
1473 }
1474 if (error_out) {
1475 *error_out = error;
1476 }
1477 }
1478 return 0;
1479 }
1480 } else if (m0) {
1481 m0->m_flags &= ~M_SKIPCFIL;
1482 }
1483 }
1484
1485 result = sbappendcontrol_internal(sb, m0, control);
1486 if (result == 0) {
1487 if (m0) {
1488 m_freem(m0);
1489 }
1490 if (control != NULL && !sb_unix) {
1491 m_freem(control);
1492 }
1493 if (error_out) {
1494 *error_out = ENOBUFS;
1495 }
1496 }
1497
1498 return result;
1499 }
1500
1501 /*
1502  * TCP streams either have Multipath TCP support or are regular TCP sockets.
1503 */
1504 int
1505 sbappendstream_rcvdemux(struct socket *so, struct mbuf *m)
1506 {
1507 int ret = 0;
1508
1509 if ((m != NULL) &&
1510 m_pktlen(m) <= 0 &&
1511 !((so->so_flags & SOF_MP_SUBFLOW) &&
1512 (m->m_flags & M_PKTHDR) &&
1513 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
1514 m_freem(m);
1515 return ret;
1516 }
1517
1518 #if MPTCP
1519 if (so->so_flags & SOF_MP_SUBFLOW) {
1520 return sbappendmptcpstream_rcv(&so->so_rcv, m);
1521 } else
1522 #endif /* MPTCP */
1523 {
1524 return sbappendstream(&so->so_rcv, m);
1525 }
1526 }
1527
1528 #if MPTCP
1529 int
1530 sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m)
1531 {
1532 struct socket *so = sb->sb_so;
1533
1534 VERIFY(m == NULL || (m->m_flags & M_PKTHDR));
1535         /* SB_NOCOMPRESS must be set to prevent loss of M_PKTHDR data */
1536 VERIFY((sb->sb_flags & (SB_RECV | SB_NOCOMPRESS)) ==
1537 (SB_RECV | SB_NOCOMPRESS));
1538
1539 if (m == NULL || m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) ||
1540 (so->so_state & SS_CANTRCVMORE)) {
1541 if (m && (m->m_flags & M_PKTHDR) &&
1542 m_pktlen(m) == 0 &&
1543 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
1544 mptcp_input(tptomptp(sototcpcb(so))->mpt_mpte, m);
1545 return 1;
1546 } else if (m != NULL) {
1547 m_freem(m);
1548 }
1549 return 0;
1550 }
1551 /* the socket is not closed, so SOF_MP_SUBFLOW must be set */
1552 VERIFY(so->so_flags & SOF_MP_SUBFLOW);
1553
1554 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1555 panic("%s: nexpkt %p || mb %p != lastrecord %p\n", __func__,
1556 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
1557 /* NOTREACHED */
1558 }
1559
1560 SBLASTMBUFCHK(sb, __func__);
1561
1562 /* No filter support (SB_RECV) on mptcp subflow sockets */
1563
1564 sbcompress(sb, m, sb->sb_mbtail);
1565 sb->sb_lastrecord = sb->sb_mb;
1566 SBLASTRECORDCHK(sb, __func__);
1567 return 1;
1568 }
1569 #endif /* MPTCP */
1570
1571 /*
1572 * Compress mbuf chain m into the socket
1573 * buffer sb following mbuf n. If n
1574 * is null, the buffer is presumed empty.
1575 */
1576 static inline void
1577 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1578 {
1579 int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS));
1580 struct mbuf *o;
1581
1582 if (m == NULL) {
1583 /* There is nothing to compress; just update the tail */
1584 for (; n->m_next != NULL; n = n->m_next) {
1585 ;
1586 }
1587 sb->sb_mbtail = n;
1588 goto done;
1589 }
1590
1591 while (m != NULL) {
1592 eor |= m->m_flags & M_EOR;
1593 if (compress && m->m_len == 0 && (eor == 0 ||
1594 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
1595 if (sb->sb_lastrecord == m) {
1596 sb->sb_lastrecord = m->m_next;
1597 }
1598 m = m_free(m);
1599 continue;
1600 }
1601 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
1602 #ifndef __APPLE__
1603 M_WRITABLE(n) &&
1604 #endif
1605 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1606 m->m_len <= M_TRAILINGSPACE(n) &&
1607 n->m_type == m->m_type) {
1608 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1609 (unsigned)m->m_len);
1610 n->m_len += m->m_len;
1611 sb->sb_cc += m->m_len;
1612 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1613 m->m_type != MT_OOBDATA) {
1614 /* XXX: Probably don't need */
1615 sb->sb_ctl += m->m_len;
1616 }
1617
1618 /* update send byte count */
1619 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1620 inp_incr_sndbytes_total(sb->sb_so,
1621 m->m_len);
1622 inp_incr_sndbytes_unsent(sb->sb_so,
1623 m->m_len);
1624 }
1625 m = m_free(m);
1626 continue;
1627 }
1628 if (n != NULL) {
1629 n->m_next = m;
1630 } else {
1631 sb->sb_mb = m;
1632 }
1633 sb->sb_mbtail = m;
1634 sballoc(sb, m);
1635 n = m;
1636 m->m_flags &= ~M_EOR;
1637 m = m->m_next;
1638 n->m_next = NULL;
1639 }
1640 if (eor != 0) {
1641 if (n != NULL) {
1642 n->m_flags |= eor;
1643 } else {
1644 printf("semi-panic: sbcompress\n");
1645 }
1646 }
1647 done:
1648 SBLASTMBUFCHK(sb, __func__);
1649 }
1650
1651 void
1652 sb_empty_assert(struct sockbuf *sb, const char *where)
1653 {
1654 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1655 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
1656 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
1657 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1658 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail,
1659 sb->sb_lastrecord);
1660 /* NOTREACHED */
1661 }
1662 }
1663
1664 /*
1665 * Free all mbufs in a sockbuf.
1666 * Check that all resources are reclaimed.
1667 */
1668 void
1669 sbflush(struct sockbuf *sb)
1670 {
1671 void *lr_saved = __builtin_return_address(0);
1672 struct socket *so = sb->sb_so;
1673
1674 /* so_usecount may be 0 if we get here from sofreelastref() */
1675 if (so == NULL) {
1676 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
1677 __func__, sb, sb->sb_flags, lr_saved);
1678 /* NOTREACHED */
1679 } else if (so->so_usecount < 0) {
1680 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1681 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
1682 so->so_usecount, lr_saved, solockhistory_nr(so));
1683 /* NOTREACHED */
1684 }
1685
1686 /*
1687 * Obtain lock on the socket buffer (SB_LOCK). This is required
1688 * to prevent the socket buffer from being unexpectedly altered
1689 * while it is used by another thread in socket send/receive.
1690 *
1691 * sblock() must not fail here, hence the assertion.
1692 */
1693 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
1694 VERIFY(sb->sb_flags & SB_LOCK);
1695
1696 while (sb->sb_mbcnt > 0) {
1697 /*
1698 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1699 * we would loop forever. Panic instead.
1700 */
1701 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) {
1702 break;
1703 }
1704 sbdrop(sb, (int)sb->sb_cc);
1705 }
1706
1707 sb_empty_assert(sb, __func__);
1708 sbunlock(sb, TRUE); /* keep socket locked */
1709 }
1710
1711 /*
1712 * Drop data from (the front of) a sockbuf.
1713  * Use m_freem_list to free the mbuf structures
1714  * under a single lock: this is done by pruning
1715  * the top of the tree from the body by keeping track
1716  * of where we get to in the tree and then zeroing the
1717  * two pertinent pointers, m_nextpkt and m_next.
1718  * The socket buffer is then updated to point at the new
1719  * top of the tree and the pruned area is released via
1720  * m_freem_list.
1721 */
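/*
 * Illustrative sketch (assumption about a typical caller): a reliable
 * protocol releases acknowledged send data with something like
 *
 *	sbdrop(&so->so_snd, (int)acked);
 *	sowwakeup(so);
 *
 * dropping `acked' bytes from the front of the send buffer and then waking
 * any writer blocked in sbwait() on that buffer.
 */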
1722 void
1723 sbdrop(struct sockbuf *sb, int len)
1724 {
1725 struct mbuf *m, *free_list, *ml;
1726 struct mbuf *next, *last;
1727
1728 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
1729 #if MPTCP
1730 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
1731 ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
1732 (SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH) &&
1733 SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))) &&
1734 !(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
1735 mptcp_preproc_sbdrop(sb->sb_so, m, (unsigned int)len);
1736 }
1737 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
1738 (sb->sb_so->so_flags & SOF_MP_SUBFLOW) &&
1739 (sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
1740 mptcp_fallback_sbdrop(sb->sb_so, m, len);
1741 }
1742 #endif /* MPTCP */
1743 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1744
1745 free_list = last = m;
1746 ml = (struct mbuf *)0;
1747
1748 while (len > 0) {
1749 if (m == NULL) {
1750 if (next == NULL) {
1751 /*
1752 * temporarily replacing this panic with printf
1753 * because it occurs occasionally when closing
1754  * a socket, where there is no harm in ignoring
1755  * it. This problem will be investigated
1756 * further.
1757 */
1758 /* panic("sbdrop"); */
1759 printf("sbdrop - count not zero\n");
1760 len = 0;
1761 /*
1762 * zero the counts. if we have no mbufs,
1763 * we have no data (PR-2986815)
1764 */
1765 sb->sb_cc = 0;
1766 sb->sb_mbcnt = 0;
1767 break;
1768 }
1769 m = last = next;
1770 next = m->m_nextpkt;
1771 continue;
1772 }
1773 if (m->m_len > len) {
1774 m->m_len -= len;
1775 m->m_data += len;
1776 sb->sb_cc -= len;
1777 /* update the send byte count */
1778 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1779 inp_decr_sndbytes_total(sb->sb_so, len);
1780 }
1781 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1782 m->m_type != MT_OOBDATA) {
1783 sb->sb_ctl -= len;
1784 }
1785 break;
1786 }
1787 len -= m->m_len;
1788 sbfree(sb, m);
1789
1790 ml = m;
1791 m = m->m_next;
1792 }
1793 while (m && m->m_len == 0) {
1794 sbfree(sb, m);
1795
1796 ml = m;
1797 m = m->m_next;
1798 }
1799 if (ml) {
1800 ml->m_next = (struct mbuf *)0;
1801 last->m_nextpkt = (struct mbuf *)0;
1802 m_freem_list(free_list);
1803 }
1804 if (m) {
1805 sb->sb_mb = m;
1806 m->m_nextpkt = next;
1807 } else {
1808 sb->sb_mb = next;
1809 }
1810
1811 /*
1812 * First part is an inline SB_EMPTY_FIXUP(). Second part
1813 * makes sure sb_lastrecord is up-to-date if we dropped
1814 * part of the last record.
1815 */
1816 m = sb->sb_mb;
1817 if (m == NULL) {
1818 sb->sb_mbtail = NULL;
1819 sb->sb_lastrecord = NULL;
1820 } else if (m->m_nextpkt == NULL) {
1821 sb->sb_lastrecord = m;
1822 }
1823
1824 #if CONTENT_FILTER
1825 cfil_sock_buf_update(sb);
1826 #endif /* CONTENT_FILTER */
1827
1828 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1829 }
1830
1831 /*
1832 * Drop a record off the front of a sockbuf
1833 * and move the next record to the front.
1834 */
1835 void
1836 sbdroprecord(struct sockbuf *sb)
1837 {
1838 struct mbuf *m, *mn;
1839
1840 m = sb->sb_mb;
1841 if (m) {
1842 sb->sb_mb = m->m_nextpkt;
1843 do {
1844 sbfree(sb, m);
1845 MFREE(m, mn);
1846 m = mn;
1847 } while (m);
1848 }
1849 SB_EMPTY_FIXUP(sb);
1850 }
1851
1852 /*
1853 * Create a "control" mbuf containing the specified data
1854 * with the specified type for presentation on a socket buffer.
1855 */
1856 struct mbuf *
1857 sbcreatecontrol(caddr_t p, int size, int type, int level)
1858 {
1859 struct cmsghdr *cp;
1860 struct mbuf *m;
1861
1862 if (CMSG_SPACE((u_int)size) > MLEN) {
1863 return (struct mbuf *)NULL;
1864 }
1865 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) {
1866 return (struct mbuf *)NULL;
1867 }
1868 cp = mtod(m, struct cmsghdr *);
1869 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1870 /* XXX check size? */
1871 (void) memcpy(CMSG_DATA(cp), p, size);
1872 m->m_len = (int32_t)CMSG_SPACE(size);
1873 cp->cmsg_len = CMSG_LEN(size);
1874 cp->cmsg_level = level;
1875 cp->cmsg_type = type;
1876 return m;
1877 }
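/*
 * Illustrative sketch (assumption about a caller): building a timestamp
 * control message for the receive path could look like
 *
 *	struct timeval tv;
 *
 *	microtime(&tv);
 *	control = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET);
 *	if (control == NULL) {
 *		// size exceeded MLEN or no mbuf was available
 *	}
 *
 * The returned mbuf carries a single, properly aligned cmsghdr and can be
 * passed as `control' to the sbappend*() routines above.
 */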
1878
1879 struct mbuf **
1880 sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp)
1881 {
1882 struct mbuf *m;
1883 struct cmsghdr *cp;
1884
1885 if (*mp == NULL) {
1886 *mp = sbcreatecontrol(p, size, type, level);
1887 return mp;
1888 }
1889
1890 if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
1891 mp = &(*mp)->m_next;
1892 *mp = sbcreatecontrol(p, size, type, level);
1893 return mp;
1894 }
1895
1896 m = *mp;
1897
1898 cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
1899 /* CMSG_SPACE ensures 32-bit alignment */
1900 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1901 m->m_len += (int32_t)CMSG_SPACE(size);
1902
1903 /* XXX check size? */
1904 (void) memcpy(CMSG_DATA(cp), p, size);
1905 cp->cmsg_len = CMSG_LEN(size);
1906 cp->cmsg_level = level;
1907 cp->cmsg_type = type;
1908
1909 return mp;
1910 }
1911
1912
1913 /*
1914 * Some routines that return EOPNOTSUPP for entry points that are not
1915 * supported by a protocol. Fill in as needed.
1916 */
1917 int
1918 pru_abort_notsupp(struct socket *so)
1919 {
1920 #pragma unused(so)
1921 return EOPNOTSUPP;
1922 }
1923
1924 int
1925 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
1926 {
1927 #pragma unused(so, nam)
1928 return EOPNOTSUPP;
1929 }
1930
1931 int
1932 pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
1933 {
1934 #pragma unused(so, proto, p)
1935 return EOPNOTSUPP;
1936 }
1937
1938 int
1939 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1940 {
1941 #pragma unused(so, nam, p)
1942 return EOPNOTSUPP;
1943 }
1944
1945 int
1946 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1947 {
1948 #pragma unused(so, nam, p)
1949 return EOPNOTSUPP;
1950 }
1951
1952 int
1953 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
1954 {
1955 #pragma unused(so1, so2)
1956 return EOPNOTSUPP;
1957 }
1958
1959 int
1960 pru_connectx_notsupp(struct socket *so, struct sockaddr *src,
1961 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1962 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
1963 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
1964 {
1965 #pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
1966 return EOPNOTSUPP;
1967 }
1968
1969 int
1970 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
1971 struct ifnet *ifp, struct proc *p)
1972 {
1973 #pragma unused(so, cmd, data, ifp, p)
1974 return EOPNOTSUPP;
1975 }
1976
1977 int
1978 pru_detach_notsupp(struct socket *so)
1979 {
1980 #pragma unused(so)
1981 return EOPNOTSUPP;
1982 }
1983
1984 int
1985 pru_disconnect_notsupp(struct socket *so)
1986 {
1987 #pragma unused(so)
1988 return EOPNOTSUPP;
1989 }
1990
1991 int
1992 pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1993 {
1994 #pragma unused(so, aid, cid)
1995 return EOPNOTSUPP;
1996 }
1997
1998 int
1999 pru_listen_notsupp(struct socket *so, struct proc *p)
2000 {
2001 #pragma unused(so, p)
2002 return EOPNOTSUPP;
2003 }
2004
2005 int
2006 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
2007 {
2008 #pragma unused(so, nam)
2009 return EOPNOTSUPP;
2010 }
2011
2012 int
2013 pru_rcvd_notsupp(struct socket *so, int flags)
2014 {
2015 #pragma unused(so, flags)
2016 return EOPNOTSUPP;
2017 }
2018
2019 int
2020 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
2021 {
2022 #pragma unused(so, m, flags)
2023 return EOPNOTSUPP;
2024 }
2025
2026 int
2027 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
2028 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2029 {
2030 #pragma unused(so, flags, m, addr, control, p)
2031 return EOPNOTSUPP;
2032 }
2033
2034 int
2035 pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m,
2036 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2037 {
2038 #pragma unused(so, flags, m, addr, control, p)
2039 return EOPNOTSUPP;
2040 }
2041
2042 /*
2043 * This isn't really a ``null'' operation, but it's the default one
2044 * and doesn't do anything destructive.
2045 */
2046 int
2047 pru_sense_null(struct socket *so, void *ub, int isstat64)
2048 {
2049 if (isstat64 != 0) {
2050 struct stat64 *sb64;
2051
2052 sb64 = (struct stat64 *)ub;
2053 sb64->st_blksize = so->so_snd.sb_hiwat;
2054 } else {
2055 struct stat *sb;
2056
2057 sb = (struct stat *)ub;
2058 sb->st_blksize = so->so_snd.sb_hiwat;
2059 }
2060
2061 return 0;
2062 }
2063
2064
2065 int
2066 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
2067 struct mbuf *top, struct mbuf *control, int flags)
2068 {
2069 #pragma unused(so, addr, uio, top, control, flags)
2070 return EOPNOTSUPP;
2071 }
2072
2073 int
2074 pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
2075 u_int uiocnt, int flags)
2076 {
2077 #pragma unused(so, uio, uiocnt, flags)
2078 return EOPNOTSUPP;
2079 }
2080
2081 int
2082 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
2083 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2084 {
2085 #pragma unused(so, paddr, uio, mp0, controlp, flagsp)
2086 return EOPNOTSUPP;
2087 }
2088
2089 int
2090 pru_soreceive_list_notsupp(struct socket *so,
2091 struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp)
2092 {
2093 #pragma unused(so, recv_msg_array, uiocnt, flagsp)
2094 return EOPNOTSUPP;
2095 }
2096
2097 int
2098 pru_shutdown_notsupp(struct socket *so)
2099 {
2100 #pragma unused(so)
2101 return EOPNOTSUPP;
2102 }
2103
2104 int
2105 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
2106 {
2107 #pragma unused(so, nam)
2108 return EOPNOTSUPP;
2109 }
2110
2111 int
2112 pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql)
2113 {
2114 #pragma unused(so, events, cred, wql)
2115 return EOPNOTSUPP;
2116 }
2117
2118 int
2119 pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
2120 {
2121 #pragma unused(so, sopt)
2122 /*
2123 * Allow all options for set/get by default.
2124 */
2125 return 0;
2126 }
2127
2128 static int
2129 pru_preconnect_null(struct socket *so)
2130 {
2131 #pragma unused(so)
2132 return 0;
2133 }
2134
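/*
 * Fill in any NULL entries in a protocol's pr_usrreqs vector with the
 * corresponding *_notsupp/_null stubs above, so that callers may invoke
 * every entry point without first checking for NULL.
 */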
2135 void
2136 pru_sanitize(struct pr_usrreqs *pru)
2137 {
2138 #define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
2139 DEFAULT(pru->pru_abort, pru_abort_notsupp);
2140 DEFAULT(pru->pru_accept, pru_accept_notsupp);
2141 DEFAULT(pru->pru_attach, pru_attach_notsupp);
2142 DEFAULT(pru->pru_bind, pru_bind_notsupp);
2143 DEFAULT(pru->pru_connect, pru_connect_notsupp);
2144 DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
2145 DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
2146 DEFAULT(pru->pru_control, pru_control_notsupp);
2147 DEFAULT(pru->pru_detach, pru_detach_notsupp);
2148 DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
2149 DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
2150 DEFAULT(pru->pru_listen, pru_listen_notsupp);
2151 DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
2152 DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
2153 DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
2154 DEFAULT(pru->pru_send, pru_send_notsupp);
2155 DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
2156 DEFAULT(pru->pru_sense, pru_sense_null);
2157 DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
2158 DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
2159 DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
2160 DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
2161 DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
2162 DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
2163 DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
2164 DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
2165 DEFAULT(pru->pru_preconnect, pru_preconnect_null);
2166 #undef DEFAULT
2167 }
2168
2169 /*
2170 * The following are macros on BSD and functions on Darwin
2171 */
2172
2173 /*
2174  * Does anyone need to be notified when I/O becomes possible on this sockbuf?
2175 */
2176
2177 int
2178 sb_notify(struct sockbuf *sb)
2179 {
2180 return sb->sb_waiters > 0 ||
2181 (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
2182 }
2183
2184 /*
2185 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
2186  * This would be problematic if the fields were unsigned, as the space
2187  * might appear negative (cc > hiwat or mbcnt > mbmax); any negative
2188  * result is clamped to 0 below.
2189 */
2190 int
2191 sbspace(struct sockbuf *sb)
2192 {
2193 int pending = 0;
2194 int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
2195 (int)(sb->sb_mbmax - sb->sb_mbcnt));
2196
2197 if (sb->sb_preconn_hiwat != 0) {
2198 space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
2199 }
2200
2201 if (space < 0) {
2202 space = 0;
2203 }
2204
2205 /* Compensate for data being processed by content filters */
2206 #if CONTENT_FILTER
2207 pending = cfil_sock_data_space(sb);
2208 #endif /* CONTENT_FILTER */
2209 if (pending > space) {
2210 space = 0;
2211 } else {
2212 space -= pending;
2213 }
2214
2215 return space;
2216 }
2217
2218 /* do we have to send all at once on a socket? */
2219 int
2220 sosendallatonce(struct socket *so)
2221 {
2222 return so->so_proto->pr_flags & PR_ATOMIC;
2223 }
2224
2225 /* can we read something from so? */
2226 int
2227 soreadable(struct socket *so)
2228 {
2229 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
2230 ((so->so_state & SS_CANTRCVMORE)
2231 #if CONTENT_FILTER
2232 && cfil_sock_data_pending(&so->so_rcv) == 0
2233 #endif /* CONTENT_FILTER */
2234 ) ||
2235 so->so_comp.tqh_first || so->so_error;
2236 }
2237
2238 /* can we write something to so? */
2239
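/*
 * A socket is considered writable when the send side has been shut down
 * or an error is pending, or when writing is currently permitted and
 * either preconnect (TFO) data may still be queued or enough send-buffer
 * space is available (subject to the optional not-sent low-water check
 * for TCP and MPTCP).
 */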
2240 int
2241 sowriteable(struct socket *so)
2242 {
2243 if ((so->so_state & SS_CANTSENDMORE) ||
2244 so->so_error > 0) {
2245 return 1;
2246 }
2247 if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
2248 return 0;
2249 }
2250 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2251 return 1;
2252 }
2253
2254 if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
2255 if (so->so_flags & SOF_NOTSENT_LOWAT) {
2256 if ((SOCK_DOM(so) == PF_INET6 ||
2257 SOCK_DOM(so) == PF_INET) &&
2258 so->so_type == SOCK_STREAM) {
2259 return tcp_notsent_lowat_check(so);
2260 }
2261 #if MPTCP
2262 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
2263 (SOCK_PROTO(so) == IPPROTO_TCP)) {
2264 return mptcp_notsent_lowat_check(so);
2265 }
2266 #endif
2267 else {
2268 return 1;
2269 }
2270 } else {
2271 return 1;
2272 }
2273 }
2274 return 0;
2275 }
2276
2277 /* adjust counters in sb reflecting allocation of m */
2278
2279 void
2280 sballoc(struct sockbuf *sb, struct mbuf *m)
2281 {
2282 u_int32_t cnt = 1;
2283 sb->sb_cc += m->m_len;
2284 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2285 m->m_type != MT_OOBDATA) {
2286 sb->sb_ctl += m->m_len;
2287 }
2288 sb->sb_mbcnt += MSIZE;
2289
2290 if (m->m_flags & M_EXT) {
2291 sb->sb_mbcnt += m->m_ext.ext_size;
2292 cnt += (m->m_ext.ext_size >> MSIZESHIFT);
2293 }
2294 OSAddAtomic(cnt, &total_sbmb_cnt);
2295 VERIFY(total_sbmb_cnt > 0);
2296 if (total_sbmb_cnt > total_sbmb_cnt_peak) {
2297 total_sbmb_cnt_peak = total_sbmb_cnt;
2298 }
2299
2300 /*
2301 * If data is being added to the send socket buffer,
2302 * update the send byte count
2303 */
2304 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2305 inp_incr_sndbytes_total(sb->sb_so, m->m_len);
2306 inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
2307 }
2308 }
2309
2310 /* adjust counters in sb reflecting freeing of m */
2311 void
2312 sbfree(struct sockbuf *sb, struct mbuf *m)
2313 {
2314 int cnt = -1;
2315
2316 sb->sb_cc -= m->m_len;
2317 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2318 m->m_type != MT_OOBDATA) {
2319 sb->sb_ctl -= m->m_len;
2320 }
2321 sb->sb_mbcnt -= MSIZE;
2322 if (m->m_flags & M_EXT) {
2323 sb->sb_mbcnt -= m->m_ext.ext_size;
2324 cnt -= (m->m_ext.ext_size >> MSIZESHIFT);
2325 }
2326 OSAddAtomic(cnt, &total_sbmb_cnt);
2327 VERIFY(total_sbmb_cnt >= 0);
2328 if (total_sbmb_cnt < total_sbmb_cnt_floor) {
2329 total_sbmb_cnt_floor = total_sbmb_cnt;
2330 }
2331
2332 /*
2333 * If data is being removed from the send socket buffer,
2334 * update the send byte count
2335 */
2336 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2337 inp_decr_sndbytes_total(sb->sb_so, m->m_len);
2338 }
2339 }
2340
2341 /*
2342 * Set lock on sockbuf sb; sleep if lock is already held.
2343 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
2344 * Returns error without lock if sleep is interrupted.
2345 */
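/*
 * The flags argument accepts the SBL_* values checked below: SBL_WAIT to
 * sleep for the lock rather than fail with EWOULDBLOCK, SBL_NOINTR to make
 * the sleep non-interruptible, and SBL_IGNDEFUNCT to keep waiting even if
 * the socket becomes defunct.
 */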
2346 int
2347 sblock(struct sockbuf *sb, uint32_t flags)
2348 {
2349 boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
2350 void *lr_saved = __builtin_return_address(0);
2351 struct socket *so = sb->sb_so;
2352 void * wchan;
2353 int error = 0;
2354 thread_t tp = current_thread();
2355
2356 VERIFY((flags & SBL_VALID) == flags);
2357
2358 /* so_usecount may be 0 if we get here from sofreelastref() */
2359 if (so == NULL) {
2360 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2361 __func__, sb, sb->sb_flags, lr_saved);
2362 /* NOTREACHED */
2363 } else if (so->so_usecount < 0) {
2364 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2365 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2366 so->so_usecount, lr_saved, solockhistory_nr(so));
2367 /* NOTREACHED */
2368 }
2369
2370 /*
2371 * The content filter thread must hold the sockbuf lock
2372 */
2373 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2374 /*
2375 * Don't panic if we are defunct because SB_LOCK has
2376 * been cleared by sodefunct()
2377 */
2378 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
2379 panic("%s: SB_LOCK not held for %p\n",
2380 __func__, sb);
2381 }
2382
2383 /* Keep the sockbuf locked */
2384 return 0;
2385 }
2386
2387 if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
2388 return EWOULDBLOCK;
2389 }
2390 /*
2391 * We may get here from sorflush(), in which case "sb" may not
2392 * point to the real socket buffer. Use the actual socket buffer
2393 * address from the socket instead.
2394 */
2395 wchan = (sb->sb_flags & SB_RECV) ?
2396 &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
2397
2398 /*
2399 * A content filter thread has exclusive access to the sockbuf
2400  * until it clears the sb_cfil_thread field.
2401 */
2402 while ((sb->sb_flags & SB_LOCK) ||
2403 ((so->so_flags & SOF_CONTENT_FILTER) &&
2404 sb->sb_cfil_thread != NULL)) {
2405 lck_mtx_t *mutex_held;
2406
2407 /*
2408  * XXX: This code should be moved above, outside of this loop;
2409  * however, we may get here as part of sofreelastref(), and
2410  * at that time pr_getlock() may no longer be able to return
2411  * us the lock. This will be fixed in the future.
2412 */
2413 if (so->so_proto->pr_getlock != NULL) {
2414 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2415 } else {
2416 mutex_held = so->so_proto->pr_domain->dom_mtx;
2417 }
2418
2419 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2420
2421 sb->sb_wantlock++;
2422 VERIFY(sb->sb_wantlock != 0);
2423
2424 error = msleep(wchan, mutex_held,
2425 nointr ? PSOCK : PSOCK | PCATCH,
2426 nointr ? "sb_lock_nointr" : "sb_lock", NULL);
2427
2428 VERIFY(sb->sb_wantlock != 0);
2429 sb->sb_wantlock--;
2430
2431 if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
2432 !(flags & SBL_IGNDEFUNCT)) {
2433 error = EBADF;
2434 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
2435 "(%d)\n", __func__, proc_selfpid(),
2436 proc_best_name(current_proc()),
2437 (uint64_t)VM_KERNEL_ADDRPERM(so),
2438 SOCK_DOM(so), SOCK_TYPE(so), error);
2439 }
2440
2441 if (error != 0) {
2442 return error;
2443 }
2444 }
2445 sb->sb_flags |= SB_LOCK;
2446 return 0;
2447 }
2448
2449 /*
2450 * Release lock on sockbuf sb
2451 */
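/*
 * Unless keeplocked is set, the socket lock (and a use count reference)
 * is also dropped on the way out; a content filter thread keeps the
 * sockbuf locked regardless.
 */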
2452 void
2453 sbunlock(struct sockbuf *sb, boolean_t keeplocked)
2454 {
2455 void *lr_saved = __builtin_return_address(0);
2456 struct socket *so = sb->sb_so;
2457 thread_t tp = current_thread();
2458
2459 /* so_usecount may be 0 if we get here from sofreelastref() */
2460 if (so == NULL) {
2461 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2462 __func__, sb, sb->sb_flags, lr_saved);
2463 /* NOTREACHED */
2464 } else if (so->so_usecount < 0) {
2465 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2466 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2467 so->so_usecount, lr_saved, solockhistory_nr(so));
2468 /* NOTREACHED */
2469 }
2470
2471 /*
2472 * The content filter thread must hold the sockbuf lock
2473 */
2474 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2475 /*
2476 * Don't panic if we are defunct because SB_LOCK has
2477 * been cleared by sodefunct()
2478 */
2479 if (!(so->so_flags & SOF_DEFUNCT) &&
2480 !(sb->sb_flags & SB_LOCK) &&
2481 !(so->so_state & SS_DEFUNCT) &&
2482 !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
2483 panic("%s: SB_LOCK not held for %p\n",
2484 __func__, sb);
2485 }
2486 /* Keep the sockbuf locked and proceed */
2487 } else {
2488 VERIFY((sb->sb_flags & SB_LOCK) ||
2489 (so->so_state & SS_DEFUNCT) ||
2490 (so->so_flags1 & SOF1_DEFUNCTINPROG));
2491
2492 sb->sb_flags &= ~SB_LOCK;
2493
2494 if (sb->sb_wantlock > 0) {
2495 /*
2496 * We may get here from sorflush(), in which case "sb"
2497 * may not point to the real socket buffer. Use the
2498 * actual socket buffer address from the socket instead.
2499 */
2500 wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
2501 &so->so_snd.sb_flags);
2502 }
2503 }
2504
2505 if (!keeplocked) { /* unlock on exit */
2506 if (so->so_flags & SOF_MP_SUBFLOW || SOCK_DOM(so) == PF_MULTIPATH) {
2507 (*so->so_proto->pr_unlock)(so, 1, lr_saved);
2508 } else {
2509 lck_mtx_t *mutex_held;
2510
2511 if (so->so_proto->pr_getlock != NULL) {
2512 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2513 } else {
2514 mutex_held = so->so_proto->pr_domain->dom_mtx;
2515 }
2516
2517 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2518
2519 VERIFY(so->so_usecount > 0);
2520 so->so_usecount--;
2521 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2522 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2523 lck_mtx_unlock(mutex_held);
2524 }
2525 }
2526 }
2527
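/*
 * Wake up anyone waiting on the receive or send buffer (selects, kqueue
 * knotes, async I/O, upcalls) if sb_notify() says someone is interested.
 */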
2528 void
2529 sorwakeup(struct socket *so)
2530 {
2531 if (sb_notify(&so->so_rcv)) {
2532 sowakeup(so, &so->so_rcv, NULL);
2533 }
2534 }
2535
2536 void
2537 sowwakeup(struct socket *so)
2538 {
2539 if (sb_notify(&so->so_snd)) {
2540 sowakeup(so, &so->so_snd, NULL);
2541 }
2542 }
2543
2544 void
2545 soevent(struct socket *so, long hint)
2546 {
2547 if (so->so_flags & SOF_KNOTE) {
2548 KNOTE(&so->so_klist, hint);
2549 }
2550
2551 soevupcall(so, hint);
2552
2553 /*
2554  * Don't post an event if this is a subflow socket or
2555  * the app has opted out of using cellular, expensive, or constrained interfaces
2556 */
2557 if ((hint & SO_FILT_HINT_IFDENIED) &&
2558 !(so->so_flags & SOF_MP_SUBFLOW) &&
2559 !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
2560 !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE) &&
2561 !(so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
2562 soevent_ifdenied(so);
2563 }
2564 }
2565
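/*
 * Deliver a socket event to the owner's registered event callback,
 * filtered through the event mask it asked for.
 */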
2566 void
2567 soevupcall(struct socket *so, long hint)
2568 {
2569 if (so->so_event != NULL) {
2570 caddr_t so_eventarg = so->so_eventarg;
2571
2572 hint &= so->so_eventmask;
2573 if (hint != 0) {
2574 so->so_event(so, so_eventarg, hint);
2575 }
2576 }
2577 }
2578
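/*
 * Post a KEV_NETPOLICY_IFDENIED kernel event carrying the effective
 * process identity of the socket owner; redundant notifications for the
 * same socket are suppressed.
 */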
2579 static void
2580 soevent_ifdenied(struct socket *so)
2581 {
2582 struct kev_netpolicy_ifdenied ev_ifdenied;
2583
2584 bzero(&ev_ifdenied, sizeof(ev_ifdenied));
2585 /*
2586  * The event consumer is interested in the effective {upid,pid,uuid}
2587  * info, which can differ from that of the process that most recently
2588  * performed a system call on the socket, e.g. when the socket is
2589  * delegated.
2590 */
2591 if (so->so_flags & SOF_DELEGATED) {
2592 ev_ifdenied.ev_data.eupid = so->e_upid;
2593 ev_ifdenied.ev_data.epid = so->e_pid;
2594 uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
2595 } else {
2596 ev_ifdenied.ev_data.eupid = so->last_upid;
2597 ev_ifdenied.ev_data.epid = so->last_pid;
2598 uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
2599 }
2600
2601 if (++so->so_ifdenied_notifies > 1) {
2602 /*
2603 * Allow for at most one kernel event to be generated per
2604 * socket; so_ifdenied_notifies is reset upon changes in
2605 * the UUID policy. See comments in inp_update_policy.
2606 */
2607 if (net_io_policy_log) {
2608 uuid_string_t buf;
2609
2610 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2611 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %llu "
2612 "euuid %s%s has %d redundant events supressed\n",
2613 __func__, so->last_pid,
2614 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2615 SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
2616 ((so->so_flags & SOF_DELEGATED) ?
2617 " [delegated]" : ""), so->so_ifdenied_notifies);
2618 }
2619 } else {
2620 if (net_io_policy_log) {
2621 uuid_string_t buf;
2622
2623 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2624 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %llu "
2625 "euuid %s%s event posted\n", __func__,
2626 so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so),
2627 SOCK_DOM(so), SOCK_TYPE(so),
2628 ev_ifdenied.ev_data.epid, buf,
2629 ((so->so_flags & SOF_DELEGATED) ?
2630 " [delegated]" : ""));
2631 }
2632 netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
2633 sizeof(ev_ifdenied));
2634 }
2635 }
2636
2637 /*
2638 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
2639 */
2640 struct sockaddr *
2641 dup_sockaddr(struct sockaddr *sa, int canwait)
2642 {
2643 struct sockaddr *sa2;
2644
2645 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
2646 canwait ? M_WAITOK : M_NOWAIT);
2647 if (sa2) {
2648 bcopy(sa, sa2, sa->sa_len);
2649 }
2650 return sa2;
2651 }
2652
2653 /*
2654 * Create an external-format (``xsocket'') structure using the information
2655 * in the kernel-format socket structure pointed to by so. This is done
2656 * to reduce the spew of irrelevant information over this interface,
2657 * to isolate user code from changes in the kernel structure, and
2658 * potentially to provide information-hiding if we decide that
2659 * some of this information should be hidden from users.
2660 */
2661 void
2662 sotoxsocket(struct socket *so, struct xsocket *xso)
2663 {
2664 xso->xso_len = sizeof(*xso);
2665 xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
2666 xso->so_type = so->so_type;
2667 xso->so_options = (short)(so->so_options & 0xffff);
2668 xso->so_linger = so->so_linger;
2669 xso->so_state = so->so_state;
2670 xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
2671 if (so->so_proto) {
2672 xso->xso_protocol = SOCK_PROTO(so);
2673 xso->xso_family = SOCK_DOM(so);
2674 } else {
2675 xso->xso_protocol = xso->xso_family = 0;
2676 }
2677 xso->so_qlen = so->so_qlen;
2678 xso->so_incqlen = so->so_incqlen;
2679 xso->so_qlimit = so->so_qlimit;
2680 xso->so_timeo = so->so_timeo;
2681 xso->so_error = so->so_error;
2682 xso->so_pgid = so->so_pgid;
2683 xso->so_oobmark = so->so_oobmark;
2684 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2685 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
2686 xso->so_uid = kauth_cred_getuid(so->so_cred);
2687 }
2688
2689
2690 #if XNU_TARGET_OS_OSX
2691
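/*
 * 64-bit variant of sotoxsocket() above, used for the xsocket64 layout
 * exported on macOS.
 */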
2692 void
2693 sotoxsocket64(struct socket *so, struct xsocket64 *xso)
2694 {
2695 xso->xso_len = sizeof(*xso);
2696 xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
2697 xso->so_type = so->so_type;
2698 xso->so_options = (short)(so->so_options & 0xffff);
2699 xso->so_linger = so->so_linger;
2700 xso->so_state = so->so_state;
2701 xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
2702 if (so->so_proto) {
2703 xso->xso_protocol = SOCK_PROTO(so);
2704 xso->xso_family = SOCK_DOM(so);
2705 } else {
2706 xso->xso_protocol = xso->xso_family = 0;
2707 }
2708 xso->so_qlen = so->so_qlen;
2709 xso->so_incqlen = so->so_incqlen;
2710 xso->so_qlimit = so->so_qlimit;
2711 xso->so_timeo = so->so_timeo;
2712 xso->so_error = so->so_error;
2713 xso->so_pgid = so->so_pgid;
2714 xso->so_oobmark = so->so_oobmark;
2715 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2716 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
2717 xso->so_uid = kauth_cred_getuid(so->so_cred);
2718 }
2719
2720 #endif /* XNU_TARGET_OS_OSX */
2721
2722 /*
2723 * This does the same for sockbufs. Note that the xsockbuf structure,
2724  * since it is always embedded in a socket, includes neither a self
2725  * pointer nor a length. We make this entry point public in case
2726 * some other mechanism needs it.
2727 */
2728 void
2729 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
2730 {
2731 xsb->sb_cc = sb->sb_cc;
2732 xsb->sb_hiwat = sb->sb_hiwat;
2733 xsb->sb_mbcnt = sb->sb_mbcnt;
2734 xsb->sb_mbmax = sb->sb_mbmax;
2735 xsb->sb_lowat = sb->sb_lowat;
2736 xsb->sb_flags = (short)sb->sb_flags;
2737 xsb->sb_timeo = (short)
2738 ((sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick);
2739 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
2740 xsb->sb_timeo = 1;
2741 }
2742 }
2743
2744 /*
2745  * Based on the policy set by an all-knowing decision maker, throttle
2746  * sockets that have been marked as belonging to a "background" process.
2747 */
2748 inline int
2749 soisthrottled(struct socket *so)
2750 {
2751 return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
2752 }
2753
2754 inline int
2755 soisprivilegedtraffic(struct socket *so)
2756 {
2757 return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
2758 }
2759
2760 inline int
2761 soissrcbackground(struct socket *so)
2762 {
2763 return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
2764 IS_SO_TC_BACKGROUND(so->so_traffic_class);
2765 }
2766
2767 inline int
2768 soissrcrealtime(struct socket *so)
2769 {
2770 return so->so_traffic_class >= SO_TC_AV &&
2771 so->so_traffic_class <= SO_TC_VO;
2772 }
2773
2774 inline int
2775 soissrcbesteffort(struct socket *so)
2776 {
2777 return so->so_traffic_class == SO_TC_BE ||
2778 so->so_traffic_class == SO_TC_RD ||
2779 so->so_traffic_class == SO_TC_OAM;
2780 }
2781
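/*
 * Clear the preconnect-data (TCP Fast Open) related flags on the socket.
 */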
2782 void
2783 soclearfastopen(struct socket *so)
2784 {
2785 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2786 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
2787 }
2788
2789 if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
2790 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
2791 }
2792 }
2793
2794 void
2795 sonullevent(struct socket *so, void *arg, long hint)
2796 {
2797 #pragma unused(so, arg, hint)
2798 }
2799
2800 /*
2801 * Here is the definition of some of the basic objects in the kern.ipc
2802 * branch of the MIB.
2803 */
2804 SYSCTL_NODE(_kern, KERN_IPC, ipc,
2805 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");
2806
2807 /* Check that the maximum socket buffer size is within a range */
2808
2809 static int
2810 sysctl_sb_max SYSCTL_HANDLER_ARGS
2811 {
2812 #pragma unused(oidp, arg1, arg2)
2813 u_int32_t new_value;
2814 int changed = 0;
2815 int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
2816 &new_value, &changed);
2817 if (!error && changed) {
2818 if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
2819 sb_max = new_value;
2820 } else {
2821 error = ERANGE;
2822 }
2823 }
2824 return error;
2825 }
2826
2827 SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
2828 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2829 &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
2830
2831 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
2832 CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");
2833
2834 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
2835 CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");
2836
2837 SYSCTL_INT(_kern_ipc, OID_AUTO, njcl,
2838 CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
2839
2840 SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
2841 CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
2842
2843 SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
2844 CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
2845 "Enable socket queue limit compatibility");
2846
2847 /*
2848 * Hack alert -- rdar://33572856
2849 * A loopback test we cannot change was failing because it sets
2850  * SO_SNDTIMEO to 5 seconds and that's also the value
2851  * of the minimum persist timer. Because of the persist timer,
2852  * the connection was not idle for 5 seconds and SO_SNDTIMEO
2853  * was not triggering at 5 seconds, causing the test failure.
2854  * As a workaround we check the sysctl soqlencomp, which the test
2855  * already sets, and use it to disable auto-tuning of the receive buffer.
2856 */
2857
2858 extern u_int32_t tcp_do_autorcvbuf;
2859
2860 static int
2861 sysctl_soqlencomp SYSCTL_HANDLER_ARGS
2862 {
2863 #pragma unused(oidp, arg1, arg2)
2864 u_int32_t new_value;
2865 int changed = 0;
2866 int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
2867 &new_value, &changed);
2868 if (!error && changed) {
2869 soqlencomp = new_value;
2870 if (new_value != 0) {
2871 tcp_do_autorcvbuf = 0;
2872 tcptv_persmin_val = 6 * TCP_RETRANSHZ;
2873 }
2874 }
2875 return error;
2876 }
2877 SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
2878 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2879 &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");
2880
2881 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
2882 &total_sbmb_cnt, 0, "");
2883 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_peak, CTLFLAG_RD | CTLFLAG_LOCKED,
2884 &total_sbmb_cnt_peak, 0, "");
2885 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_floor, CTLFLAG_RD | CTLFLAG_LOCKED,
2886 &total_sbmb_cnt_floor, 0, "");
2887 SYSCTL_QUAD(_kern_ipc, OID_AUTO, sbmb_limreached, CTLFLAG_RD | CTLFLAG_LOCKED,
2888 &sbmb_limreached, "");
2889
2890
2891 SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
2892
2893 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
2894 &net_io_policy_log, 0, "");
2895
2896 #if CONFIG_PROC_UUID_POLICY
2897 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
2898 &net_io_policy_uuid, 0, "");
2899 #endif /* CONFIG_PROC_UUID_POLICY */