]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_socket2.c
xnu-1228.7.58.tar.gz
[apple/xnu.git] / bsd / kern / uipc_socket2.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
1c79356b
A
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
9bccf70c 62 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
1c79356b 63 */
2d21ac55
A
64/*
65 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
66 * support for mandatory and extensible security protections. This notice
67 * is included in support of clause 2.2 (b) of the Apple Public License,
68 * Version 2.0.
69 */
1c79356b
A
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/domain.h>
74#include <sys/kernel.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/kauth.h>
1c79356b
A
77#include <sys/malloc.h>
78#include <sys/mbuf.h>
79#include <sys/protosw.h>
80#include <sys/stat.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/signalvar.h>
84#include <sys/sysctl.h>
85#include <sys/ev.h>
91447636
A
86#include <kern/locks.h>
87#include <net/route.h>
88#include <netinet/in.h>
89#include <netinet/in_pcb.h>
fa4905b1 90#include <sys/kdebug.h>
2d21ac55
A
91#include <libkern/OSAtomic.h>
92
93#if CONFIG_MACF
94#include <security/mac_framework.h>
95#endif
96
97/* TODO: this should be in a header file somewhere */
98extern void postevent(struct socket *, struct sockbuf *, int);
fa4905b1
A
99
100#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
101#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
102
2d21ac55
A
103static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
104static struct socket *sonewconn_internal(struct socket *, int);
105static int sbappendaddr_internal(struct sockbuf *, struct sockaddr *,
106 struct mbuf *, struct mbuf *);
107static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
108 struct mbuf *);
fa4905b1 109
1c79356b
A
110/*
111 * Primitive routines for operating on sockets and socket buffers
112 */
2d21ac55
A
113static int soqlimitcompat = 1;
114static int soqlencomp = 0;
1c79356b
A
115
116u_long sb_max = SB_MAX; /* XXX should be static */
117
118static u_long sb_efficiency = 8; /* parameter for sbreserve() */
2d21ac55
A
119__private_extern__ unsigned int total_mb_cnt = 0;
120__private_extern__ unsigned int total_cl_cnt = 0;
121__private_extern__ int sbspace_factor = 8;
1c79356b 122
1c79356b
A
123/*
124 * Procedures to manipulate state flags of socket
125 * and do appropriate wakeups. Normal sequence from the
126 * active (originating) side is that soisconnecting() is
127 * called during processing of connect() call,
128 * resulting in an eventual call to soisconnected() if/when the
129 * connection is established. When the connection is torn down
9bccf70c 130 * soisdisconnecting() is called during processing of disconnect() call,
1c79356b
A
131 * and soisdisconnected() is called when the connection to the peer
132 * is totally severed. The semantics of these routines are such that
133 * connectionless protocols can call soisconnected() and soisdisconnected()
134 * only, bypassing the in-progress calls when setting up a ``connection''
135 * takes no time.
136 *
137 * From the passive side, a socket is created with
e3027f41
A
138 * two queues of sockets: so_incomp for connections in progress
139 * and so_comp for connections already made and awaiting user acceptance.
9bccf70c 140 * As a protocol is preparing incoming connections, it creates a socket
e3027f41 141 * structure queued on so_incomp by calling sonewconn(). When the connection
1c79356b 142 * is established, soisconnected() is called, and transfers the
e3027f41 143 * socket structure to so_comp, making it available to accept().
1c79356b 144 *
9bccf70c 145 * If a socket is closed with sockets on either
e3027f41 146 * so_incomp or so_comp, these sockets are dropped.
9bccf70c 147 *
1c79356b
A
148 * If higher level protocols are implemented in
149 * the kernel, the wakeups done here will sometimes
150 * cause software-interrupt process scheduling.
151 */
1c79356b 152void
2d21ac55 153soisconnecting(struct socket *so)
1c79356b
A
154{
155
156 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
157 so->so_state |= SS_ISCONNECTING;
2d21ac55 158
91447636 159 sflt_notify(so, sock_evt_connecting, NULL);
1c79356b
A
160}
161
162void
2d21ac55 163soisconnected(struct socket *so)
9bccf70c
A
164{
165 struct socket *head = so->so_head;
1c79356b
A
166
167 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
168 so->so_state |= SS_ISCONNECTED;
2d21ac55 169
91447636 170 sflt_notify(so, sock_evt_connected, NULL);
2d21ac55 171
1c79356b 172 if (head && (so->so_state & SS_INCOMP)) {
ff6e181a
A
173 so->so_state &= ~SS_INCOMP;
174 so->so_state |= SS_COMP;
175 if (head->so_proto->pr_getlock != NULL) {
176 socket_unlock(so, 0);
91447636 177 socket_lock(head, 1);
ff6e181a 178 }
91447636 179 postevent(head, 0, EV_RCONN);
1c79356b
A
180 TAILQ_REMOVE(&head->so_incomp, so, so_list);
181 head->so_incqlen--;
1c79356b 182 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
1c79356b 183 sorwakeup(head);
91447636 184 wakeup_one((caddr_t)&head->so_timeo);
ff6e181a 185 if (head->so_proto->pr_getlock != NULL) {
91447636 186 socket_unlock(head, 1);
ff6e181a
A
187 socket_lock(so, 0);
188 }
1c79356b 189 } else {
91447636 190 postevent(so, 0, EV_WCONN);
1c79356b
A
191 wakeup((caddr_t)&so->so_timeo);
192 sorwakeup(so);
193 sowwakeup(so);
194 }
195}
196
197void
2d21ac55 198soisdisconnecting(struct socket *so)
9bccf70c 199{
1c79356b
A
200 so->so_state &= ~SS_ISCONNECTING;
201 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
91447636 202 sflt_notify(so, sock_evt_disconnecting, NULL);
1c79356b
A
203 wakeup((caddr_t)&so->so_timeo);
204 sowwakeup(so);
205 sorwakeup(so);
206}
207
208void
2d21ac55 209soisdisconnected(struct socket *so)
9bccf70c 210{
1c79356b 211 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
9bccf70c 212 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
91447636 213 sflt_notify(so, sock_evt_disconnected, NULL);
1c79356b
A
214 wakeup((caddr_t)&so->so_timeo);
215 sowwakeup(so);
216 sorwakeup(so);
217}
218
1c79356b
A
219/*
220 * When an attempt at a new connection is noted on a socket
221 * which accepts connections, sonewconn is called. If the
222 * connection is possible (subject to space constraints, etc.)
223 * then we allocate a new structure, propoerly linked into the
224 * data structure of the original socket, and return this.
225 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
226 */
91447636 227static struct socket *
2d21ac55 228sonewconn_internal(struct socket *head, int connstatus)
9bccf70c 229{
2d21ac55
A
230 int so_qlen, error = 0;
231 struct socket *so;
91447636
A
232 lck_mtx_t *mutex_held;
233
2d21ac55 234 if (head->so_proto->pr_getlock != NULL)
91447636 235 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
2d21ac55 236 else
91447636
A
237 mutex_held = head->so_proto->pr_domain->dom_mtx;
238 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1c79356b 239
2d21ac55
A
240 if (!soqlencomp) {
241 /*
242 * This is the default case; so_qlen represents the
243 * sum of both incomplete and completed queues.
244 */
245 so_qlen = head->so_qlen;
246 } else {
247 /*
248 * When kern.ipc.soqlencomp is set to 1, so_qlen
249 * represents only the completed queue. Since we
250 * cannot let the incomplete queue goes unbounded
251 * (in case of SYN flood), we cap the incomplete
252 * queue length to at most somaxconn, and use that
253 * as so_qlen so that we fail immediately below.
254 */
255 so_qlen = head->so_qlen - head->so_incqlen;
256 if (head->so_incqlen > somaxconn)
257 so_qlen = somaxconn;
258 }
259
260 if (so_qlen >=
261 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2)))
1c79356b 262 return ((struct socket *)0);
2d21ac55
A
263 so = soalloc(M_NOWAIT, head->so_proto->pr_domain->dom_family,
264 head->so_type);
1c79356b
A
265 if (so == NULL)
266 return ((struct socket *)0);
9bccf70c
A
267 /* check if head was closed during the soalloc */
268 if (head->so_proto == NULL) {
2d21ac55
A
269 sodealloc(so);
270 return ((struct socket *)0);
1c79356b
A
271 }
272
273 so->so_head = head;
274 so->so_type = head->so_type;
275 so->so_options = head->so_options &~ SO_ACCEPTCONN;
276 so->so_linger = head->so_linger;
277 so->so_state = head->so_state | SS_NOFDREF;
278 so->so_proto = head->so_proto;
279 so->so_timeo = head->so_timeo;
280 so->so_pgid = head->so_pgid;
281 so->so_uid = head->so_uid;
2d21ac55 282 so->so_flags = head->so_flags & (SOF_REUSESHAREUID|SOF_NOTIFYCONFLICT); /* inherit SO_REUSESHAREUID and SO_NOTIFYCONFLICT ocket options */
91447636 283 so->so_usecount = 1;
0c530ab8
A
284 so->next_lock_lr = 0;
285 so->next_unlock_lr = 0;
1c79356b 286
13fec989
A
287#ifdef __APPLE__
288 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
289 so->so_rcv.sb_so = so->so_snd.sb_so = so;
290 TAILQ_INIT(&so->so_evlist);
291#endif
292
2d21ac55
A
293#if CONFIG_MACF_SOCKET
294 mac_socket_label_associate_accept(head, so);
295#endif
296
91447636
A
297 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
298 sflt_termsock(so);
9bccf70c
A
299 sodealloc(so);
300 return ((struct socket *)0);
301 }
302
91447636 303 /*
2d21ac55
A
304 * Must be done with head unlocked to avoid deadlock
305 * for protocol with per socket mutexes.
91447636 306 */
37839358
A
307 if (head->so_proto->pr_unlock)
308 socket_unlock(head, 0);
2d21ac55
A
309 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
310 error) {
91447636 311 sflt_termsock(so);
1c79356b 312 sodealloc(so);
37839358
A
313 if (head->so_proto->pr_unlock)
314 socket_lock(head, 0);
1c79356b
A
315 return ((struct socket *)0);
316 }
37839358
A
317 if (head->so_proto->pr_unlock)
318 socket_lock(head, 0);
9bccf70c 319#ifdef __APPLE__
1c79356b 320 so->so_proto->pr_domain->dom_refs++;
9bccf70c 321#endif
1c79356b
A
322
323 if (connstatus) {
324 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
325 so->so_state |= SS_COMP;
326 } else {
327 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
328 so->so_state |= SS_INCOMP;
329 head->so_incqlen++;
330 }
331 head->so_qlen++;
91447636 332
13fec989 333#ifdef __APPLE__
0c530ab8
A
334 /* Attach socket filters for this protocol */
335 sflt_initsock(so);
9bccf70c 336#endif
2d21ac55 337
91447636
A
338 if (connstatus) {
339 so->so_state |= connstatus;
340 sorwakeup(head);
341 wakeup((caddr_t)&head->so_timeo);
342 }
1c79356b
A
343 return (so);
344}
345
91447636
A
346
347struct socket *
2d21ac55 348sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
91447636
A
349{
350 int error = 0;
2d21ac55
A
351 struct socket_filter_entry *filter;
352 int filtered = 0;
353
91447636 354 for (filter = head->so_filt; filter && (error == 0);
2d21ac55 355 filter = filter->sfe_next_onsocket) {
91447636
A
356 if (filter->sfe_filter->sf_filter.sf_connect_in) {
357 if (filtered == 0) {
358 filtered = 1;
359 sflt_use(head);
360 socket_unlock(head, 0);
361 }
2d21ac55
A
362 error = filter->sfe_filter->sf_filter.
363 sf_connect_in(filter->sfe_cookie, head, from);
91447636
A
364 }
365 }
366 if (filtered != 0) {
367 socket_lock(head, 0);
368 sflt_unuse(head);
369 }
2d21ac55 370
91447636 371 if (error) {
2d21ac55 372 return (NULL);
91447636 373 }
2d21ac55
A
374
375 return (sonewconn_internal(head, connstatus));
91447636
A
376}
377
1c79356b
A
378/*
379 * Socantsendmore indicates that no more data will be sent on the
380 * socket; it would normally be applied to a socket when the user
381 * informs the system that no more data is to be sent, by the protocol
382 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
383 * will be received, and will normally be applied to the socket by a
384 * protocol when it detects that the peer will send no more data.
385 * Data queued for reading in the socket may yet be read.
386 */
387
388void
2d21ac55 389socantsendmore(struct socket *so)
9bccf70c 390{
1c79356b 391 so->so_state |= SS_CANTSENDMORE;
91447636 392 sflt_notify(so, sock_evt_cantsendmore, NULL);
1c79356b
A
393 sowwakeup(so);
394}
395
396void
2d21ac55 397socantrcvmore(struct socket *so)
9bccf70c 398{
1c79356b 399 so->so_state |= SS_CANTRCVMORE;
91447636 400 sflt_notify(so, sock_evt_cantrecvmore, NULL);
1c79356b
A
401 sorwakeup(so);
402}
403
404/*
405 * Wait for data to arrive at/drain from a socket buffer.
2d21ac55
A
406 *
407 * Returns: 0 Success
408 * EBADF
409 * msleep:EINTR
1c79356b
A
410 */
411int
2d21ac55 412sbwait(struct sockbuf *sb)
1c79356b 413{
0c530ab8 414 int error = 0, lr_saved;
91447636
A
415 struct socket *so = sb->sb_so;
416 lck_mtx_t *mutex_held;
417 struct timespec ts;
418
0c530ab8 419 lr_saved = (unsigned int) __builtin_return_address(0);
2d21ac55
A
420
421 if (so->so_proto->pr_getlock != NULL)
91447636 422 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 423 else
91447636 424 mutex_held = so->so_proto->pr_domain->dom_mtx;
1c79356b
A
425
426 sb->sb_flags |= SB_WAIT;
91447636
A
427
428 if (so->so_usecount < 1)
2d21ac55 429 panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);
91447636
A
430 ts.tv_sec = sb->sb_timeo.tv_sec;
431 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
432 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
2d21ac55 433 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", &ts);
91447636
A
434
435 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
436
437 if (so->so_usecount < 1)
2d21ac55 438 panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);
91447636
A
439
440 if ((so->so_state & SS_DRAINING)) {
441 error = EBADF;
442 }
443
444 return (error);
1c79356b
A
445}
446
447/*
448 * Lock a sockbuf already known to be locked;
449 * return any error returned from sleep (EINTR).
2d21ac55
A
450 *
451 * Returns: 0 Success
452 * EINTR
1c79356b
A
453 */
454int
2d21ac55 455sb_lock(struct sockbuf *sb)
1c79356b 456{
91447636 457 struct socket *so = sb->sb_so;
2d21ac55 458 lck_mtx_t *mutex_held;
0c530ab8 459 int error = 0;
2d21ac55 460
91447636 461 if (so == NULL)
2d21ac55 462 panic("sb_lock: null so back pointer sb=%p\n", sb);
1c79356b
A
463
464 while (sb->sb_flags & SB_LOCK) {
465 sb->sb_flags |= SB_WANT;
2d21ac55 466 if (so->so_proto->pr_getlock != NULL)
91447636
A
467 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
468 else
469 mutex_held = so->so_proto->pr_domain->dom_mtx;
470 if (so->so_usecount < 1)
2d21ac55
A
471 panic("sb_lock: so=%p refcount=%d\n", so,
472 so->so_usecount);
0c530ab8 473
91447636 474 error = msleep((caddr_t)&sb->sb_flags, mutex_held,
2d21ac55
A
475 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH,
476 "sb_lock", 0);
91447636 477 if (so->so_usecount < 1)
2d21ac55
A
478 panic("sb_lock: 2 so=%p refcount=%d\n", so,
479 so->so_usecount);
480 if (error)
1c79356b
A
481 return (error);
482 }
483 sb->sb_flags |= SB_LOCK;
484 return (0);
485}
486
487/*
488 * Wakeup processes waiting on a socket buffer.
489 * Do asynchronous notification via SIGIO
490 * if the socket has the SS_ASYNC flag set.
491 */
492void
2d21ac55 493sowakeup(struct socket *so, struct sockbuf *sb)
1c79356b 494{
0b4e3aa0 495 sb->sb_flags &= ~SB_SEL;
1c79356b 496 selwakeup(&sb->sb_sel);
1c79356b
A
497 if (sb->sb_flags & SB_WAIT) {
498 sb->sb_flags &= ~SB_WAIT;
499 wakeup((caddr_t)&sb->sb_cc);
500 }
501 if (so->so_state & SS_ASYNC) {
502 if (so->so_pgid < 0)
503 gsignal(-so->so_pgid, SIGIO);
2d21ac55
A
504 else if (so->so_pgid > 0)
505 proc_signal(so->so_pgid, SIGIO);
1c79356b 506 }
91447636
A
507 if (sb->sb_flags & SB_KNOTE) {
508 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
509 }
510 if (sb->sb_flags & SB_UPCALL) {
2d21ac55
A
511 void (*so_upcall)(struct socket *, caddr_t, int);
512 caddr_t so_upcallarg;
513
514 so_upcall = so->so_upcall;
515 so_upcallarg = so->so_upcallarg;
516 /* Let close know that we're about to do an upcall */
517 so->so_flags |= SOF_UPCALLINUSE;
518
91447636 519 socket_unlock(so, 0);
2d21ac55 520 (*so_upcall)(so, so_upcallarg, M_DONTWAIT);
91447636 521 socket_lock(so, 0);
2d21ac55
A
522
523 so->so_flags &= ~SOF_UPCALLINUSE;
524 /* Tell close that it's safe to proceed */
525 if (so->so_flags & SOF_CLOSEWAIT)
526 wakeup((caddr_t)&so->so_upcall);
91447636 527 }
1c79356b
A
528}
529
530/*
531 * Socket buffer (struct sockbuf) utility routines.
532 *
533 * Each socket contains two socket buffers: one for sending data and
534 * one for receiving data. Each buffer contains a queue of mbufs,
535 * information about the number of mbufs and amount of data in the
536 * queue, and other fields allowing select() statements and notification
537 * on data availability to be implemented.
538 *
539 * Data stored in a socket buffer is maintained as a list of records.
540 * Each record is a list of mbufs chained together with the m_next
541 * field. Records are chained together with the m_nextpkt field. The upper
542 * level routine soreceive() expects the following conventions to be
543 * observed when placing information in the receive buffer:
544 *
545 * 1. If the protocol requires each message be preceded by the sender's
546 * name, then a record containing that name must be present before
547 * any associated data (mbuf's must be of type MT_SONAME).
548 * 2. If the protocol supports the exchange of ``access rights'' (really
549 * just additional data associated with the message), and there are
550 * ``rights'' to be received, then a record containing this data
551 * should be present (mbuf's must be of type MT_RIGHTS).
552 * 3. If a name or rights record exists, then it must be followed by
553 * a data record, perhaps of zero length.
554 *
555 * Before using a new socket structure it is first necessary to reserve
556 * buffer space to the socket, by calling sbreserve(). This should commit
557 * some of the available buffer space in the system buffer pool for the
558 * socket (currently, it does nothing but enforce limits). The space
559 * should be released by calling sbrelease() when the socket is destroyed.
560 */
561
2d21ac55
A
562/*
563 * Returns: 0 Success
564 * ENOBUFS
565 */
1c79356b 566int
2d21ac55 567soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
1c79356b 568{
1c79356b
A
569
570 if (sbreserve(&so->so_snd, sndcc) == 0)
571 goto bad;
572 if (sbreserve(&so->so_rcv, rcvcc) == 0)
573 goto bad2;
574 if (so->so_rcv.sb_lowat == 0)
575 so->so_rcv.sb_lowat = 1;
576 if (so->so_snd.sb_lowat == 0)
577 so->so_snd.sb_lowat = MCLBYTES;
578 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
579 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
580 return (0);
581bad2:
9bccf70c 582#ifdef __APPLE__
0b4e3aa0 583 selthreadclear(&so->so_snd.sb_sel);
9bccf70c 584#endif
1c79356b
A
585 sbrelease(&so->so_snd);
586bad:
587 return (ENOBUFS);
588}
589
590/*
591 * Allot mbufs to a sockbuf.
592 * Attempt to scale mbmax so that mbcnt doesn't become limiting
593 * if buffering efficiency is near the normal case.
594 */
595int
2d21ac55 596sbreserve(struct sockbuf *sb, u_long cc)
1c79356b
A
597{
598 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
599 return (0);
600 sb->sb_hiwat = cc;
601 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
602 if (sb->sb_lowat > sb->sb_hiwat)
603 sb->sb_lowat = sb->sb_hiwat;
604 return (1);
605}
606
607/*
608 * Free mbufs held by a socket, and reserved mbuf space.
609 */
2d21ac55 610/* WARNING needs to do selthreadclear() before calling this */
1c79356b 611void
2d21ac55 612sbrelease(struct sockbuf *sb)
1c79356b 613{
1c79356b 614 sbflush(sb);
9bccf70c
A
615 sb->sb_hiwat = 0;
616 sb->sb_mbmax = 0;
1c79356b
A
617}
618
619/*
620 * Routines to add and remove
621 * data from an mbuf queue.
622 *
623 * The routines sbappend() or sbappendrecord() are normally called to
624 * append new mbufs to a socket buffer, after checking that adequate
625 * space is available, comparing the function sbspace() with the amount
626 * of data to be added. sbappendrecord() differs from sbappend() in
627 * that data supplied is treated as the beginning of a new record.
628 * To place a sender's address, optional access rights, and data in a
629 * socket receive buffer, sbappendaddr() should be used. To place
630 * access rights and data in a socket receive buffer, sbappendrights()
631 * should be used. In either case, the new data begins a new record.
632 * Note that unlike sbappend() and sbappendrecord(), these routines check
633 * for the caller that there will be enough space to store the data.
634 * Each fails if there is not enough space, or if it cannot find mbufs
635 * to store additional information in.
636 *
637 * Reliable protocols may use the socket send buffer to hold data
638 * awaiting acknowledgement. Data is normally copied from a socket
639 * send buffer in a protocol with m_copy for output to a peer,
640 * and then removing the data from the socket buffer with sbdrop()
641 * or sbdroprecord() when the data is acknowledged by the peer.
642 */
643
644/*
645 * Append mbuf chain m to the last record in the
646 * socket buffer sb. The additional space associated
647 * the mbuf chain is recorded in sb. Empty mbufs are
648 * discarded and mbufs are compacted where possible.
649 */
91447636 650int
2d21ac55 651sbappend(struct sockbuf *sb, struct mbuf *m)
9bccf70c 652{
2d21ac55 653 struct socket *so = sb->sb_so;
1c79356b 654
2d21ac55
A
655 if (m == NULL || (sb->sb_flags & SB_DROP)) {
656 if (m != NULL)
657 m_freem(m);
658 return (0);
659 }
fa4905b1 660
2d21ac55 661 SBLASTRECORDCHK(sb, "sbappend 1");
fa4905b1 662
2d21ac55
A
663 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR))
664 return (sbappendrecord(sb, m));
665
666 if (sb->sb_flags & SB_RECV) {
667 int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
668 SBLASTRECORDCHK(sb, "sbappend 2");
669 if (error != 0) {
670 if (error != EJUSTRETURN)
671 m_freem(m);
672 return (0);
91447636 673 }
91447636
A
674 }
675
2d21ac55
A
676 /* If this is the first record, it's also the last record */
677 if (sb->sb_lastrecord == NULL)
678 sb->sb_lastrecord = m;
fa4905b1 679
2d21ac55
A
680 sbcompress(sb, m, sb->sb_mbtail);
681 SBLASTRECORDCHK(sb, "sbappend 3");
682 return (1);
683}
684
685/*
686 * Similar to sbappend, except that this is optimized for stream sockets.
687 */
688int
689sbappendstream(struct sockbuf *sb, struct mbuf *m)
690{
691 struct socket *so = sb->sb_so;
692
693 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord))
694 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
695 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
696
697 SBLASTMBUFCHK(sb, __func__);
698
699 if (m == NULL || (sb->sb_flags & SB_DROP)) {
700 if (m != NULL)
701 m_freem(m);
702 return (0);
703 }
704
705 if (sb->sb_flags & SB_RECV) {
706 int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
707 SBLASTRECORDCHK(sb, "sbappendstream 1");
708 if (error != 0) {
709 if (error != EJUSTRETURN)
710 m_freem(m);
711 return (0);
712 }
713 }
714
715 sbcompress(sb, m, sb->sb_mbtail);
716 sb->sb_lastrecord = sb->sb_mb;
717 SBLASTRECORDCHK(sb, "sbappendstream 2");
718 return (1);
1c79356b
A
719}
720
#ifdef SOCKBUF_DEBUG
/*
 * Debug consistency check: walk every mbuf of every record in sb,
 * recompute the byte count and mbuf-space count, and panic if either
 * disagrees with sb_cc / sb_mbcnt.  The socket's lock must be held
 * (asserted).  Does nothing further when sbchecking is zero.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	struct mbuf *n = NULL;
	u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	for (m = sb->sb_mb; m; m = n) {
		/* Remember the next record; the inner loop consumes m. */
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			/* XXX pretty sure this is bogus */
			if (m->m_flags & M_EXT)
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		/* The counters are unsigned; print them as such. */
		panic("cc %lu != %lu || mbcnt %lu != %lu\n", len,
		    (u_long)sb->sb_cc, mbcnt, (u_long)sb->sb_mbcnt);
	}
}
#endif
756
2d21ac55
A
757void
758sblastrecordchk(struct sockbuf *sb, const char *where)
759{
760 struct mbuf *m = sb->sb_mb;
761
762 while (m && m->m_nextpkt)
763 m = m->m_nextpkt;
764
765 if (m != sb->sb_lastrecord) {
766 printf("sblastrecordchk: mb %p lastrecord %p last %p\n",
767 sb->sb_mb, sb->sb_lastrecord, m);
768 printf("packet chain:\n");
769 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
770 printf("\t%p\n", m);
771 panic("sblastrecordchk from %s", where);
772 }
773}
774
775void
776sblastmbufchk(struct sockbuf *sb, const char *where)
777{
778 struct mbuf *m = sb->sb_mb;
779 struct mbuf *n;
780
781 while (m && m->m_nextpkt)
782 m = m->m_nextpkt;
783
784 while (m && m->m_next)
785 m = m->m_next;
786
787 if (m != sb->sb_mbtail) {
788 printf("sblastmbufchk: mb %p mbtail %p last %p\n",
789 sb->sb_mb, sb->sb_mbtail, m);
790 printf("packet tree:\n");
791 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
792 printf("\t");
793 for (n = m; n != NULL; n = n->m_next)
794 printf("%p ", n);
795 printf("\n");
796 }
797 panic("sblastmbufchk from %s", where);
798 }
799}
800
1c79356b 801/*
2d21ac55 802 * Similar to sbappend, except the mbuf chain begins a new record.
1c79356b 803 */
91447636 804int
2d21ac55 805sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1c79356b 806{
2d21ac55
A
807 struct mbuf *m;
808 int space = 0;
9bccf70c 809
2d21ac55
A
810 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
811 if (m0 != NULL)
812 m_freem(m0);
813 return (0);
814 }
815
816 for (m = m0; m != NULL; m = m->m_next)
817 space += m->m_len;
818
819 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
820 m_freem(m0);
821 return (0);
822 }
823
824 if (sb->sb_flags & SB_RECV) {
825 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
826 sock_data_filt_flag_record, NULL);
91447636 827 if (error != 0) {
2d21ac55 828 SBLASTRECORDCHK(sb, "sbappendrecord 1");
91447636
A
829 if (error != EJUSTRETURN)
830 m_freem(m0);
2d21ac55 831 return (0);
1c79356b 832 }
1c79356b 833 }
2d21ac55 834
1c79356b 835 /*
1c79356b
A
836 * Note this permits zero length records.
837 */
838 sballoc(sb, m0);
2d21ac55
A
839 SBLASTRECORDCHK(sb, "sbappendrecord 2");
840 if (sb->sb_lastrecord != NULL) {
841 sb->sb_lastrecord->m_nextpkt = m0;
842 } else {
1c79356b 843 sb->sb_mb = m0;
2d21ac55
A
844 }
845 sb->sb_lastrecord = m0;
4a3eedf9 846 sb->sb_mbtail = m0;
2d21ac55 847
1c79356b
A
848 m = m0->m_next;
849 m0->m_next = 0;
850 if (m && (m0->m_flags & M_EOR)) {
851 m0->m_flags &= ~M_EOR;
852 m->m_flags |= M_EOR;
853 }
2d21ac55
A
854 sbcompress(sb, m, m0);
855 SBLASTRECORDCHK(sb, "sbappendrecord 3");
856 return (1);
1c79356b
A
857}
858
859/*
860 * As above except that OOB data
861 * is inserted at the beginning of the sockbuf,
862 * but after any other OOB data.
863 */
91447636 864int
2d21ac55 865sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
1c79356b 866{
91447636
A
867 struct mbuf *m;
868 struct mbuf **mp;
1c79356b
A
869
870 if (m0 == 0)
2d21ac55
A
871 return (0);
872
873 SBLASTRECORDCHK(sb, "sbinsertoob 1");
874
91447636
A
875 if ((sb->sb_flags & SB_RECV) != 0) {
876 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
2d21ac55
A
877 sock_data_filt_flag_oob, NULL);
878
879 SBLASTRECORDCHK(sb, "sbinsertoob 2");
91447636
A
880 if (error) {
881 if (error != EJUSTRETURN) {
882 m_freem(m0);
883 }
2d21ac55 884 return (0);
1c79356b 885 }
1c79356b 886 }
2d21ac55
A
887
888 for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
889 m = *mp;
890again:
1c79356b
A
891 switch (m->m_type) {
892
893 case MT_OOBDATA:
894 continue; /* WANT next train */
895
896 case MT_CONTROL:
897 m = m->m_next;
898 if (m)
899 goto again; /* inspect THIS train further */
900 }
901 break;
902 }
903 /*
904 * Put the first mbuf on the queue.
905 * Note this permits zero length records.
906 */
907 sballoc(sb, m0);
908 m0->m_nextpkt = *mp;
2d21ac55
A
909 if (*mp == NULL) {
910 /* m0 is actually the new tail */
911 sb->sb_lastrecord = m0;
912 }
1c79356b
A
913 *mp = m0;
914 m = m0->m_next;
915 m0->m_next = 0;
916 if (m && (m0->m_flags & M_EOR)) {
917 m0->m_flags &= ~M_EOR;
918 m->m_flags |= M_EOR;
919 }
2d21ac55
A
920 sbcompress(sb, m, m0);
921 SBLASTRECORDCHK(sb, "sbinsertoob 3");
922 return (1);
1c79356b
A
923}
924
925/*
926 * Append address and data, and optionally, control (ancillary) data
927 * to the receive queue of a socket. If present,
928 * m0 must include a packet header with total length.
929 * Returns 0 if no space in sockbuf or insufficient mbufs.
2d21ac55
A
930 *
931 * Returns: 0 No space/out of mbufs
932 * 1 Success
1c79356b 933 */
91447636 934static int
2d21ac55
A
935sbappendaddr_internal(struct sockbuf *sb, struct sockaddr *asa,
936 struct mbuf *m0, struct mbuf *control)
1c79356b 937{
2d21ac55 938 struct mbuf *m, *n, *nlast;
1c79356b 939 int space = asa->sa_len;
1c79356b
A
940
941 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
942 panic("sbappendaddr");
943
1c79356b
A
944 if (m0)
945 space += m0->m_pkthdr.len;
946 for (n = control; n; n = n->m_next) {
947 space += n->m_len;
948 if (n->m_next == 0) /* keep pointer to last control buf */
949 break;
950 }
951 if (space > sbspace(sb))
952 return (0);
953 if (asa->sa_len > MLEN)
954 return (0);
955 MGET(m, M_DONTWAIT, MT_SONAME);
956 if (m == 0)
957 return (0);
958 m->m_len = asa->sa_len;
959 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
960 if (n)
961 n->m_next = m0; /* concatenate data to control */
962 else
963 control = m0;
964 m->m_next = control;
2d21ac55
A
965
966 SBLASTRECORDCHK(sb, "sbappendadddr 1");
967
968 for (n = m; n->m_next != NULL; n = n->m_next)
1c79356b 969 sballoc(sb, n);
2d21ac55
A
970 sballoc(sb, n);
971 nlast = n;
972
973 if (sb->sb_lastrecord != NULL) {
974 sb->sb_lastrecord->m_nextpkt = m;
975 } else {
1c79356b 976 sb->sb_mb = m;
2d21ac55
A
977 }
978 sb->sb_lastrecord = m;
979 sb->sb_mbtail = nlast;
980
981 SBLASTMBUFCHK(sb, __func__);
982 SBLASTRECORDCHK(sb, "sbappendadddr 2");
983
984 postevent(0, sb, EV_RWBYTES);
1c79356b
A
985 return (1);
986}
987
2d21ac55
A
988/*
989 * Returns: 0 Error: No space/out of mbufs/etc.
990 * 1 Success
991 *
992 * Imputed: (*error_out) errno for error
993 * ENOBUFS
994 * sflt_data_in:??? [whatever a filter author chooses]
995 */
1c79356b 996int
2d21ac55
A
997sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
998 struct mbuf *control, int *error_out)
91447636
A
999{
1000 int result = 0;
2d21ac55
A
1001 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1002
1003 if (error_out)
1004 *error_out = 0;
1005
91447636
A
1006 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
1007 panic("sbappendaddrorfree");
2d21ac55
A
1008
1009 if (sb->sb_flags & SB_DROP) {
1010 if (m0 != NULL)
1011 m_freem(m0);
1012 if (control != NULL && !sb_unix)
1013 m_freem(control);
1014 if (error_out != NULL)
1015 *error_out = EINVAL;
1016 return (0);
1017 }
1018
91447636
A
1019 /* Call socket data in filters */
1020 if ((sb->sb_flags & SB_RECV) != 0) {
1021 int error;
cc9f6e38 1022 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
2d21ac55 1023 SBLASTRECORDCHK(sb, __func__);
91447636
A
1024 if (error) {
1025 if (error != EJUSTRETURN) {
2d21ac55
A
1026 if (m0)
1027 m_freem(m0);
1028 if (control != NULL && !sb_unix)
1029 m_freem(control);
1030 if (error_out)
1031 *error_out = error;
91447636 1032 }
2d21ac55 1033 return (0);
91447636
A
1034 }
1035 }
2d21ac55 1036
91447636
A
1037 result = sbappendaddr_internal(sb, asa, m0, control);
1038 if (result == 0) {
2d21ac55
A
1039 if (m0)
1040 m_freem(m0);
1041 if (control != NULL && !sb_unix)
1042 m_freem(control);
1043 if (error_out)
1044 *error_out = ENOBUFS;
91447636 1045 }
2d21ac55
A
1046
1047 return (result);
91447636
A
1048}
1049
1050static int
2d21ac55
A
1051sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1052 struct mbuf *control)
1c79356b 1053{
2d21ac55 1054 struct mbuf *m, *mlast, *n;
1c79356b 1055 int space = 0;
1c79356b
A
1056
1057 if (control == 0)
1058 panic("sbappendcontrol");
1059
1c79356b
A
1060 for (m = control; ; m = m->m_next) {
1061 space += m->m_len;
1062 if (m->m_next == 0)
1063 break;
1064 }
1065 n = m; /* save pointer to last control buffer */
1066 for (m = m0; m; m = m->m_next)
1067 space += m->m_len;
2d21ac55 1068 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX))
1c79356b
A
1069 return (0);
1070 n->m_next = m0; /* concatenate data to control */
2d21ac55
A
1071
1072 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1073
1074 for (m = control; m->m_next != NULL; m = m->m_next)
1c79356b 1075 sballoc(sb, m);
2d21ac55
A
1076 sballoc(sb, m);
1077 mlast = m;
1078
1079 if (sb->sb_lastrecord != NULL) {
1080 sb->sb_lastrecord->m_nextpkt = control;
1081 } else {
1c79356b 1082 sb->sb_mb = control;
2d21ac55
A
1083 }
1084 sb->sb_lastrecord = control;
1085 sb->sb_mbtail = mlast;
1086
1087 SBLASTMBUFCHK(sb, __func__);
1088 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1089
1090 postevent(0, sb, EV_RWBYTES);
1c79356b
A
1091 return (1);
1092}
1093
91447636 1094int
2d21ac55
A
1095sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
1096 int *error_out)
91447636
A
1097{
1098 int result = 0;
2d21ac55
A
1099 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1100
1101 if (error_out)
1102 *error_out = 0;
1103
1104 if (sb->sb_flags & SB_DROP) {
1105 if (m0 != NULL)
1106 m_freem(m0);
1107 if (control != NULL && !sb_unix)
1108 m_freem(control);
1109 if (error_out != NULL)
1110 *error_out = EINVAL;
1111 return (0);
1112 }
1113
91447636
A
1114 if (sb->sb_flags & SB_RECV) {
1115 int error;
2d21ac55 1116
cc9f6e38 1117 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
2d21ac55 1118 SBLASTRECORDCHK(sb, __func__);
91447636
A
1119 if (error) {
1120 if (error != EJUSTRETURN) {
2d21ac55
A
1121 if (m0)
1122 m_freem(m0);
1123 if (control != NULL && !sb_unix)
1124 m_freem(control);
1125 if (error_out)
1126 *error_out = error;
91447636 1127 }
2d21ac55 1128 return (0);
91447636
A
1129 }
1130 }
2d21ac55 1131
91447636
A
1132 result = sbappendcontrol_internal(sb, m0, control);
1133 if (result == 0) {
2d21ac55
A
1134 if (m0)
1135 m_freem(m0);
1136 if (control != NULL && !sb_unix)
1137 m_freem(control);
1138 if (error_out)
1139 *error_out = ENOBUFS;
91447636 1140 }
2d21ac55
A
1141
1142 return (result);
91447636
A
1143}
1144
1c79356b
A
1145/*
1146 * Compress mbuf chain m into the socket
1147 * buffer sb following mbuf n. If n
1148 * is null, the buffer is presumed empty.
1149 */
2d21ac55
A
1150static inline void
1151sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1152{
1153 int eor = 0;
1154 struct mbuf *o;
1155
1156 if (m == NULL) {
1157 /* There is nothing to compress; just update the tail */
1158 for (; n->m_next != NULL; n = n->m_next)
1159 ;
1160 sb->sb_mbtail = n;
1161 goto done;
1162 }
1c79356b
A
1163
1164 while (m) {
1165 eor |= m->m_flags & M_EOR;
2d21ac55
A
1166 if (m->m_len == 0 && (eor == 0 ||
1167 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
1168 if (sb->sb_lastrecord == m)
1169 sb->sb_lastrecord = m->m_next;
1c79356b
A
1170 m = m_free(m);
1171 continue;
1172 }
9bccf70c
A
1173 if (n && (n->m_flags & M_EOR) == 0 &&
1174#ifndef __APPLE__
1175 M_WRITABLE(n) &&
1176#endif
1177 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1178 m->m_len <= M_TRAILINGSPACE(n) &&
1c79356b
A
1179 n->m_type == m->m_type) {
1180 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1181 (unsigned)m->m_len);
1182 n->m_len += m->m_len;
1183 sb->sb_cc += m->m_len;
2d21ac55
A
1184 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1185 m->m_type != MT_OOBDATA)
1186 /* XXX: Probably don't need.*/
1187 sb->sb_ctl += m->m_len;
1c79356b
A
1188 m = m_free(m);
1189 continue;
1190 }
1191 if (n)
1192 n->m_next = m;
1193 else
1194 sb->sb_mb = m;
2d21ac55 1195 sb->sb_mbtail = m;
1c79356b
A
1196 sballoc(sb, m);
1197 n = m;
1198 m->m_flags &= ~M_EOR;
1199 m = m->m_next;
1200 n->m_next = 0;
1201 }
1202 if (eor) {
1203 if (n)
1204 n->m_flags |= eor;
1205 else
1206 printf("semi-panic: sbcompress\n");
1207 }
2d21ac55
A
1208done:
1209 SBLASTMBUFCHK(sb, __func__);
1210 postevent(0, sb, EV_RWBYTES);
1211}
1212
1213void
1214sb_empty_assert(struct sockbuf *sb, const char *where)
1215{
1216 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1217 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
1218 panic("%s: sb %p so %p cc %ld mbcnt %ld mb %p mbtail %p "
1219 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1220 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail, sb->sb_lastrecord);
1221 /* NOTREACHED */
1222 }
1c79356b
A
1223}
1224
1225/*
1226 * Free all mbufs in a sockbuf.
1227 * Check that all resources are reclaimed.
1228 */
1229void
2d21ac55 1230sbflush(struct sockbuf *sb)
1c79356b 1231{
91447636 1232 if (sb->sb_so == NULL)
2d21ac55
A
1233 panic("sbflush sb->sb_so already null sb=%p\n", sb);
1234 (void) sblock(sb, M_WAIT);
9bccf70c
A
1235 while (sb->sb_mbcnt) {
1236 /*
1237 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1238 * we would loop forever. Panic instead.
1239 */
1240 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
1241 break;
1c79356b 1242 sbdrop(sb, (int)sb->sb_cc);
9bccf70c 1243 }
2d21ac55 1244 sb_empty_assert(sb, __func__);
1c79356b 1245 postevent(0, sb, EV_RWBYTES);
91447636
A
1246 sbunlock(sb, 1); /* keep socket locked */
1247
1c79356b
A
1248}
1249
1250/*
1251 * Drop data from (the front of) a sockbuf.
9bccf70c
A
1252 * use m_freem_list to free the mbuf structures
1253 * under a single lock... this is done by pruning
1254 * the top of the tree from the body by keeping track
1255 * of where we get to in the tree and then zeroing the
1256 * two pertinent pointers m_nextpkt and m_next
1257 * the socket buffer is then updated to point at the new
1258 * top of the tree and the pruned area is released via
1259 * m_freem_list.
1c79356b
A
1260 */
1261void
2d21ac55 1262sbdrop(struct sockbuf *sb, int len)
1c79356b 1263{
2d21ac55 1264 struct mbuf *m, *free_list, *ml;
fa4905b1 1265 struct mbuf *next, *last;
1c79356b 1266
fa4905b1
A
1267 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1268
1c79356b 1269 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
fa4905b1
A
1270 free_list = last = m;
1271 ml = (struct mbuf *)0;
1272
1c79356b
A
1273 while (len > 0) {
1274 if (m == 0) {
2d21ac55
A
1275 if (next == 0) {
1276 /*
1277 * temporarily replacing this panic with printf
1278 * because it occurs occasionally when closing
1279 * a socket when there is no harm in ignoring
1280 * it. This problem will be investigated
1281 * further.
1282 */
1283 /* panic("sbdrop"); */
1284 printf("sbdrop - count not zero\n");
1285 len = 0;
1286 /*
1287 * zero the counts. if we have no mbufs,
1288 * we have no data (PR-2986815)
1289 */
1290 sb->sb_cc = 0;
1291 sb->sb_mbcnt = 0;
1292 break;
1293 }
1294 m = last = next;
1295 next = m->m_nextpkt;
1296 continue;
1c79356b
A
1297 }
1298 if (m->m_len > len) {
1299 m->m_len -= len;
1300 m->m_data += len;
1301 sb->sb_cc -= len;
2d21ac55
A
1302 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1303 m->m_type != MT_OOBDATA)
1304 sb->sb_ctl -= len;
1c79356b
A
1305 break;
1306 }
1307 len -= m->m_len;
1308 sbfree(sb, m);
fa4905b1
A
1309
1310 ml = m;
1311 m = m->m_next;
1c79356b
A
1312 }
1313 while (m && m->m_len == 0) {
1314 sbfree(sb, m);
fa4905b1
A
1315
1316 ml = m;
1317 m = m->m_next;
1318 }
1319 if (ml) {
2d21ac55 1320 ml->m_next = (struct mbuf *)0;
fa4905b1 1321 last->m_nextpkt = (struct mbuf *)0;
2d21ac55 1322 m_freem_list(free_list);
1c79356b
A
1323 }
1324 if (m) {
1325 sb->sb_mb = m;
1326 m->m_nextpkt = next;
2d21ac55 1327 } else {
1c79356b 1328 sb->sb_mb = next;
2d21ac55
A
1329 }
1330
1331 /*
1332 * First part is an inline SB_EMPTY_FIXUP(). Second part
1333 * makes sure sb_lastrecord is up-to-date if we dropped
1334 * part of the last record.
1335 */
1336 m = sb->sb_mb;
1337 if (m == NULL) {
1338 sb->sb_mbtail = NULL;
1339 sb->sb_lastrecord = NULL;
1340 } else if (m->m_nextpkt == NULL) {
1341 sb->sb_lastrecord = m;
1342 }
fa4905b1 1343
1c79356b 1344 postevent(0, sb, EV_RWBYTES);
fa4905b1
A
1345
1346 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1c79356b
A
1347}
1348
1349/*
1350 * Drop a record off the front of a sockbuf
1351 * and move the next record to the front.
1352 */
1353void
2d21ac55 1354sbdroprecord(struct sockbuf *sb)
1c79356b 1355{
2d21ac55 1356 struct mbuf *m, *mn;
1c79356b
A
1357
1358 m = sb->sb_mb;
1359 if (m) {
1360 sb->sb_mb = m->m_nextpkt;
1361 do {
1362 sbfree(sb, m);
1363 MFREE(m, mn);
9bccf70c
A
1364 m = mn;
1365 } while (m);
1c79356b 1366 }
2d21ac55 1367 SB_EMPTY_FIXUP(sb);
1c79356b
A
1368 postevent(0, sb, EV_RWBYTES);
1369}
1370
1371/*
1372 * Create a "control" mbuf containing the specified data
1373 * with the specified type for presentation on a socket buffer.
1374 */
1375struct mbuf *
2d21ac55 1376sbcreatecontrol(caddr_t p, int size, int type, int level)
1c79356b 1377{
2d21ac55 1378 struct cmsghdr *cp;
1c79356b
A
1379 struct mbuf *m;
1380
9bccf70c 1381 if (CMSG_SPACE((u_int)size) > MLEN)
2d21ac55 1382 return ((struct mbuf *)NULL);
1c79356b 1383 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
2d21ac55 1384 return ((struct mbuf *)NULL);
1c79356b
A
1385 cp = mtod(m, struct cmsghdr *);
1386 /* XXX check size? */
2d21ac55 1387 (void) memcpy(CMSG_DATA(cp), p, size);
9bccf70c
A
1388 m->m_len = CMSG_SPACE(size);
1389 cp->cmsg_len = CMSG_LEN(size);
1c79356b
A
1390 cp->cmsg_level = level;
1391 cp->cmsg_type = type;
1392 return (m);
1393}
1394
1395/*
1396 * Some routines that return EOPNOTSUPP for entry points that are not
1397 * supported by a protocol. Fill in as needed.
1398 */
1399int
2d21ac55 1400pru_abort_notsupp(__unused struct socket *so)
1c79356b 1401{
2d21ac55 1402 return (EOPNOTSUPP);
1c79356b
A
1403}
1404
1c79356b 1405int
2d21ac55 1406pru_accept_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
1c79356b 1407{
2d21ac55 1408 return (EOPNOTSUPP);
1c79356b
A
1409}
1410
1411int
2d21ac55
A
1412pru_attach_notsupp(__unused struct socket *so, __unused int proto,
1413 __unused struct proc *p)
1c79356b 1414{
2d21ac55 1415 return (EOPNOTSUPP);
1c79356b
A
1416}
1417
1418int
2d21ac55
A
1419pru_bind_notsupp(__unused struct socket *so, __unused struct sockaddr *nam,
1420 __unused struct proc *p)
1c79356b 1421{
2d21ac55 1422 return (EOPNOTSUPP);
1c79356b
A
1423}
1424
1425int
2d21ac55
A
1426pru_connect_notsupp(__unused struct socket *so, __unused struct sockaddr *nam,
1427 __unused struct proc *p)
1c79356b 1428{
2d21ac55 1429 return (EOPNOTSUPP);
1c79356b
A
1430}
1431
1432int
2d21ac55 1433pru_connect2_notsupp(__unused struct socket *so1, __unused struct socket *so2)
1c79356b 1434{
2d21ac55 1435 return (EOPNOTSUPP);
1c79356b
A
1436}
1437
1438int
2d21ac55
A
1439pru_control_notsupp(__unused struct socket *so, __unused u_long cmd,
1440 __unused caddr_t data, __unused struct ifnet *ifp, __unused struct proc *p)
1c79356b 1441{
2d21ac55 1442 return (EOPNOTSUPP);
1c79356b
A
1443}
1444
1445int
2d21ac55 1446pru_detach_notsupp(__unused struct socket *so)
1c79356b 1447{
2d21ac55 1448 return (EOPNOTSUPP);
1c79356b
A
1449}
1450
1451int
2d21ac55 1452pru_disconnect_notsupp(__unused struct socket *so)
1c79356b 1453{
2d21ac55 1454 return (EOPNOTSUPP);
1c79356b
A
1455}
1456
1457int
2d21ac55 1458pru_listen_notsupp(__unused struct socket *so, __unused struct proc *p)
1c79356b 1459{
2d21ac55 1460 return (EOPNOTSUPP);
1c79356b
A
1461}
1462
1463int
2d21ac55 1464pru_peeraddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
1c79356b 1465{
2d21ac55 1466 return (EOPNOTSUPP);
1c79356b
A
1467}
1468
1469int
2d21ac55 1470pru_rcvd_notsupp(__unused struct socket *so, __unused int flags)
1c79356b 1471{
2d21ac55 1472 return (EOPNOTSUPP);
1c79356b
A
1473}
1474
1475int
2d21ac55
A
1476pru_rcvoob_notsupp(__unused struct socket *so, __unused struct mbuf *m,
1477 __unused int flags)
1c79356b 1478{
2d21ac55 1479 return (EOPNOTSUPP);
1c79356b
A
1480}
1481
1482int
2d21ac55
A
1483pru_send_notsupp(__unused struct socket *so, __unused int flags,
1484 __unused struct mbuf *m, __unused struct sockaddr *addr,
1485 __unused struct mbuf *control, __unused struct proc *p)
1c79356b
A
1486
1487{
2d21ac55 1488 return (EOPNOTSUPP);
1c79356b
A
1489}
1490
1491
1492/*
1493 * This isn't really a ``null'' operation, but it's the default one
1494 * and doesn't do anything destructive.
1495 */
1496int
2d21ac55 1497pru_sense_null(struct socket *so, void *ub, int isstat64)
1c79356b 1498{
2d21ac55
A
1499 if (isstat64 != 0) {
1500 struct stat64 *sb64;
1c79356b 1501
2d21ac55
A
1502 sb64 = (struct stat64 *)ub;
1503 sb64->st_blksize = so->so_snd.sb_hiwat;
1504 } else {
1505 struct stat *sb;
1c79356b 1506
2d21ac55
A
1507 sb = (struct stat *)ub;
1508 sb->st_blksize = so->so_snd.sb_hiwat;
1509 }
1c79356b 1510
2d21ac55 1511 return (0);
1c79356b
A
1512}
1513
1c79356b
A
1514
1515int
2d21ac55
A
1516pru_sosend_notsupp(__unused struct socket *so, __unused struct sockaddr *addr,
1517 __unused struct uio *uio, __unused struct mbuf *top,
1518 __unused struct mbuf *control, __unused int flags)
1c79356b 1519
1c79356b 1520{
2d21ac55 1521 return (EOPNOTSUPP);
1c79356b
A
1522}
1523
1524int
2d21ac55
A
1525pru_soreceive_notsupp(__unused struct socket *so,
1526 __unused struct sockaddr **paddr,
1527 __unused struct uio *uio, __unused struct mbuf **mp0,
1528 __unused struct mbuf **controlp, __unused int *flagsp)
1c79356b 1529{
2d21ac55 1530 return (EOPNOTSUPP);
1c79356b
A
1531}
1532
2d21ac55
A
1533int
1534pru_shutdown_notsupp(__unused struct socket *so)
1c79356b 1535{
2d21ac55 1536 return (EOPNOTSUPP);
1c79356b
A
1537}
1538
2d21ac55
A
1539int
1540pru_sockaddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
1c79356b 1541{
2d21ac55 1542 return (EOPNOTSUPP);
1c79356b
A
1543}
1544
91447636
A
1545int
1546pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
2d21ac55 1547 __unused kauth_cred_t cred, __unused void *wql)
1c79356b 1548{
2d21ac55 1549 return (EOPNOTSUPP);
1c79356b
A
1550}
1551
1552
9bccf70c
A
1553#ifdef __APPLE__
1554/*
1555 * The following are macros on BSD and functions on Darwin
1556 */
1c79356b 1557
0b4e3aa0
A
1558/*
1559 * Do we need to notify the other side when I/O is possible?
1560 */
1561
2d21ac55 1562int
0b4e3aa0
A
1563sb_notify(struct sockbuf *sb)
1564{
2d21ac55
A
1565 return ((sb->sb_flags &
1566 (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
0b4e3aa0
A
1567}
1568
1569/*
1570 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1571 * This is problematical if the fields are unsigned, as the space might
1572 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1573 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1574 */
1575long
1576sbspace(struct sockbuf *sb)
1577{
2d21ac55
A
1578 return ((long)imin((int)(sb->sb_hiwat - sb->sb_cc),
1579 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
0b4e3aa0
A
1580}
1581
1582/* do we have to send all at once on a socket? */
1583int
1584sosendallatonce(struct socket *so)
1585{
2d21ac55 1586 return (so->so_proto->pr_flags & PR_ATOMIC);
0b4e3aa0
A
1587}
1588
1589/* can we read something from so? */
1590int
1591soreadable(struct socket *so)
1592{
2d21ac55
A
1593 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
1594 (so->so_state & SS_CANTRCVMORE) ||
1595 so->so_comp.tqh_first || so->so_error);
0b4e3aa0
A
1596}
1597
1598/* can we write something to so? */
1599
1600int
1601sowriteable(struct socket *so)
1602{
2d21ac55
A
1603 return ((sbspace(&(so)->so_snd) >= (long)(so)->so_snd.sb_lowat &&
1604 ((so->so_state&SS_ISCONNECTED) ||
1605 (so->so_proto->pr_flags&PR_CONNREQUIRED) == 0)) ||
1606 (so->so_state & SS_CANTSENDMORE) ||
1607 so->so_error);
0b4e3aa0
A
1608}
1609
1610/* adjust counters in sb reflecting allocation of m */
1611
1612void
1613sballoc(struct sockbuf *sb, struct mbuf *m)
1614{
2d21ac55 1615 int cnt = 1;
0b4e3aa0 1616 sb->sb_cc += m->m_len;
2d21ac55
A
1617 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1618 m->m_type != MT_OOBDATA)
1619 sb->sb_ctl += m->m_len;
0b4e3aa0 1620 sb->sb_mbcnt += MSIZE;
2d21ac55
A
1621
1622 if (m->m_flags & M_EXT) {
0b4e3aa0 1623 sb->sb_mbcnt += m->m_ext.ext_size;
2d21ac55
A
1624 cnt += m->m_ext.ext_size / MSIZE ;
1625 }
1626 OSAddAtomic(cnt, (SInt32*)&total_mb_cnt);
0b4e3aa0
A
1627}
1628
1629/* adjust counters in sb reflecting freeing of m */
1630void
1631sbfree(struct sockbuf *sb, struct mbuf *m)
1632{
2d21ac55
A
1633 int cnt = -1;
1634 sb->sb_cc -= m->m_len;
1635 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1636 m->m_type != MT_OOBDATA)
1637 sb->sb_ctl -= m->m_len;
0b4e3aa0 1638 sb->sb_mbcnt -= MSIZE;
2d21ac55 1639 if (m->m_flags & M_EXT) {
0b4e3aa0 1640 sb->sb_mbcnt -= m->m_ext.ext_size;
2d21ac55
A
1641 cnt -= m->m_ext.ext_size / MSIZE ;
1642 }
1643 OSAddAtomic(cnt, (SInt32*)&total_mb_cnt);
0b4e3aa0
A
1644}
1645
1646/*
1647 * Set lock on sockbuf sb; sleep if lock is already held.
1648 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1649 * Returns error without lock if sleep is interrupted.
2d21ac55
A
1650 *
1651 * Returns: 0 Success
1652 * EWOULDBLOCK
1653 * sb_lock:EINTR
0b4e3aa0
A
1654 */
1655int
1656sblock(struct sockbuf *sb, int wf)
1657{
6601e61a
A
1658 int error = 0;
1659
1660 if (sb->sb_flags & SB_LOCK)
1661 error = (wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK;
1662 else
1663 sb->sb_flags |= SB_LOCK;
1664
1665 return (error);
0b4e3aa0
A
1666}
1667
1668/* release lock on sockbuf sb */
1669void
91447636 1670sbunlock(struct sockbuf *sb, int keeplocked)
0b4e3aa0 1671{
91447636 1672 struct socket *so = sb->sb_so;
0c530ab8 1673 int lr_saved;
91447636
A
1674 lck_mtx_t *mutex_held;
1675
0c530ab8 1676 lr_saved = (unsigned int) __builtin_return_address(0);
91447636 1677
2d21ac55 1678 sb->sb_flags &= ~SB_LOCK;
91447636 1679
2d21ac55
A
1680 if (sb->sb_flags & SB_WANT) {
1681 sb->sb_flags &= ~SB_WANT;
91447636 1682 if (so->so_usecount < 0)
2d21ac55
A
1683 panic("sbunlock: b4 wakeup so=%p ref=%d lr=%x "
1684 "sb_flags=%x\n", sb->sb_so, so->so_usecount,
1685 lr_saved, sb->sb_flags);
91447636 1686
2d21ac55
A
1687 wakeup((caddr_t)&(sb)->sb_flags);
1688 }
91447636 1689 if (keeplocked == 0) { /* unlock on exit */
0c530ab8
A
1690 if (so->so_proto->pr_getlock != NULL)
1691 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1692 else
1693 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1694
0c530ab8
A
1695 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1696
91447636
A
1697 so->so_usecount--;
1698 if (so->so_usecount < 0)
2d21ac55
A
1699 panic("sbunlock: unlock on exit so=%p ref=%d lr=%x "
1700 "sb_flags=%x\n", so, so->so_usecount, lr_saved,
1701 sb->sb_flags);
1702 so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved;
0c530ab8 1703 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
91447636
A
1704 lck_mtx_unlock(mutex_held);
1705 }
0b4e3aa0
A
1706}
1707
1708void
2d21ac55 1709sorwakeup(struct socket *so)
0b4e3aa0 1710{
2d21ac55
A
1711 if (sb_notify(&so->so_rcv))
1712 sowakeup(so, &so->so_rcv);
0b4e3aa0
A
1713}
1714
1715void
2d21ac55 1716sowwakeup(struct socket *so)
0b4e3aa0 1717{
2d21ac55
A
1718 if (sb_notify(&so->so_snd))
1719 sowakeup(so, &so->so_snd);
0b4e3aa0 1720}
2d21ac55 1721#endif /* __APPLE__ */
0b4e3aa0 1722
1c79356b
A
1723/*
1724 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1725 */
1726struct sockaddr *
2d21ac55 1727dup_sockaddr(struct sockaddr *sa, int canwait)
1c79356b
A
1728{
1729 struct sockaddr *sa2;
1730
2d21ac55
A
1731 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1732 canwait ? M_WAITOK : M_NOWAIT);
1c79356b
A
1733 if (sa2)
1734 bcopy(sa, sa2, sa->sa_len);
2d21ac55 1735 return (sa2);
1c79356b
A
1736}
1737
1738/*
1739 * Create an external-format (``xsocket'') structure using the information
1740 * in the kernel-format socket structure pointed to by so. This is done
1741 * to reduce the spew of irrelevant information over this interface,
1742 * to isolate user code from changes in the kernel structure, and
1743 * potentially to provide information-hiding if we decide that
1744 * some of this information should be hidden from users.
1745 */
1746void
1747sotoxsocket(struct socket *so, struct xsocket *xso)
1748{
2d21ac55 1749 xso->xso_len = sizeof (*xso);
1c79356b
A
1750 xso->xso_so = so;
1751 xso->so_type = so->so_type;
1752 xso->so_options = so->so_options;
1753 xso->so_linger = so->so_linger;
1754 xso->so_state = so->so_state;
1755 xso->so_pcb = so->so_pcb;
91447636
A
1756 if (so->so_proto) {
1757 xso->xso_protocol = so->so_proto->pr_protocol;
1758 xso->xso_family = so->so_proto->pr_domain->dom_family;
2d21ac55 1759 } else {
91447636 1760 xso->xso_protocol = xso->xso_family = 0;
2d21ac55 1761 }
1c79356b
A
1762 xso->so_qlen = so->so_qlen;
1763 xso->so_incqlen = so->so_incqlen;
1764 xso->so_qlimit = so->so_qlimit;
1765 xso->so_timeo = so->so_timeo;
1766 xso->so_error = so->so_error;
1767 xso->so_pgid = so->so_pgid;
1768 xso->so_oobmark = so->so_oobmark;
1769 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
1770 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
1771 xso->so_uid = so->so_uid;
1772}
1773
1774/*
1775 * This does the same for sockbufs. Note that the xsockbuf structure,
1776 * since it is always embedded in a socket, does not include a self
1777 * pointer nor a length. We make this entry point public in case
1778 * some other mechanism needs it.
1779 */
1780void
1781sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1782{
1783 xsb->sb_cc = sb->sb_cc;
1784 xsb->sb_hiwat = sb->sb_hiwat;
1785 xsb->sb_mbcnt = sb->sb_mbcnt;
1786 xsb->sb_mbmax = sb->sb_mbmax;
1787 xsb->sb_lowat = sb->sb_lowat;
1788 xsb->sb_flags = sb->sb_flags;
2d21ac55
A
1789 xsb->sb_timeo = (u_long)
1790 (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
91447636
A
1791 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
1792 xsb->sb_timeo = 1;
1c79356b
A
1793}
1794
1795/*
1796 * Here is the definition of some of the basic objects in the kern.ipc
1797 * branch of the MIB.
1798 */
2d21ac55 1799SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPC");
1c79356b
A
1800
1801/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1802static int dummy;
1803SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1804
2d21ac55 1805SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
9bccf70c 1806 &sb_max, 0, "Maximum socket buffer size");
2d21ac55 1807SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
9bccf70c 1808 &maxsockets, 0, "Maximum number of sockets avaliable");
1c79356b 1809SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
2d21ac55
A
1810 &sb_efficiency, 0, "");
1811SYSCTL_INT(_kern_ipc, OID_AUTO, sbspace_factor, CTLFLAG_RW,
1812 &sbspace_factor, 0, "Ratio of mbuf/cluster use for socket layers");
1813SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
1814 &nmbclusters, 0, "");
1815SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD, &njcl, 0, "");
1816SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, CTLFLAG_RD, &njclbytes, 0, "");
1817SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW,
1818 &soqlimitcompat, 1, "Enable socket queue limit compatibility");
1819SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW,
1820 &soqlencomp, 0, "Listen backlog represents only complete queue");