/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define	DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define	DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)

static int sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);

/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

/*
 * Procedures to manipulate the state flags of a socket
 * and do appropriate wakeups.  The normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of the connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of the disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}

void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
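
/*
 * Illustrative sketch, not part of the original file: as the comment
 * above notes, a connectionless protocol may skip the in-progress
 * states entirely.  The function name below is a hypothetical
 * placeholder for real protocol code.
 */
#if 0
static void
hypothetical_dgram_connect(struct socket *so)
{
	soisconnected(so);	/* no handshake: connected immediately */
	/* ... exchange datagrams ... */
	soisdisconnected(so);	/* teardown is likewise immediate */
}
#endif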

/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
//###LD To clean up
		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			socket_lock(so, 1);
			sonext = TAILQ_NEXT(so, so_list);
//			in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
			socket_unlock(so, 1);
			so = sonext;
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
	return (so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
#endif

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sflt_termsock(so);
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock
	 * for protocols with per-socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sflt_termsock(so);
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs++;
#endif

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

#ifdef __APPLE__
	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#endif
	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}


struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	for (filter = head->so_filt; filter && (error == 0);
	    filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
			    filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
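
/*
 * Minimal sketch (not from this file) of how a protocol's input path
 * might use sonewconn() when a connection attempt arrives on a
 * listening socket; the function name and handshake step are
 * hypothetical stand-ins for real protocol code.
 */
#if 0
static void
hypothetical_input_connreq(struct socket *head, struct sockaddr *from)
{
	struct socket *so;

	/* Queue an embryonic socket on head->so_incomp. */
	so = sonewconn(head, 0, from);
	if (so == NULL)
		return;		/* queue full or a filter rejected it */

	/* ... once the handshake completes ... */
	soisconnected(so);	/* moves so to head->so_comp for accept() */
}
#endif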

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in the case of PRU_SHUTDOWN).  Socantrcvmore indicates that no
 * more data will be received, and will normally be applied to the socket
 * by a protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

	lr_saved = (unsigned int) __builtin_return_address(0);

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
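
/*
 * Sketch of the canonical wait loop (an assumed caller pattern modeled
 * on soreceive(); `so' and `error' are locals of the hypothetical
 * caller).  sbwait() sleeps off the socket lock, so state must be
 * re-checked on each iteration.
 */
#if 0
	while (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
		if (so->so_error || (so->so_state & SS_CANTRCVMORE))
			break;
		error = sbwait(&so->so_rcv);
		if (error)
			return (error);	/* e.g. EINTR or EBADF */
	}
#endif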

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0;

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);

		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();

	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
#endif
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
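
/*
 * Example fragment (hypothetical sizes, assumed caller context): a
 * protocol attach routine typically reserves its default send and
 * receive space in one soreserve() call.
 */
#if 0
	error = soreserve(so, 8 * 1024, 8 * 1024);
	if (error)
		return (error);		/* ENOBUFS: limits exceeded */
#endif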

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = 0;
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and
 * the data is then removed from the socket buffer with sbdrop() or
 * sbdroprecord() when the data is acknowledged by the peer.
 */
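
/*
 * Sketch of the reliable-protocol cycle described above (an assumed
 * caller pattern; `m', `n', `len' and `acked' are locals of the
 * hypothetical caller): append to the send buffer, transmit a copy,
 * and drop the data once the peer acknowledges it.
 */
#if 0
	sbappend(&so->so_snd, m);		/* hold data awaiting ACK */
	n = m_copy(so->so_snd.sb_mb, 0, len);	/* copy for transmission */
	/* ... hand n to the output path; later, when an ACK arrives: */
	sbdrop(&so->so_snd, acked);
	sowwakeup(so);				/* space freed; wake writers */
#endif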

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * with the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

	if (m == 0)
		return 0;

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we ran any filters, the socket lock was dropped.  n and
		 * sb_first cached data from the socket buffer.  This cache is
		 * not valid since we dropped the lock.  We must start over.
		 * Since filtered is set we won't run through the filters a
		 * second time.  We just set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out)
		*error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0)
					m_freem(m0);
				if (control)
					m_freem(control);
				if (error_out)
					*error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0)
			m_freem(m0);
		if (control)
			m_freem(control);
		if (error_out)
			*error_out = ENOBUFS;
	}

	return result;
}
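
/*
 * Sketch of the datagram-delivery pattern (assumed caller; `from' is
 * the sender's address, `error' a local of the hypothetical caller):
 * queue the packet and its source address as one record, then wake any
 * reader.  On failure sbappendaddr() has already freed m and control.
 */
#if 0
	if (sbappendaddr(&so->so_rcv, from, m, NULL, &error) != 0)
		sorwakeup(so);
#endif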

static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out)
		*error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0)
					m_freem(m0);
				if (control)
					m_freem(control);
				if (error_out)
					*error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0)
			m_freem(m0);
		if (control)
			m_freem(control);
		if (error_out)
			*error_out = ENOBUFS;
	}

	return result;
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void) sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}

/*
 * Drop data from (the front of) a sockbuf.
 * Use m_freem_list to free the mbuf structures
 * under a single lock.  This is done by pruning
 * the top of the tree from the body: we keep track
 * of where we get to in the tree and then zero the
 * two pertinent pointers, m_nextpkt and m_next.
 * The socket buffer is then updated to point at the
 * new top of the tree and the pruned area is released
 * via m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * Temporarily replacing this panic with printf
				 * because it occurs occasionally when closing
				 * a socket, and there is no harm in ignoring
				 * it.  This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void) memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
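
/*
 * Example fragment (assumed caller context; the constants are the
 * standard ones from sys/socket.h): wrap a timestamp in a control mbuf
 * and queue it with the data on the receive buffer.
 */
#if 0
	struct timeval tv;
	struct mbuf *control;

	microtime(&tv);
	control = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
	    SCM_TIMESTAMP, SOL_SOCKET);
	if (control != NULL)
		(void) sbappendcontrol(&so->so_rcv, m, control, NULL);
#endif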

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)
{
	return EOPNOTSUPP;
}


/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}


int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr,
	   struct uio *uio, struct mbuf *top,
	   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so,
	      struct sockaddr **paddr,
	      struct uio *uio, struct mbuf **mp0,
	      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}


int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}

#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}

/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	     (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}

/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	return (sb->sb_flags & SB_LOCK ?
	    ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
	    (sb->sb_flags |= SB_LOCK), 0);
}

/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr_saved;
	lck_mtx_t *mutex_held;

	lr_saved = (unsigned int) __builtin_return_address(0);

	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n",
			    sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n",
			    so, so->so_usecount, lr_saved, sb->sb_flags);
		so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
		lck_mtx_unlock(mutex_held);
	}
}

void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */
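
/*
 * Sketch of the lock/unlock pairing (modeled on sbflush() above;
 * assumed caller context): sblock() serializes users of the buffer,
 * and sbunlock() with keeplocked set releases only the buffer lock,
 * leaving the socket lock held.
 */
#if 0
	(void) sblock(&so->so_rcv, M_WAIT);
	/* ... examine or drain so->so_rcv ... */
	sbunlock(&so->so_rcv, 1);	/* keep socket locked */
#endif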

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}

/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) +
	    sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "");