/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *    The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *    @(#)uipc_socket2.c    8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define DBG_FNC_SBDROP      NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND    NETDBG_CODE(DBG_NETSOCK, 5)


/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long  sb_max = SB_MAX;            /* XXX should be static */

static  u_long sb_efficiency = 8;   /* parameter for sbreserve() */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
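
/*
 * Illustrative sequence (added commentary, not part of the original
 * source): for a TCP-style passive open, the protocol would do roughly
 *
 *        so = sonewconn(head, 0, from); // queued on head's so_incomp
 *        ...handshake completes...
 *        soisconnected(so);             // moved to so_comp; accept() wakes
 *
 * whereas a connectionless protocol calls soisconnected() directly.
 */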

void
soisconnecting(so)
    register struct socket *so;
{

    so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
    so->so_state |= SS_ISCONNECTING;

    sflt_notify(so, sock_evt_connecting, NULL);
}

void
soisconnected(so)
    struct socket *so;
{
    struct socket *head = so->so_head;

    so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
    so->so_state |= SS_ISCONNECTED;

    sflt_notify(so, sock_evt_connected, NULL);

    if (head && (so->so_state & SS_INCOMP)) {
        so->so_state &= ~SS_INCOMP;
        so->so_state |= SS_COMP;
        if (head->so_proto->pr_getlock != NULL) {
            socket_unlock(so, 0);
            socket_lock(head, 1);
        }
        postevent(head, 0, EV_RCONN);
        TAILQ_REMOVE(&head->so_incomp, so, so_list);
        head->so_incqlen--;
        TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
        sorwakeup(head);
        wakeup_one((caddr_t)&head->so_timeo);
        if (head->so_proto->pr_getlock != NULL) {
            socket_unlock(head, 1);
            socket_lock(so, 0);
        }
    } else {
        postevent(so, 0, EV_WCONN);
        wakeup((caddr_t)&so->so_timeo);
        sorwakeup(so);
        sowwakeup(so);
    }
}

void
soisdisconnecting(so)
    register struct socket *so;
{
    so->so_state &= ~SS_ISCONNECTING;
    so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
    sflt_notify(so, sock_evt_disconnecting, NULL);
    wakeup((caddr_t)&so->so_timeo);
    sowwakeup(so);
    sorwakeup(so);
}

void
soisdisconnected(so)
    register struct socket *so;
{
    so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
    so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
    sflt_notify(so, sock_evt_disconnected, NULL);
    wakeup((caddr_t)&so->so_timeo);
    sowwakeup(so);
    sorwakeup(so);
}

/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
    register struct socket *head;
{
    struct socket *so, *sonext = NULL;
    unsigned int i, j, qlen;
    static int rnd;
    static struct timeval old_runtime;
    static unsigned int cur_cnt, old_cnt;
    struct timeval tv;

    microtime(&tv);
    if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
        old_runtime = tv;
        old_cnt = cur_cnt / i;
        cur_cnt = 0;
    }

    so = TAILQ_FIRST(&head->so_incomp);
    if (!so)
        return (NULL);

    qlen = head->so_incqlen;
    if (++cur_cnt > qlen || old_cnt > qlen) {
        rnd = (314159 * rnd + 66329) & 0xffff;
        j = ((qlen + 1) * rnd) >> 16;
//###LD To clean up
        while (j-- && so) {
//            if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
            socket_lock(so, 1);
            sonext = TAILQ_NEXT(so, so_list);
//            in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
            socket_unlock(so, 1);
            so = sonext;
        }
    }

//    if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//        return (NULL);
//    else
    return (so);
}
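
/*
 * Note on the selection above (added commentary, not part of the original
 * source): rnd is stepped as a 16-bit linear congruential generator, so
 * ((qlen + 1) * rnd) >> 16 yields an index j roughly uniform over 0..qlen.
 * Walking j entries can run off the end of so_incomp, which is what
 * produces the advertised chance of returning NULL.
 */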

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
    register struct socket *head;
    int connstatus;
{
    int error = 0;
    register struct socket *so;
    lck_mtx_t *mutex_held;

    if (head->so_proto->pr_getlock != NULL)
        mutex_held = (*head->so_proto->pr_getlock)(head, 0);
    else
        mutex_held = head->so_proto->pr_domain->dom_mtx;
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    if (head->so_qlen > 3 * head->so_qlimit / 2)
        return ((struct socket *)0);
    so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
    if (so == NULL)
        return ((struct socket *)0);
    /* check if head was closed during the soalloc */
    if (head->so_proto == NULL) {
        sodealloc(so);
        return ((struct socket *)0);
    }

    so->so_head = head;
    so->so_type = head->so_type;
    so->so_options = head->so_options &~ SO_ACCEPTCONN;
    so->so_linger = head->so_linger;
    so->so_state = head->so_state | SS_NOFDREF;
    so->so_proto = head->so_proto;
    so->so_timeo = head->so_timeo;
    so->so_pgid = head->so_pgid;
    so->so_uid = head->so_uid;
    so->so_usecount = 1;

#ifdef __APPLE__
    so->so_rcv.sb_flags |= SB_RECV;    /* XXX */
    so->so_rcv.sb_so = so->so_snd.sb_so = so;
    TAILQ_INIT(&so->so_evlist);
#endif

    if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
        sflt_termsock(so);
        sodealloc(so);
        return ((struct socket *)0);
    }

    /*
     * Must be done with head unlocked to avoid deadlock
     * for protocols with per-socket mutexes.
     */
    if (head->so_proto->pr_unlock)
        socket_unlock(head, 0);
    if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
        sflt_termsock(so);
        sodealloc(so);
        if (head->so_proto->pr_unlock)
            socket_lock(head, 0);
        return ((struct socket *)0);
    }
    if (head->so_proto->pr_unlock)
        socket_lock(head, 0);
#ifdef __APPLE__
    so->so_proto->pr_domain->dom_refs++;
#endif

    if (connstatus) {
        TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
        so->so_state |= SS_COMP;
    } else {
        TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
        so->so_state |= SS_INCOMP;
        head->so_incqlen++;
    }
    head->so_qlen++;

#ifdef __APPLE__
    /* Attach socket filters for this protocol */
    sflt_initsock(so);
#endif
    if (connstatus) {
        so->so_state |= connstatus;
        sorwakeup(head);
        wakeup((caddr_t)&head->so_timeo);
    }
    return (so);
}
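
/*
 * Added commentary (not part of the original source): the so_qlen check
 * above is the classical BSD 1.5x fudge factor -- a listen() backlog of
 * qlimit admits up to 3 * qlimit / 2 sockets across so_incomp and so_comp
 * before further connection attempts are refused.
 */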

struct socket *
sonewconn(
    struct socket *head,
    int connstatus,
    const struct sockaddr *from)
{
    int error = 0;
    struct socket_filter_entry *filter;
    int filtered = 0;

    error = 0;
    for (filter = head->so_filt; filter && (error == 0);
         filter = filter->sfe_next_onsocket) {
        if (filter->sfe_filter->sf_filter.sf_connect_in) {
            if (filtered == 0) {
                filtered = 1;
                sflt_use(head);
                socket_unlock(head, 0);
            }
            error = filter->sfe_filter->sf_filter.sf_connect_in(
                filter->sfe_cookie, head, from);
        }
    }
    if (filtered != 0) {
        socket_lock(head, 0);
        sflt_unuse(head);
    }

    if (error) {
        return NULL;
    }

    return sonewconn_internal(head, connstatus);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in the case of PRU_SHUTDOWN).  Socantrcvmore indicates that no
 * more data will be received, and will normally be applied to the socket
 * by a protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
    struct socket *so;
{
    so->so_state |= SS_CANTSENDMORE;
    sflt_notify(so, sock_evt_cantsendmore, NULL);
    sowwakeup(so);
}

void
socantrcvmore(so)
    struct socket *so;
{
    so->so_state |= SS_CANTRCVMORE;
    sflt_notify(so, sock_evt_cantrecvmore, NULL);
    sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
    struct sockbuf *sb;
{
    int error = 0, lr, lr_saved;
    struct socket *so = sb->sb_so;
    lck_mtx_t *mutex_held;
    struct timespec ts;

#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#endif

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;

    sb->sb_flags |= SB_WAIT;

    if (so->so_usecount < 1)
        panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
    ts.tv_sec = sb->sb_timeo.tv_sec;
    ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
    error = msleep((caddr_t)&sb->sb_cc, mutex_held,
        (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
        &ts);

    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    if (so->so_usecount < 1)
        panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

    if ((so->so_state & SS_DRAINING)) {
        error = EBADF;
    }

    return (error);
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
    register struct sockbuf *sb;
{
    struct socket *so = sb->sb_so;
    lck_mtx_t *mutex_held;
    int error = 0, lr, lr_saved;

#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#endif

    if (so == NULL)
        panic("sb_lock: null so back pointer sb=%x\n", sb);

    while (sb->sb_flags & SB_LOCK) {
        sb->sb_flags |= SB_WANT;
        if (so->so_proto->pr_getlock != NULL)
            mutex_held = (*so->so_proto->pr_getlock)(so, 0);
        else
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        if (so->so_usecount < 1)
            panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
        error = msleep((caddr_t)&sb->sb_flags, mutex_held,
            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
        if (so->so_usecount < 1)
            panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
        if (error)
            return (error);
    }
    sb->sb_flags |= SB_LOCK;
    return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
    register struct socket *so;
    register struct sockbuf *sb;
{
    struct proc *p = current_proc();
    sb->sb_flags &= ~SB_SEL;
    selwakeup(&sb->sb_sel);
    if (sb->sb_flags & SB_WAIT) {
        sb->sb_flags &= ~SB_WAIT;
        wakeup((caddr_t)&sb->sb_cc);
    }
    if (so->so_state & SS_ASYNC) {
        /*
         * As with fcntl(F_SETOWN), a negative so_pgid names a
         * process group; a positive one names a single process.
         */
        if (so->so_pgid < 0)
            gsignal(-so->so_pgid, SIGIO);
        else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
            psignal(p, SIGIO);
    }
    if (sb->sb_flags & SB_KNOTE) {
        KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
    }
    if (sb->sb_flags & SB_UPCALL) {
        socket_unlock(so, 0);
        (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
        socket_lock(so, 0);
    }
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */
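
/*
 * Illustrative layout (added commentary, not part of the original source):
 * a receive buffer holding two datagram records, each preceded by the
 * sender's name per convention 1 above:
 *
 *        sb_mb -> [MT_SONAME] --m_next--> [MT_DATA]
 *                     |
 *                 m_nextpkt
 *                     |
 *                     v
 *                 [MT_SONAME] --m_next--> [MT_DATA]
 */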

int
soreserve(so, sndcc, rcvcc)
    register struct socket *so;
    u_long sndcc, rcvcc;
{

    if (sbreserve(&so->so_snd, sndcc) == 0)
        goto bad;
    if (sbreserve(&so->so_rcv, rcvcc) == 0)
        goto bad2;
    if (so->so_rcv.sb_lowat == 0)
        so->so_rcv.sb_lowat = 1;
    if (so->so_snd.sb_lowat == 0)
        so->so_snd.sb_lowat = MCLBYTES;
    if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
        so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
    return (0);
bad2:
#ifdef __APPLE__
    selthreadclear(&so->so_snd.sb_sel);
#endif
    sbrelease(&so->so_snd);
bad:
    return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
    struct sockbuf *sb;
    u_long cc;
{
    if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
        return (0);
    sb->sb_hiwat = cc;
    sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
    if (sb->sb_lowat > sb->sb_hiwat)
        sb->sb_lowat = sb->sb_hiwat;
    return (1);
}
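
/*
 * Worked example (added commentary, not part of the original source):
 * with the default sb_efficiency of 8 and a requested cc of 32 KB,
 * sb_mbmax becomes min(256 KB, sb_max), so a buffer of poorly packed
 * small mbufs may consume up to 8x the nominal byte limit in mbuf storage
 * before sb_mbcnt, rather than sb_cc, limits sbspace().
 */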

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
    struct sockbuf *sb;
{

    sbflush(sb);
    sb->sb_hiwat = 0;
    sb->sb_mbmax = 0;

}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
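
/*
 * Typical producer-side usage (a sketch added for illustration, not part
 * of the original source), as a protocol might drive it when delivering
 * data to a stream socket's receive buffer:
 *
 *        if (sbspace(&so->so_rcv) < m->m_pkthdr.len)
 *                goto drop;
 *        if (sbappend(&so->so_rcv, m))
 *                sorwakeup(so);
 *
 * The wakeup is skipped when sbappend() returns 0, since in that case the
 * socket filters consumed the data and nothing was queued.
 */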

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
    struct sockbuf *sb;
    struct mbuf *m;
{
    register struct mbuf *n, *sb_first;
    int result = 0;
    int error = 0;
    int filtered = 0;

    if (m == 0)
        return 0;

    KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

again:
    sb_first = n = sb->sb_mb;
    if (n) {
        while (n->m_nextpkt)
            n = n->m_nextpkt;
        do {
            if (n->m_flags & M_EOR) {
                result = sbappendrecord(sb, m); /* XXXXXX!!!! */
                KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
                return result;
            }
        } while (n->m_next && (n = n->m_next));
    }

    if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
        error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
        if (error) {
            /* no data was appended, caller should not call sowakeup */
            return 0;
        }

        /*
         * If we ran any filters, the socket lock was dropped.  n and
         * sb_first cached data from the socket buffer.  This cache is
         * not valid since we dropped the lock.  We must start over.
         * Since filtered is set we won't run through the filters a
         * second time.  We just set n and sb_first again.
         */
        if (filtered)
            goto again;
    }

    result = sbcompress(sb, m, n);

    KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

    return result;
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
    register struct sockbuf *sb;
{
    register struct mbuf *m;
    register struct mbuf *n = 0;
    register u_long len = 0, mbcnt = 0;
    lck_mtx_t *mutex_held;

    if (sb->sb_so->so_proto->pr_getlock != NULL)
        mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
    else
        mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    if (sbchecking == 0)
        return;

    for (m = sb->sb_mb; m; m = n) {
        n = m->m_nextpkt;
        for (; m; m = m->m_next) {
            len += m->m_len;
            mbcnt += MSIZE;
            if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
                mbcnt += m->m_ext.ext_size;
        }
    }
    if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
        panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
            mbcnt, sb->sb_mbcnt);
    }
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
    register struct sockbuf *sb;
    register struct mbuf *m0;
{
    register struct mbuf *m;
    int result = 0;

    if (m0 == 0)
        return 0;

    if ((sb->sb_flags & SB_RECV) != 0) {
        int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
            sock_data_filt_flag_record, NULL);
        if (error != 0) {
            if (error != EJUSTRETURN)
                m_freem(m0);
            return 0;
        }
    }

    m = sb->sb_mb;
    if (m)
        while (m->m_nextpkt)
            m = m->m_nextpkt;
    /*
     * Put the first mbuf on the queue.
     * Note this permits zero length records.
     */
    sballoc(sb, m0);
    if (m)
        m->m_nextpkt = m0;
    else
        sb->sb_mb = m0;
    m = m0->m_next;
    m0->m_next = 0;
    if (m && (m0->m_flags & M_EOR)) {
        m0->m_flags &= ~M_EOR;
        m->m_flags |= M_EOR;
    }
    return sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
    struct sockbuf *sb;
    struct mbuf *m0;
{
    struct mbuf *m;
    struct mbuf **mp;

    if (m0 == 0)
        return 0;

    if ((sb->sb_flags & SB_RECV) != 0) {
        int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
            sock_data_filt_flag_oob, NULL);

        if (error) {
            if (error != EJUSTRETURN) {
                m_freem(m0);
            }
            return 0;
        }
    }

    for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
        m = *mp;
    again:
        switch (m->m_type) {

        case MT_OOBDATA:
            continue;        /* WANT next train */

        case MT_CONTROL:
            m = m->m_next;
            if (m)
                goto again;  /* inspect THIS train further */
        }
        break;
    }
    /*
     * Put the first mbuf on the queue.
     * Note this permits zero length records.
     */
    sballoc(sb, m0);
    m0->m_nextpkt = *mp;
    *mp = m0;
    m = m0->m_next;
    m0->m_next = 0;
    if (m && (m0->m_flags & M_EOR)) {
        m0->m_flags &= ~M_EOR;
        m->m_flags |= M_EOR;
    }
    return sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
    register struct sockbuf *sb;
    struct sockaddr *asa;
    struct mbuf *m0, *control;
{
    register struct mbuf *m, *n;
    int space = asa->sa_len;

    if (m0 && (m0->m_flags & M_PKTHDR) == 0)
        panic("sbappendaddr");

    if (m0)
        space += m0->m_pkthdr.len;
    for (n = control; n; n = n->m_next) {
        space += n->m_len;
        if (n->m_next == 0)    /* keep pointer to last control buf */
            break;
    }
    if (space > sbspace(sb))
        return (0);
    if (asa->sa_len > MLEN)
        return (0);
    MGET(m, M_DONTWAIT, MT_SONAME);
    if (m == 0)
        return (0);
    m->m_len = asa->sa_len;
    bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
    if (n)
        n->m_next = m0;        /* concatenate data to control */
    else
        control = m0;
    m->m_next = control;
    for (n = m; n; n = n->m_next)
        sballoc(sb, n);
    n = sb->sb_mb;
    if (n) {
        while (n->m_nextpkt)
            n = n->m_nextpkt;
        n->m_nextpkt = m;
    } else
        sb->sb_mb = m;
    postevent(0, sb, EV_RWBYTES);
    return (1);
}

int
sbappendaddr(
    struct sockbuf *sb,
    struct sockaddr *asa,
    struct mbuf *m0,
    struct mbuf *control,
    int *error_out)
{
    int result = 0;

    if (error_out) *error_out = 0;

    if (m0 && (m0->m_flags & M_PKTHDR) == 0)
        panic("sbappendaddrorfree");

    /* Call socket data in filters */
    if ((sb->sb_flags & SB_RECV) != 0) {
        int error;
        error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
        if (error) {
            if (error != EJUSTRETURN) {
                if (m0) m_freem(m0);
                if (control) m_freem(control);
                if (error_out) *error_out = error;
            }
            return 0;
        }
    }

    result = sbappendaddr_internal(sb, asa, m0, control);
    if (result == 0) {
        if (m0) m_freem(m0);
        if (control) m_freem(control);
        if (error_out) *error_out = ENOBUFS;
    }

    return result;
}

static int
sbappendcontrol_internal(sb, m0, control)
    struct sockbuf *sb;
    struct mbuf *control, *m0;
{
    register struct mbuf *m, *n;
    int space = 0;

    if (control == 0)
        panic("sbappendcontrol");

    for (m = control; ; m = m->m_next) {
        space += m->m_len;
        if (m->m_next == 0)
            break;
    }
    n = m;            /* save pointer to last control buffer */
    for (m = m0; m; m = m->m_next)
        space += m->m_len;
    if (space > sbspace(sb))
        return (0);
    n->m_next = m0;   /* concatenate data to control */
    for (m = control; m; m = m->m_next)
        sballoc(sb, m);
    n = sb->sb_mb;
    if (n) {
        while (n->m_nextpkt)
            n = n->m_nextpkt;
        n->m_nextpkt = control;
    } else
        sb->sb_mb = control;
    postevent(0, sb, EV_RWBYTES);
    return (1);
}

int
sbappendcontrol(
    struct sockbuf *sb,
    struct mbuf *m0,
    struct mbuf *control,
    int *error_out)
{
    int result = 0;

    if (error_out) *error_out = 0;

    if (sb->sb_flags & SB_RECV) {
        int error;
        error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
        if (error) {
            if (error != EJUSTRETURN) {
                if (m0) m_freem(m0);
                if (control) m_freem(control);
                if (error_out) *error_out = error;
            }
            return 0;
        }
    }

    result = sbappendcontrol_internal(sb, m0, control);
    if (result == 0) {
        if (m0) m_freem(m0);
        if (control) m_freem(control);
        if (error_out) *error_out = ENOBUFS;
    }

    return result;
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
    register struct sockbuf *sb;
    register struct mbuf *m, *n;
{
    register int eor = 0;
    register struct mbuf *o;

    while (m) {
        eor |= m->m_flags & M_EOR;
        if (m->m_len == 0 &&
            (eor == 0 ||
             (((o = m->m_next) || (o = n)) &&
              o->m_type == m->m_type))) {
            m = m_free(m);
            continue;
        }
        if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
            M_WRITABLE(n) &&
#endif
            m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
            m->m_len <= M_TRAILINGSPACE(n) &&
            n->m_type == m->m_type) {
            bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
                (unsigned)m->m_len);
            n->m_len += m->m_len;
            sb->sb_cc += m->m_len;
            m = m_free(m);
            continue;
        }
        if (n)
            n->m_next = m;
        else
            sb->sb_mb = m;
        sballoc(sb, m);
        n = m;
        m->m_flags &= ~M_EOR;
        m = m->m_next;
        n->m_next = 0;
    }
    if (eor) {
        if (n)
            n->m_flags |= eor;
        else
            printf("semi-panic: sbcompress\n");
    }
    postevent(0, sb, EV_RWBYTES);
    return 1;
}
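
/*
 * Added commentary (not part of the original source): a trailing mbuf is
 * only coalesced into its predecessor n when the copy is cheap (m_len no
 * more than MCLBYTES / 4), the data fits in n's trailing space, and both
 * mbufs carry the same type; record boundaries (M_EOR) are never merged
 * across.
 */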

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
    register struct sockbuf *sb;
{
    if (sb->sb_so == NULL)
        panic("sbflush sb->sb_so already null sb=%x\n", sb);
    (void)sblock(sb, M_WAIT);
    while (sb->sb_mbcnt) {
        /*
         * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
         * we would loop forever.  Panic instead.
         */
        if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
            break;
        sbdrop(sb, (int)sb->sb_cc);
    }
    if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
        panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x",
            sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

    postevent(0, sb, EV_RWBYTES);
    sbunlock(sb, 1);    /* keep socket locked */

}

/*
 * Drop data from (the front of) a sockbuf.
 * Use m_freem_list to free the mbuf structures
 * under a single lock.  This is done by pruning
 * the top of the tree from the body: we keep track
 * of where we get to in the tree and then zero the
 * two pertinent pointers, m_nextpkt and m_next.
 * The socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
    register struct sockbuf *sb;
    register int len;
{
    register struct mbuf *m, *free_list, *ml;
    struct mbuf *next, *last;

    KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

    next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
    free_list = last = m;
    ml = (struct mbuf *)0;

    while (len > 0) {
        if (m == 0) {
            if (next == 0) {
                /*
                 * temporarily replacing this panic with printf
                 * because it occurs occasionally when closing
                 * a socket, and there is no harm in ignoring
                 * it.  This problem will be investigated
                 * further.
                 */
                /* panic("sbdrop"); */
                printf("sbdrop - count not zero\n");
                len = 0;
                /*
                 * zero the counts.  if we have no mbufs,
                 * we have no data (PR-2986815)
                 */
                sb->sb_cc = 0;
                sb->sb_mbcnt = 0;
                break;
            }
            m = last = next;
            next = m->m_nextpkt;
            continue;
        }
        if (m->m_len > len) {
            m->m_len -= len;
            m->m_data += len;
            sb->sb_cc -= len;
            break;
        }
        len -= m->m_len;
        sbfree(sb, m);

        ml = m;
        m = m->m_next;
    }
    while (m && m->m_len == 0) {
        sbfree(sb, m);

        ml = m;
        m = m->m_next;
    }
    if (ml) {
        ml->m_next = (struct mbuf *)0;
        last->m_nextpkt = (struct mbuf *)0;
        m_freem_list(free_list);
    }
    if (m) {
        sb->sb_mb = m;
        m->m_nextpkt = next;
    } else
        sb->sb_mb = next;

    postevent(0, sb, EV_RWBYTES);

    KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
    register struct sockbuf *sb;
{
    register struct mbuf *m, *mn;

    m = sb->sb_mb;
    if (m) {
        sb->sb_mb = m->m_nextpkt;
        do {
            sbfree(sb, m);
            MFREE(m, mn);
            m = mn;
        } while (m);
    }
    postevent(0, sb, EV_RWBYTES);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
    caddr_t p;
    register int size;
    int type, level;
{
    register struct cmsghdr *cp;
    struct mbuf *m;

    if (CMSG_SPACE((u_int)size) > MLEN)
        return ((struct mbuf *) NULL);
    if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
        return ((struct mbuf *) NULL);
    cp = mtod(m, struct cmsghdr *);
    /* XXX check size? */
    (void)memcpy(CMSG_DATA(cp), p, size);
    m->m_len = CMSG_SPACE(size);
    cp->cmsg_len = CMSG_LEN(size);
    cp->cmsg_level = level;
    cp->cmsg_type = type;
    return (m);
}
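
/*
 * Example usage (a sketch added for illustration, not part of the
 * original source): building the control mbuf a protocol might pass to
 * sbappendaddr() to report a received packet's destination address:
 *
 *        struct in_addr ia;    // filled in from the received packet
 *        struct mbuf *ctl;
 *
 *        ctl = sbcreatecontrol((caddr_t)&ia, sizeof (ia),
 *            IP_RECVDSTADDR, IPPROTO_IP);
 *        if (ctl == NULL)
 *                ;             // no mbuf, or CMSG_SPACE(size) > MLEN
 */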

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
    return EOPNOTSUPP;
}


int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
    return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
    return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
    return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
    return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
    return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct proc *p)
{
    return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
    return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
    return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
    return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
    return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
    return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
    return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control,
    struct proc *p)
{
    return EOPNOTSUPP;
}


/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
    sb->st_blksize = so->so_snd.sb_hiwat;
    return 0;
}


int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
    return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so,
    struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0,
    struct mbuf **controlp, int *flagsp)
{
    return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
    return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
    return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
    return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so,
    struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0,
    struct mbuf **controlp, int *flagsp)
{
    return EOPNOTSUPP;
}


int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
    __unused kauth_cred_t cred, __unused void *wql)
{
    return EOPNOTSUPP;
}

#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */

int
sb_notify(struct sockbuf *sb)
{
    return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
    return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
        (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}

/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
    return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
    return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
        (so->so_state & SS_CANTRCVMORE) ||
        so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */

int
sowriteable(struct socket *so)
{
    return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
        ((so->so_state & SS_ISCONNECTED) ||
         (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
        (so->so_state & SS_CANTSENDMORE) ||
        so->so_error);
}

/* adjust counters in sb reflecting allocation of m */

void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
    sb->sb_cc += m->m_len;
    sb->sb_mbcnt += MSIZE;
    if (m->m_flags & M_EXT)
        sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
    sb->sb_cc -= m->m_len;
    sb->sb_mbcnt -= MSIZE;
    if (m->m_flags & M_EXT)
        sb->sb_mbcnt -= m->m_ext.ext_size;
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
    return (sb->sb_flags & SB_LOCK ?
        ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
        (sb->sb_flags |= SB_LOCK, 0));
}
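
/*
 * Added commentary (not part of the original source): the expression
 * covers three cases -- already locked with M_WAIT: sleep in sb_lock()
 * and return its error; already locked without M_WAIT: fail with
 * EWOULDBLOCK; unlocked: take the lock and return 0.
 */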

/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
    struct socket *so = sb->sb_so;
    int lr, lr_saved;
    lck_mtx_t *mutex_held;

#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#endif
    sb->sb_flags &= ~SB_LOCK;

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;

    if (keeplocked == 0)
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    if (sb->sb_flags & SB_WANT) {
        sb->sb_flags &= ~SB_WANT;
        if (so->so_usecount < 0)
            panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n",
                sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

        wakeup((caddr_t)&(sb)->sb_flags);
    }
    if (keeplocked == 0) {    /* unlock on exit */
        so->so_usecount--;
        if (so->so_usecount < 0)
            panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n",
                so, so->so_usecount, lr_saved, sb->sb_flags);
        so->reserved4 = lr_saved;
        lck_mtx_unlock(mutex_held);
    }
}

void
sorwakeup(struct socket *so)
{
    if (sb_notify(&so->so_rcv))
        sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
    if (sb_notify(&so->so_snd))
        sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
    struct sockaddr *sa;
    int canwait;
{
    struct sockaddr *sa2;

    MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
        canwait ? M_WAITOK : M_NOWAIT);
    if (sa2)
        bcopy(sa, sa2, sa->sa_len);
    return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
    xso->xso_len = sizeof *xso;
    xso->xso_so = so;
    xso->so_type = so->so_type;
    xso->so_options = so->so_options;
    xso->so_linger = so->so_linger;
    xso->so_state = so->so_state;
    xso->so_pcb = so->so_pcb;
    if (so->so_proto) {
        xso->xso_protocol = so->so_proto->pr_protocol;
        xso->xso_family = so->so_proto->pr_domain->dom_family;
    } else
        xso->xso_protocol = xso->xso_family = 0;
    xso->so_qlen = so->so_qlen;
    xso->so_incqlen = so->so_incqlen;
    xso->so_qlimit = so->so_qlimit;
    xso->so_timeo = so->so_timeo;
    xso->so_error = so->so_error;
    xso->so_pgid = so->so_pgid;
    xso->so_oobmark = so->so_oobmark;
    sbtoxsockbuf(&so->so_snd, &xso->so_snd);
    sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
    xso->so_uid = so->so_uid;
}

/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
    xsb->sb_cc = sb->sb_cc;
    xsb->sb_hiwat = sb->sb_hiwat;
    xsb->sb_mbcnt = sb->sb_mbcnt;
    xsb->sb_mbmax = sb->sb_mbmax;
    xsb->sb_lowat = sb->sb_lowat;
    xsb->sb_flags = sb->sb_flags;
    xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) +
        sb->sb_timeo.tv_usec / tick;
    if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
        xsb->sb_timeo = 1;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "");