]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_socket2.c
xnu-792.12.6.tar.gz
[apple/xnu.git] / bsd / kern / uipc_socket2.c
CommitLineData
1c79356b 1/*
8ad349bb 2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
1c79356b 3 *
8ad349bb 4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
1c79356b 5 *
8ad349bb
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
1c79356b
A
29 */
30/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
31/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
32/*
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
9bccf70c 65 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
1c79356b
A
66 */
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/domain.h>
71#include <sys/kernel.h>
91447636
A
72#include <sys/proc_internal.h>
73#include <sys/kauth.h>
1c79356b
A
74#include <sys/malloc.h>
75#include <sys/mbuf.h>
76#include <sys/protosw.h>
77#include <sys/stat.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
80#include <sys/signalvar.h>
81#include <sys/sysctl.h>
82#include <sys/ev.h>
91447636
A
83#include <kern/locks.h>
84#include <net/route.h>
85#include <netinet/in.h>
86#include <netinet/in_pcb.h>
fa4905b1
A
87#include <sys/kdebug.h>
88
89#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
90#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
91
92
1c79356b
A
93/*
94 * Primitive routines for operating on sockets and socket buffers
95 */
96
97u_long sb_max = SB_MAX; /* XXX should be static */
98
99static u_long sb_efficiency = 8; /* parameter for sbreserve() */
100
1c79356b
A
101/*
102 * Procedures to manipulate state flags of socket
103 * and do appropriate wakeups. Normal sequence from the
104 * active (originating) side is that soisconnecting() is
105 * called during processing of connect() call,
106 * resulting in an eventual call to soisconnected() if/when the
107 * connection is established. When the connection is torn down
9bccf70c 108 * soisdisconnecting() is called during processing of disconnect() call,
1c79356b
A
109 * and soisdisconnected() is called when the connection to the peer
110 * is totally severed. The semantics of these routines are such that
111 * connectionless protocols can call soisconnected() and soisdisconnected()
112 * only, bypassing the in-progress calls when setting up a ``connection''
113 * takes no time.
114 *
115 * From the passive side, a socket is created with
e3027f41
A
116 * two queues of sockets: so_incomp for connections in progress
117 * and so_comp for connections already made and awaiting user acceptance.
9bccf70c 118 * As a protocol is preparing incoming connections, it creates a socket
e3027f41 119 * structure queued on so_incomp by calling sonewconn(). When the connection
1c79356b 120 * is established, soisconnected() is called, and transfers the
e3027f41 121 * socket structure to so_comp, making it available to accept().
1c79356b 122 *
9bccf70c 123 * If a socket is closed with sockets on either
e3027f41 124 * so_incomp or so_comp, these sockets are dropped.
9bccf70c 125 *
1c79356b
A
126 * If higher level protocols are implemented in
127 * the kernel, the wakeups done here will sometimes
128 * cause software-interrupt process scheduling.
129 */
1c79356b
A
130void
131soisconnecting(so)
132 register struct socket *so;
133{
134
135 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
136 so->so_state |= SS_ISCONNECTING;
91447636
A
137
138 sflt_notify(so, sock_evt_connecting, NULL);
1c79356b
A
139}
140
141void
142soisconnected(so)
9bccf70c
A
143 struct socket *so;
144{
145 struct socket *head = so->so_head;
1c79356b
A
146
147 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
148 so->so_state |= SS_ISCONNECTED;
91447636
A
149
150 sflt_notify(so, sock_evt_connected, NULL);
151
1c79356b 152 if (head && (so->so_state & SS_INCOMP)) {
ff6e181a
A
153 so->so_state &= ~SS_INCOMP;
154 so->so_state |= SS_COMP;
155 if (head->so_proto->pr_getlock != NULL) {
156 socket_unlock(so, 0);
91447636 157 socket_lock(head, 1);
ff6e181a 158 }
91447636 159 postevent(head, 0, EV_RCONN);
1c79356b
A
160 TAILQ_REMOVE(&head->so_incomp, so, so_list);
161 head->so_incqlen--;
1c79356b 162 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
1c79356b 163 sorwakeup(head);
91447636 164 wakeup_one((caddr_t)&head->so_timeo);
ff6e181a 165 if (head->so_proto->pr_getlock != NULL) {
91447636 166 socket_unlock(head, 1);
ff6e181a
A
167 socket_lock(so, 0);
168 }
1c79356b 169 } else {
91447636 170 postevent(so, 0, EV_WCONN);
1c79356b
A
171 wakeup((caddr_t)&so->so_timeo);
172 sorwakeup(so);
173 sowwakeup(so);
174 }
175}
176
177void
178soisdisconnecting(so)
179 register struct socket *so;
9bccf70c 180{
1c79356b
A
181 so->so_state &= ~SS_ISCONNECTING;
182 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
91447636 183 sflt_notify(so, sock_evt_disconnecting, NULL);
1c79356b
A
184 wakeup((caddr_t)&so->so_timeo);
185 sowwakeup(so);
186 sorwakeup(so);
187}
188
189void
190soisdisconnected(so)
191 register struct socket *so;
9bccf70c 192{
1c79356b 193 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
9bccf70c 194 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
91447636 195 sflt_notify(so, sock_evt_disconnected, NULL);
1c79356b
A
196 wakeup((caddr_t)&so->so_timeo);
197 sowwakeup(so);
198 sorwakeup(so);
199}
200
201/*
202 * Return a random connection that hasn't been serviced yet and
203 * is eligible for discard. There is a one in qlen chance that
204 * we will return a null, saying that there are no dropable
205 * requests. In this case, the protocol specific code should drop
206 * the new request. This insures fairness.
207 *
208 * This may be used in conjunction with protocol specific queue
209 * congestion routines.
210 */
211struct socket *
212sodropablereq(head)
213 register struct socket *head;
214{
91447636 215 struct socket *so, *sonext = NULL;
1c79356b
A
216 unsigned int i, j, qlen;
217 static int rnd;
218 static struct timeval old_runtime;
219 static unsigned int cur_cnt, old_cnt;
220 struct timeval tv;
221
222 microtime(&tv);
223 if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
224 old_runtime = tv;
225 old_cnt = cur_cnt / i;
226 cur_cnt = 0;
227 }
228
229 so = TAILQ_FIRST(&head->so_incomp);
230 if (!so)
91447636 231 return (NULL);
1c79356b
A
232
233 qlen = head->so_incqlen;
234 if (++cur_cnt > qlen || old_cnt > qlen) {
235 rnd = (314159 * rnd + 66329) & 0xffff;
236 j = ((qlen + 1) * rnd) >> 16;
91447636
A
237//###LD To clean up
238 while (j-- && so) {
239// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
240 socket_lock(so, 1);
241 sonext = TAILQ_NEXT(so, so_list);
242// in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
243 socket_unlock(so, 1);
244 so = sonext;
245 }
1c79356b
A
246 }
247
91447636
A
248// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
249// return (NULL);
250// else
251 return (so);
1c79356b
A
252}
253
254/*
255 * When an attempt at a new connection is noted on a socket
256 * which accepts connections, sonewconn is called. If the
257 * connection is possible (subject to space constraints, etc.)
 258 * then we allocate a new structure, properly linked into the
259 * data structure of the original socket, and return this.
260 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
261 */
91447636
A
262static struct socket *
263sonewconn_internal(head, connstatus)
1c79356b
A
264 register struct socket *head;
265 int connstatus;
9bccf70c
A
266{
267 int error = 0;
1c79356b 268 register struct socket *so;
91447636
A
269 lck_mtx_t *mutex_held;
270
271 if (head->so_proto->pr_getlock != NULL)
272 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
273 else
274 mutex_held = head->so_proto->pr_domain->dom_mtx;
275 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1c79356b
A
276
277 if (head->so_qlen > 3 * head->so_qlimit / 2)
278 return ((struct socket *)0);
0b4e3aa0 279 so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
1c79356b
A
280 if (so == NULL)
281 return ((struct socket *)0);
9bccf70c
A
282 /* check if head was closed during the soalloc */
283 if (head->so_proto == NULL) {
284 sodealloc(so);
285 return ((struct socket *)0);
1c79356b
A
286 }
287
288 so->so_head = head;
289 so->so_type = head->so_type;
290 so->so_options = head->so_options &~ SO_ACCEPTCONN;
291 so->so_linger = head->so_linger;
292 so->so_state = head->so_state | SS_NOFDREF;
293 so->so_proto = head->so_proto;
294 so->so_timeo = head->so_timeo;
295 so->so_pgid = head->so_pgid;
296 so->so_uid = head->so_uid;
91447636 297 so->so_usecount = 1;
1c79356b 298
13fec989
A
299#ifdef __APPLE__
300 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
301 so->so_rcv.sb_so = so->so_snd.sb_so = so;
302 TAILQ_INIT(&so->so_evlist);
303#endif
304
91447636
A
305 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
306 sflt_termsock(so);
9bccf70c
A
307 sodealloc(so);
308 return ((struct socket *)0);
309 }
310
91447636 311 /*
37839358 312 * Must be done with head unlocked to avoid deadlock for protocol with per socket mutexes.
91447636 313 */
37839358
A
314 if (head->so_proto->pr_unlock)
315 socket_unlock(head, 0);
91447636
A
316 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
317 sflt_termsock(so);
1c79356b 318 sodealloc(so);
37839358
A
319 if (head->so_proto->pr_unlock)
320 socket_lock(head, 0);
1c79356b
A
321 return ((struct socket *)0);
322 }
37839358
A
323 if (head->so_proto->pr_unlock)
324 socket_lock(head, 0);
9bccf70c 325#ifdef __APPLE__
1c79356b 326 so->so_proto->pr_domain->dom_refs++;
9bccf70c 327#endif
1c79356b
A
328
329 if (connstatus) {
330 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
331 so->so_state |= SS_COMP;
332 } else {
333 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
334 so->so_state |= SS_INCOMP;
335 head->so_incqlen++;
336 }
337 head->so_qlen++;
91447636 338
13fec989 339#ifdef __APPLE__
8ad349bb
A
340 /* Attach socket filters for this protocol */
341 sflt_initsock(so);
9bccf70c 342#endif
91447636
A
343 if (connstatus) {
344 so->so_state |= connstatus;
345 sorwakeup(head);
346 wakeup((caddr_t)&head->so_timeo);
347 }
1c79356b
A
348 return (so);
349}
350
91447636
A
351
352struct socket *
353sonewconn(
354 struct socket *head,
355 int connstatus,
356 const struct sockaddr *from)
357{
358 int error = 0;
359 struct socket_filter_entry *filter;
360 int filtered = 0;
361
362 error = 0;
363 for (filter = head->so_filt; filter && (error == 0);
364 filter = filter->sfe_next_onsocket) {
365 if (filter->sfe_filter->sf_filter.sf_connect_in) {
366 if (filtered == 0) {
367 filtered = 1;
368 sflt_use(head);
369 socket_unlock(head, 0);
370 }
371 error = filter->sfe_filter->sf_filter.sf_connect_in(
372 filter->sfe_cookie, head, from);
373 }
374 }
375 if (filtered != 0) {
376 socket_lock(head, 0);
377 sflt_unuse(head);
378 }
379
380 if (error) {
381 return NULL;
382 }
383
384 return sonewconn_internal(head, connstatus);
385}
386
1c79356b
A
387/*
388 * Socantsendmore indicates that no more data will be sent on the
389 * socket; it would normally be applied to a socket when the user
390 * informs the system that no more data is to be sent, by the protocol
391 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
392 * will be received, and will normally be applied to the socket by a
393 * protocol when it detects that the peer will send no more data.
394 * Data queued for reading in the socket may yet be read.
395 */
396
397void
398socantsendmore(so)
399 struct socket *so;
9bccf70c 400{
1c79356b 401 so->so_state |= SS_CANTSENDMORE;
91447636 402 sflt_notify(so, sock_evt_cantsendmore, NULL);
1c79356b
A
403 sowwakeup(so);
404}
405
406void
407socantrcvmore(so)
408 struct socket *so;
9bccf70c 409{
1c79356b 410 so->so_state |= SS_CANTRCVMORE;
91447636 411 sflt_notify(so, sock_evt_cantrecvmore, NULL);
1c79356b
A
412 sorwakeup(so);
413}
414
415/*
416 * Wait for data to arrive at/drain from a socket buffer.
417 */
418int
419sbwait(sb)
420 struct sockbuf *sb;
421{
8ad349bb 422 int error = 0, lr, lr_saved;
91447636
A
423 struct socket *so = sb->sb_so;
424 lck_mtx_t *mutex_held;
425 struct timespec ts;
426
8ad349bb
A
427#ifdef __ppc__
428 __asm__ volatile("mflr %0" : "=r" (lr));
429 lr_saved = lr;
430#endif
91447636 431
8ad349bb 432
91447636
A
433 if (so->so_proto->pr_getlock != NULL)
434 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
435 else
436 mutex_held = so->so_proto->pr_domain->dom_mtx;
1c79356b
A
437
438 sb->sb_flags |= SB_WAIT;
91447636
A
439
440 if (so->so_usecount < 1)
441 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
442 ts.tv_sec = sb->sb_timeo.tv_sec;
443 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
444 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
445 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
446 &ts);
447
448 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
449
450 if (so->so_usecount < 1)
451 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
452
453 if ((so->so_state & SS_DRAINING)) {
454 error = EBADF;
455 }
456
457 return (error);
1c79356b
A
458}
459
460/*
461 * Lock a sockbuf already known to be locked;
462 * return any error returned from sleep (EINTR).
463 */
464int
465sb_lock(sb)
466 register struct sockbuf *sb;
467{
91447636
A
468 struct socket *so = sb->sb_so;
469 lck_mtx_t * mutex_held;
8ad349bb
A
470 int error = 0, lr, lr_saved;
471
472#ifdef __ppc__
473 __asm__ volatile("mflr %0" : "=r" (lr));
474 lr_saved = lr;
475#endif
91447636
A
476
477 if (so == NULL)
478 panic("sb_lock: null so back pointer sb=%x\n", sb);
1c79356b
A
479
480 while (sb->sb_flags & SB_LOCK) {
481 sb->sb_flags |= SB_WANT;
91447636
A
482 if (so->so_proto->pr_getlock != NULL)
483 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
484 else
485 mutex_held = so->so_proto->pr_domain->dom_mtx;
486 if (so->so_usecount < 1)
487 panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
488 error = msleep((caddr_t)&sb->sb_flags, mutex_held,
489 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
490 if (so->so_usecount < 1)
491 panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
492 if (error)
1c79356b
A
493 return (error);
494 }
495 sb->sb_flags |= SB_LOCK;
496 return (0);
497}
498
499/*
500 * Wakeup processes waiting on a socket buffer.
501 * Do asynchronous notification via SIGIO
502 * if the socket has the SS_ASYNC flag set.
503 */
504void
505sowakeup(so, sb)
506 register struct socket *so;
507 register struct sockbuf *sb;
508{
509 struct proc *p = current_proc();
0b4e3aa0 510 sb->sb_flags &= ~SB_SEL;
1c79356b 511 selwakeup(&sb->sb_sel);
1c79356b
A
512 if (sb->sb_flags & SB_WAIT) {
513 sb->sb_flags &= ~SB_WAIT;
514 wakeup((caddr_t)&sb->sb_cc);
515 }
516 if (so->so_state & SS_ASYNC) {
517 if (so->so_pgid < 0)
518 gsignal(-so->so_pgid, SIGIO);
519 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
520 psignal(p, SIGIO);
521 }
91447636
A
522 if (sb->sb_flags & SB_KNOTE) {
523 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
524 }
525 if (sb->sb_flags & SB_UPCALL) {
526 socket_unlock(so, 0);
1c79356b 527 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
91447636
A
528 socket_lock(so, 0);
529 }
1c79356b
A
530}
531
532/*
533 * Socket buffer (struct sockbuf) utility routines.
534 *
535 * Each socket contains two socket buffers: one for sending data and
536 * one for receiving data. Each buffer contains a queue of mbufs,
537 * information about the number of mbufs and amount of data in the
538 * queue, and other fields allowing select() statements and notification
539 * on data availability to be implemented.
540 *
541 * Data stored in a socket buffer is maintained as a list of records.
542 * Each record is a list of mbufs chained together with the m_next
543 * field. Records are chained together with the m_nextpkt field. The upper
544 * level routine soreceive() expects the following conventions to be
545 * observed when placing information in the receive buffer:
546 *
547 * 1. If the protocol requires each message be preceded by the sender's
548 * name, then a record containing that name must be present before
549 * any associated data (mbuf's must be of type MT_SONAME).
550 * 2. If the protocol supports the exchange of ``access rights'' (really
551 * just additional data associated with the message), and there are
552 * ``rights'' to be received, then a record containing this data
553 * should be present (mbuf's must be of type MT_RIGHTS).
554 * 3. If a name or rights record exists, then it must be followed by
555 * a data record, perhaps of zero length.
556 *
557 * Before using a new socket structure it is first necessary to reserve
558 * buffer space to the socket, by calling sbreserve(). This should commit
559 * some of the available buffer space in the system buffer pool for the
560 * socket (currently, it does nothing but enforce limits). The space
561 * should be released by calling sbrelease() when the socket is destroyed.
562 */
563
564int
565soreserve(so, sndcc, rcvcc)
566 register struct socket *so;
567 u_long sndcc, rcvcc;
568{
1c79356b
A
569
570 if (sbreserve(&so->so_snd, sndcc) == 0)
571 goto bad;
572 if (sbreserve(&so->so_rcv, rcvcc) == 0)
573 goto bad2;
574 if (so->so_rcv.sb_lowat == 0)
575 so->so_rcv.sb_lowat = 1;
576 if (so->so_snd.sb_lowat == 0)
577 so->so_snd.sb_lowat = MCLBYTES;
578 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
579 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
580 return (0);
581bad2:
9bccf70c 582#ifdef __APPLE__
0b4e3aa0 583 selthreadclear(&so->so_snd.sb_sel);
9bccf70c 584#endif
1c79356b
A
585 sbrelease(&so->so_snd);
586bad:
587 return (ENOBUFS);
588}
589
590/*
591 * Allot mbufs to a sockbuf.
592 * Attempt to scale mbmax so that mbcnt doesn't become limiting
593 * if buffering efficiency is near the normal case.
594 */
595int
596sbreserve(sb, cc)
597 struct sockbuf *sb;
598 u_long cc;
599{
600 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
601 return (0);
602 sb->sb_hiwat = cc;
603 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
604 if (sb->sb_lowat > sb->sb_hiwat)
605 sb->sb_lowat = sb->sb_hiwat;
606 return (1);
607}
608
609/*
610 * Free mbufs held by a socket, and reserved mbuf space.
611 */
0b4e3aa0 612 /* WARNING needs to do selthreadclear() before calling this */
1c79356b
A
613void
614sbrelease(sb)
615 struct sockbuf *sb;
616{
617
618 sbflush(sb);
9bccf70c
A
619 sb->sb_hiwat = 0;
620 sb->sb_mbmax = 0;
621
1c79356b
A
622}
623
624/*
625 * Routines to add and remove
626 * data from an mbuf queue.
627 *
628 * The routines sbappend() or sbappendrecord() are normally called to
629 * append new mbufs to a socket buffer, after checking that adequate
630 * space is available, comparing the function sbspace() with the amount
631 * of data to be added. sbappendrecord() differs from sbappend() in
632 * that data supplied is treated as the beginning of a new record.
633 * To place a sender's address, optional access rights, and data in a
634 * socket receive buffer, sbappendaddr() should be used. To place
635 * access rights and data in a socket receive buffer, sbappendrights()
636 * should be used. In either case, the new data begins a new record.
637 * Note that unlike sbappend() and sbappendrecord(), these routines check
638 * for the caller that there will be enough space to store the data.
639 * Each fails if there is not enough space, or if it cannot find mbufs
640 * to store additional information in.
641 *
642 * Reliable protocols may use the socket send buffer to hold data
643 * awaiting acknowledgement. Data is normally copied from a socket
644 * send buffer in a protocol with m_copy for output to a peer,
645 * and then removing the data from the socket buffer with sbdrop()
646 * or sbdroprecord() when the data is acknowledged by the peer.
647 */
648
649/*
650 * Append mbuf chain m to the last record in the
651 * socket buffer sb. The additional space associated
652 * the mbuf chain is recorded in sb. Empty mbufs are
653 * discarded and mbufs are compacted where possible.
654 */
91447636 655int
1c79356b
A
656sbappend(sb, m)
657 struct sockbuf *sb;
658 struct mbuf *m;
9bccf70c 659{
91447636
A
660 register struct mbuf *n, *sb_first;
661 int result = 0;
662 int error = 0;
cc9f6e38 663 int filtered = 0;
1c79356b 664
fa4905b1
A
665
666 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);
667
1c79356b 668 if (m == 0)
91447636 669 return 0;
cc9f6e38
A
670
671again:
91447636 672 sb_first = n = sb->sb_mb;
9bccf70c 673 if (n) {
1c79356b
A
674 while (n->m_nextpkt)
675 n = n->m_nextpkt;
676 do {
677 if (n->m_flags & M_EOR) {
91447636 678 result = sbappendrecord(sb, m); /* XXXXXX!!!! */
55e303ae 679 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
91447636 680 return result;
1c79356b
A
681 }
682 } while (n->m_next && (n = n->m_next));
683 }
91447636 684
cc9f6e38
A
685 if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
686 error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
91447636
A
687 if (error) {
688 /* no data was appended, caller should not call sowakeup */
689 return 0;
690 }
cc9f6e38
A
691
692 /*
693 If we any filters, the socket lock was dropped. n and sb_first
694 cached data from the socket buffer. This cache is not valid
695 since we dropped the lock. We must start over. Since filtered
696 is set we won't run through the filters a second time. We just
697 set n and sb_start again.
698 */
699 if (filtered)
700 goto again;
91447636
A
701 }
702
703 result = sbcompress(sb, m, n);
fa4905b1
A
704
705 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
91447636
A
706
707 return result;
1c79356b
A
708}
709
710#ifdef SOCKBUF_DEBUG
711void
712sbcheck(sb)
713 register struct sockbuf *sb;
714{
715 register struct mbuf *m;
716 register struct mbuf *n = 0;
717 register u_long len = 0, mbcnt = 0;
91447636
A
718 lck_mtx_t *mutex_held;
719
720 if (sb->sb_so->so_proto->pr_getlock != NULL)
721 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
722 else
723 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
724
725 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
726
727 if (sbchecking == 0)
728 return;
1c79356b
A
729
730 for (m = sb->sb_mb; m; m = n) {
731 n = m->m_nextpkt;
732 for (; m; m = m->m_next) {
9bccf70c
A
733 len += m->m_len;
734 mbcnt += MSIZE;
735 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
736 mbcnt += m->m_ext.ext_size;
737 }
1c79356b 738 }
9bccf70c 739 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
91447636 740 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
9bccf70c 741 mbcnt, sb->sb_mbcnt);
9bccf70c 742 }
1c79356b
A
743}
744#endif
745
746/*
747 * As above, except the mbuf chain
748 * begins a new record.
749 */
91447636 750int
1c79356b
A
751sbappendrecord(sb, m0)
752 register struct sockbuf *sb;
8ad349bb 753 register struct mbuf *m0;
1c79356b
A
754{
755 register struct mbuf *m;
91447636 756 int result = 0;
9bccf70c 757
1c79356b 758 if (m0 == 0)
91447636
A
759 return 0;
760
761 if ((sb->sb_flags & SB_RECV) != 0) {
cc9f6e38 762 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
91447636
A
763 if (error != 0) {
764 if (error != EJUSTRETURN)
765 m_freem(m0);
766 return 0;
1c79356b 767 }
1c79356b
A
768 }
769
770 m = sb->sb_mb;
771 if (m)
772 while (m->m_nextpkt)
773 m = m->m_nextpkt;
774 /*
775 * Put the first mbuf on the queue.
776 * Note this permits zero length records.
777 */
778 sballoc(sb, m0);
779 if (m)
780 m->m_nextpkt = m0;
781 else
782 sb->sb_mb = m0;
783 m = m0->m_next;
784 m0->m_next = 0;
785 if (m && (m0->m_flags & M_EOR)) {
786 m0->m_flags &= ~M_EOR;
787 m->m_flags |= M_EOR;
788 }
91447636 789 return sbcompress(sb, m, m0);
1c79356b
A
790}
791
792/*
793 * As above except that OOB data
794 * is inserted at the beginning of the sockbuf,
795 * but after any other OOB data.
796 */
91447636 797int
1c79356b 798sbinsertoob(sb, m0)
91447636
A
799 struct sockbuf *sb;
800 struct mbuf *m0;
1c79356b 801{
91447636
A
802 struct mbuf *m;
803 struct mbuf **mp;
1c79356b
A
804
805 if (m0 == 0)
91447636
A
806 return 0;
807
808 if ((sb->sb_flags & SB_RECV) != 0) {
809 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
cc9f6e38 810 sock_data_filt_flag_oob, NULL);
91447636
A
811
812 if (error) {
813 if (error != EJUSTRETURN) {
814 m_freem(m0);
815 }
816 return 0;
1c79356b 817 }
1c79356b
A
818 }
819
820 for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
821 m = *mp;
822 again:
823 switch (m->m_type) {
824
825 case MT_OOBDATA:
826 continue; /* WANT next train */
827
828 case MT_CONTROL:
829 m = m->m_next;
830 if (m)
831 goto again; /* inspect THIS train further */
832 }
833 break;
834 }
835 /*
836 * Put the first mbuf on the queue.
837 * Note this permits zero length records.
838 */
839 sballoc(sb, m0);
840 m0->m_nextpkt = *mp;
841 *mp = m0;
842 m = m0->m_next;
843 m0->m_next = 0;
844 if (m && (m0->m_flags & M_EOR)) {
845 m0->m_flags &= ~M_EOR;
846 m->m_flags |= M_EOR;
847 }
91447636 848 return sbcompress(sb, m, m0);
1c79356b
A
849}
850
851/*
852 * Append address and data, and optionally, control (ancillary) data
853 * to the receive queue of a socket. If present,
854 * m0 must include a packet header with total length.
855 * Returns 0 if no space in sockbuf or insufficient mbufs.
856 */
91447636
A
857static int
858sbappendaddr_internal(sb, asa, m0, control)
1c79356b
A
859 register struct sockbuf *sb;
860 struct sockaddr *asa;
861 struct mbuf *m0, *control;
862{
863 register struct mbuf *m, *n;
864 int space = asa->sa_len;
1c79356b
A
865
866 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
867 panic("sbappendaddr");
868
1c79356b
A
869 if (m0)
870 space += m0->m_pkthdr.len;
871 for (n = control; n; n = n->m_next) {
872 space += n->m_len;
873 if (n->m_next == 0) /* keep pointer to last control buf */
874 break;
875 }
876 if (space > sbspace(sb))
877 return (0);
878 if (asa->sa_len > MLEN)
879 return (0);
880 MGET(m, M_DONTWAIT, MT_SONAME);
881 if (m == 0)
882 return (0);
883 m->m_len = asa->sa_len;
884 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
885 if (n)
886 n->m_next = m0; /* concatenate data to control */
887 else
888 control = m0;
889 m->m_next = control;
890 for (n = m; n; n = n->m_next)
891 sballoc(sb, n);
892 n = sb->sb_mb;
893 if (n) {
894 while (n->m_nextpkt)
895 n = n->m_nextpkt;
896 n->m_nextpkt = m;
897 } else
898 sb->sb_mb = m;
899 postevent(0,sb,EV_RWBYTES);
900 return (1);
901}
902
903int
91447636
A
904sbappendaddr(
905 struct sockbuf* sb,
906 struct sockaddr* asa,
907 struct mbuf *m0,
908 struct mbuf *control,
909 int *error_out)
910{
911 int result = 0;
912
913 if (error_out) *error_out = 0;
914
915 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
916 panic("sbappendaddrorfree");
917
918 /* Call socket data in filters */
919 if ((sb->sb_flags & SB_RECV) != 0) {
920 int error;
cc9f6e38 921 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
91447636
A
922 if (error) {
923 if (error != EJUSTRETURN) {
924 if (m0) m_freem(m0);
925 if (control) m_freem(control);
926 if (error_out) *error_out = error;
927 }
928 return 0;
929 }
930 }
931
932 result = sbappendaddr_internal(sb, asa, m0, control);
933 if (result == 0) {
934 if (m0) m_freem(m0);
935 if (control) m_freem(control);
936 if (error_out) *error_out = ENOBUFS;
937 }
938
939 return result;
940}
941
942static int
943sbappendcontrol_internal(sb, m0, control)
1c79356b
A
944 struct sockbuf *sb;
945 struct mbuf *control, *m0;
946{
947 register struct mbuf *m, *n;
948 int space = 0;
1c79356b
A
949
950 if (control == 0)
951 panic("sbappendcontrol");
952
1c79356b
A
953 for (m = control; ; m = m->m_next) {
954 space += m->m_len;
955 if (m->m_next == 0)
956 break;
957 }
958 n = m; /* save pointer to last control buffer */
959 for (m = m0; m; m = m->m_next)
960 space += m->m_len;
961 if (space > sbspace(sb))
962 return (0);
963 n->m_next = m0; /* concatenate data to control */
964 for (m = control; m; m = m->m_next)
965 sballoc(sb, m);
966 n = sb->sb_mb;
967 if (n) {
968 while (n->m_nextpkt)
969 n = n->m_nextpkt;
970 n->m_nextpkt = control;
971 } else
972 sb->sb_mb = control;
973 postevent(0,sb,EV_RWBYTES);
974 return (1);
975}
976
91447636
A
977int
978sbappendcontrol(
979 struct sockbuf *sb,
980 struct mbuf *m0,
981 struct mbuf *control,
982 int *error_out)
983{
984 int result = 0;
985
986 if (error_out) *error_out = 0;
987
988 if (sb->sb_flags & SB_RECV) {
989 int error;
cc9f6e38 990 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
91447636
A
991 if (error) {
992 if (error != EJUSTRETURN) {
993 if (m0) m_freem(m0);
994 if (control) m_freem(control);
995 if (error_out) *error_out = error;
996 }
997 return 0;
998 }
999 }
1000
1001 result = sbappendcontrol_internal(sb, m0, control);
1002 if (result == 0) {
1003 if (m0) m_freem(m0);
1004 if (control) m_freem(control);
1005 if (error_out) *error_out = ENOBUFS;
1006 }
1007
1008 return result;
1009}
1010
1c79356b
A
1011/*
1012 * Compress mbuf chain m into the socket
1013 * buffer sb following mbuf n. If n
1014 * is null, the buffer is presumed empty.
1015 */
/*
 * Compress mbuf chain m into the socket buffer sb following mbuf n.
 * If n is null, the buffer is presumed empty.  Empty mbufs are freed,
 * and small mbufs are coalesced into the trailing space of their
 * predecessor when their types match.  Always returns 1.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf unless it carries the only copy of an
		 * end-of-record mark that no same-type successor (next in the
		 * chain, or the tail mbuf n) could inherit.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce a small mbuf into the trailing space of the
		 * previous one when types match and no record boundary
		 * intervenes.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m into the buffer after n (or at the head). */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply a pending end-of-record mark to the last mbuf kept. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0,sb, EV_RWBYTES);
	return 1;
}
1066
1067/*
1068 * Free all mbufs in a sockbuf.
1069 * Check that all resources are reclaimed.
1070 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	/* The owning socket must still be attached. */
	if (sb->sb_so == NULL)
		panic ("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	/* After the drop loop, all counts and chains must be fully cleared. */
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);

	sbunlock(sb, 1);	/* keep socket locked */

}
1094
1095/*
1096 * Drop data from (the front of) a sockbuf.
9bccf70c
A
1097 * use m_freem_list to free the mbuf structures
1098 * under a single lock... this is done by pruning
1099 * the top of the tree from the body by keeping track
1100 * of where we get to in the tree and then zeroing the
1101 * two pertinent pointers m_nextpkt and m_next
1102 * the socket buffer is then updated to point at the new
1103 * top of the tree and the pruned area is released via
1104 * m_freem_list.
1c79356b
A
1105 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	/* 'next' is the record following the one being pruned. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;		/* head of the prune list / last record head seen */
	ml = (struct mbuf *)0;		/* last mbuf known to be fully consumed */

	while (len > 0) {
		if (m == 0) {
			/* End of a record: advance to the next record, if any. */
			if (next == 0) {
				/* temporarily replacing this panic with printf because
				 * it occurs occasionally when closing a socket when there
				 * is no harm in ignoring it. This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim the requested bytes off the front. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	/* Also consume any now-empty mbufs left at the head. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	/*
	 * Detach the consumed prefix from the survivors by zeroing the two
	 * pertinent links, then release it in one m_freem_list() call so
	 * the mbuf allocator lock is taken only once.
	 */
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	/* Re-head the buffer at the first surviving mbuf or record. */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
1173
1174/*
1175 * Drop a record off the front of a sockbuf
1176 * and move the next record to the front.
1177 */
1178void
1179sbdroprecord(sb)
1180 register struct sockbuf *sb;
1181{
1182 register struct mbuf *m, *mn;
1c79356b
A
1183
1184 m = sb->sb_mb;
1185 if (m) {
1186 sb->sb_mb = m->m_nextpkt;
1187 do {
1188 sbfree(sb, m);
1189 MFREE(m, mn);
9bccf70c
A
1190 m = mn;
1191 } while (m);
1c79356b
A
1192 }
1193 postevent(0, sb, EV_RWBYTES);
1194}
1195
1196/*
1197 * Create a "control" mbuf containing the specified data
1198 * with the specified type for presentation on a socket buffer.
1199 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	/* The whole cmsg (header + aligned payload) must fit in one mbuf. */
	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	/* m_len covers header + padded data; cmsg_len covers header + data. */
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
1222
1223/*
1224 * Some routines that return EOPNOTSUPP for entry points that are not
1225 * supported by a protocol. Fill in as needed.
1226 */
/* abort: not supported by this protocol */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}


/* accept: not supported by this protocol */
int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

/* attach: not supported by this protocol */
int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

/* bind: not supported by this protocol */
int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

/* connect: not supported by this protocol */
int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

/* connect2 (socketpair-style): not supported by this protocol */
int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

/* ioctl/control: not supported by this protocol */
int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

/* detach: not supported by this protocol */
int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

/* disconnect: not supported by this protocol */
int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

/* listen: not supported by this protocol */
int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

/* peeraddr: not supported by this protocol */
int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

/* rcvd (receive acknowledgement): not supported by this protocol */
int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

/* rcvoob (out-of-band receive): not supported by this protocol */
int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

/* send: not supported by this protocol */
int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)

{
	return EOPNOTSUPP;
}
1315
1316
1317/*
1318 * This isn't really a ``null'' operation, but it's the default one
1319 * and doesn't do anything destructive.
1320 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	/* Report the send buffer high-water mark as the preferred block size. */
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}
1327
1328
/* sosend: not supported by this protocol */
int	pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
			   struct uio *uio, struct mbuf *top,
			   struct mbuf *control, int flags)

{
	return EOPNOTSUPP;
}

/* soreceive: not supported by this protocol */
int	pru_soreceive_notsupp(struct socket *so,
			      struct sockaddr **paddr,
			      struct uio *uio, struct mbuf **mp0,
			      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

/* shutdown: not supported by this protocol */
int

pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

/* sockaddr (local address): not supported by this protocol */
int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

/* default pr_usrreqs sosend entry: unsupported */
int	pru_sosend(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

/* default pr_usrreqs soreceive entry: unsupported */
int	pru_soreceive(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}
1372
1373
91447636
A
/* poll/select: not supported by this protocol */
int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
1380
1381
9bccf70c
A
1382#ifdef __APPLE__
1383/*
1384 * The following are macros on BSD and functions on Darwin
1385 */
1c79356b 1386
0b4e3aa0
A
1387/*
1388 * Do we need to notify the other side when I/O is possible?
1389 */
1390
1391int
1392sb_notify(struct sockbuf *sb)
1393{
55e303ae 1394 return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
0b4e3aa0
A
1395}
1396
1397/*
1398 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1399 * This is problematical if the fields are unsigned, as the space might
1400 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1401 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1402 */
1403long
1404sbspace(struct sockbuf *sb)
1405{
1406 return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
1407 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
1408}
1409
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	/* PR_ATOMIC protocols (e.g. datagram) must send each record whole. */
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	/*
	 * Readable when at least the low-water mark is buffered, the peer
	 * can send no more, a connection is waiting to be accepted, or an
	 * error is pending.
	 */
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */

int
sowriteable(struct socket *so)
{
	/*
	 * Writable when low-water space is free and the socket is
	 * connected (or the protocol needs no connection), or when writes
	 * are shut down or an error is pending.
	 */
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state&SS_ISCONNECTED) ||
	      (so->so_proto->pr_flags&PR_CONNREQUIRED)==0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}
1437
/* adjust counters in sb reflecting allocation of m */

void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;		/* data byte count */
	sb->sb_mbcnt += MSIZE;		/* mbuf header storage */
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;	/* external cluster too */
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	/* exact inverse of sballoc() */
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
1458
1459/*
1460 * Set lock on sockbuf sb; sleep if lock is already held.
1461 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1462 * Returns error without lock if sleep is interrupted.
1463 */
1464int
1465sblock(struct sockbuf *sb, int wf)
1466{
1467 return(sb->sb_flags & SB_LOCK ?
1468 ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
1469 (sb->sb_flags |= SB_LOCK), 0);
1470}
1471
1472/* release lock on sockbuf sb */
1473void
91447636 1474sbunlock(struct sockbuf *sb, int keeplocked)
0b4e3aa0 1475{
91447636 1476 struct socket *so = sb->sb_so;
8ad349bb 1477 int lr, lr_saved;
91447636
A
1478 lck_mtx_t *mutex_held;
1479
8ad349bb
A
1480#ifdef __ppc__
1481 __asm__ volatile("mflr %0" : "=r" (lr));
1482 lr_saved = lr;
1483#endif
0b4e3aa0 1484 sb->sb_flags &= ~SB_LOCK;
91447636
A
1485
1486 if (so->so_proto->pr_getlock != NULL)
1487 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1488 else
1489 mutex_held = so->so_proto->pr_domain->dom_mtx;
1490
1491 if (keeplocked == 0)
1492 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1493
0b4e3aa0
A
1494 if (sb->sb_flags & SB_WANT) {
1495 sb->sb_flags &= ~SB_WANT;
91447636
A
1496 if (so->so_usecount < 0)
1497 panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);
1498
0b4e3aa0
A
1499 wakeup((caddr_t)&(sb)->sb_flags);
1500 }
91447636
A
1501 if (keeplocked == 0) { /* unlock on exit */
1502 so->so_usecount--;
1503 if (so->so_usecount < 0)
1504 panic("sbunlock: unlock on exit so=%x lr=%x sb_flags=%x\n", so, so->so_usecount,lr_saved, sb->sb_flags);
8ad349bb 1505 so->reserved4= lr_saved;
91447636
A
1506 lck_mtx_unlock(mutex_held);
1507 }
0b4e3aa0
A
1508}
1509
1510void
1511sorwakeup(struct socket * so)
1512{
1513 if (sb_notify(&so->so_rcv))
1514 sowakeup(so, &so->so_rcv);
1515}
1516
1517void
1518sowwakeup(struct socket * so)
1519{
1520 if (sb_notify(&so->so_snd))
1521 sowakeup(so, &so->so_snd);
1522}
9bccf70c 1523#endif __APPLE__
0b4e3aa0 1524
1c79356b
A
1525/*
1526 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1527 */
/*
 * Returns a copy of sa (sized by sa->sa_len) in a fresh M_SONAME
 * allocation, or NULL when canwait is false and no memory is available.
 * The caller owns the copy and must FREE(.., M_SONAME) it.
 * NOTE(review): sa is assumed non-NULL — verify at call sites.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	       canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}
1541
1542/*
1543 * Create an external-format (``xsocket'') structure using the information
1544 * in the kernel-format socket structure pointed to by so. This is done
1545 * to reduce the spew of irrelevant information over this interface,
1546 * to isolate user code from changes in the kernel structure, and
1547 * potentially to provide information-hiding if we decide that
1548 * some of this information should be hidden from users.
1549 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	/* Protocol/family come from the attached protocol switch, if any. */
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	}
	else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	/* Export both socket buffers in xsockbuf form. */
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
1577
1578/*
1579 * This does the same for sockbufs. Note that the xsockbuf structure,
1580 * since it is always embedded in a socket, does not include a self
1581 * pointer nor a length. We make this entry point public in case
1582 * some other mechanism needs it.
1583 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	/* Export the timeout in clock ticks, rounding nonzero times up to 1. */
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
1597
1598/*
1599 * Here is the definition of some of the basic objects in the kern.ipc
1600 * branch of the MIB.
1601 */
1c79356b
A
/* Root of the kern.ipc sysctl subtree. */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

/* Tunables and read-only counters for socket-buffer resource limits. */
SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, 
    &sb_max, 0, "Maximum socket buffer size");
/* NOTE(review): "avaliable" typo is in the exported description string;
 * left untouched here since changing it alters user-visible output. */
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, 
    &maxsockets, 0, "Maximum number of sockets avaliable");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
1615