/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)


/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}

void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
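
/*
 * Editor's illustration (not part of the original source): how a protocol
 * might drive the state-transition helpers above.  my_proto_input() and the
 * MYEV_* event tags are hypothetical; the helpers are the ones defined in
 * this file.  A minimal sketch, kept under #if 0 so it is never compiled.
 */
#if 0
static void
my_proto_input(struct socket *so, int event)	/* hypothetical */
{
	switch (event) {
	case MYEV_HANDSHAKE_DONE:	/* hypothetical event tag */
		soisconnected(so);	/* on a listener's child, moves so
					 * from so_incomp to so_comp */
		break;
	case MYEV_PEER_CLOSING:
		soisdisconnecting(so);	/* wakes readers and writers */
		break;
	case MYEV_PEER_GONE:
		soisdisconnected(so);	/* final state; both directions shut */
		break;
	}
}
#endif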

/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
		//###LD To clean up
		while (j-- && so) {
			// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			socket_lock(so, 1);
			sonext = TAILQ_NEXT(so, so_list);
			// in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
			socket_unlock(so, 1);
			so = sonext;
		}
	}

	// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
	//	return (NULL);
	// else
	return (so);
}
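
/*
 * Editor's note (not part of the original source): the linear congruential
 * step above keeps 16 bits of state, so ((qlen + 1) * rnd) >> 16 scales rnd
 * from [0, 65535] to a victim index in [0, qlen].  For example, with
 * qlen = 7 and rnd = 40000, j = (8 * 40000) >> 16 = 4, i.e. walk four
 * entries into so_incomp.  Any rnd >= 57344 yields j = 7 = qlen, which
 * walks past the last of the seven entries and returns NULL -- the
 * documented roughly one-in-qlen "drop the new request instead" case.
 */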

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;

#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
#endif

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sflt_termsock(so);
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock
	 * for protocols with per-socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sflt_termsock(so);
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs++;
#endif

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

#ifdef __APPLE__
	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#endif
	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}

struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	error = 0;
	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
			    filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
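
/*
 * Editor's illustration (not part of the original source): typical use of
 * sonewconn() from a protocol's input path when a connection request
 * arrives on a listening socket.  The surrounding context and the `sin`
 * variable are hypothetical; sonewconn() and soisconnected() are the
 * routines defined in this file.
 */
#if 0
	/* inside a hypothetical protocol input routine; head is the listener */
	struct socket *so;

	so = sonewconn(head, 0, (struct sockaddr *)&sin);
	if (so == NULL)
		goto drop;	/* queue full or a connect-in filter refused it */
	/*
	 * ... initialize protocol state; once the handshake completes, call
	 * soisconnected(so), which moves so from so_incomp to so_comp ...
	 */
#endif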

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in the case of PRU_SHUTDOWN).  Socantrcvmore indicates that no
 * more data will be received, and will normally be applied to the socket
 * by a protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH,
		    "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();
	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
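
/*
 * Editor's illustration (not part of the original source): the SIGIO path
 * above fires only when SS_ASYNC is set and an owner is registered.  From
 * userland that is done with fcntl(2); a minimal sketch, where `s` is a
 * socket descriptor and `handler` is a hypothetical signal handler, with
 * error checks omitted.
 */
#if 0
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

	signal(SIGIO, handler);				/* handler: hypothetical */
	fcntl(s, F_SETOWN, getpid());			/* route SIGIO here */
	fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_ASYNC);  /* sets SS_ASYNC */
#endif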

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
#endif
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
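
/*
 * Editor's illustration (not part of the original source): a protocol
 * attach routine typically reserves buffer space with soreserve().  The
 * function name and the space constants are illustrative assumptions,
 * not taken from this file.
 */
#if 0
static int
my_proto_attach(struct socket *so, int proto, struct proc *p)	/* hypothetical */
{
	int error;

	error = soreserve(so, 8192, 8192);	/* send and receive hiwat */
	if (error)
		return (error);			/* ENOBUFS from soreserve */
	/* ... allocate the protocol control block ... */
	return (0);
}
#endif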

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
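
/*
 * Editor's note (not part of the original source): a worked instance of
 * the limits above, using illustrative values MSIZE = 256, MCLBYTES = 2048
 * and sb_max = 256 * 1024.  The largest acceptable cc is
 * sb_max * MCLBYTES / (MSIZE + MCLBYTES) = 262144 * 2048 / 2304 ~= 233016
 * bytes, and a request of cc = 32768 yields
 * sb_mbmax = min(32768 * 8, 262144) = 262144, so with sb_efficiency = 8
 * the mbuf accounting does not become the limiting factor when buffering
 * efficiency is near the normal case.
 */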

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = 0;
	sb->sb_mbmax = 0;

}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * with the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	if (m == 0)
		return 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we ran any filters, the socket lock was dropped.  n and
		 * sb_first cached data from the socket buffer.  This cache is
		 * not valid since we dropped the lock.  We must start over.
		 * Since filtered is set we won't run through the filters a
		 * second time.  We just set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
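
/*
 * Editor's illustration (not part of the original source): a stream-style
 * protocol delivering data to the receive buffer with sbappend() and then
 * waking readers.  The enclosing context is hypothetical; the space check
 * against sbspace() follows the convention described above, and a zero
 * return from sbappend() means a filter swallowed the data, so no wakeup.
 */
#if 0
	/* m is a chain of MT_DATA mbufs with a packet header, for so's
	 * receive side */
	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
		m_freem(m);		/* no room; protocol-specific policy */
	} else if (sbappend(&so->so_rcv, m)) {
		sorwakeup(so);		/* data was appended; wake readers */
	}
#endif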

#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out)
		*error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0)
					m_freem(m0);
				if (control)
					m_freem(control);
				if (error_out)
					*error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0)
			m_freem(m0);
		if (control)
			m_freem(control);
		if (error_out)
			*error_out = ENOBUFS;
	}

	return result;
}
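
/*
 * Editor's illustration (not part of the original source): a datagram
 * protocol queueing a received packet together with the sender's address,
 * in the style of UDP input.  The variables `udp_in` and `opts` are
 * hypothetical; sbappendaddr() frees m0 and control on failure, per the
 * wrapper above.
 */
#if 0
	/* udp_in holds the sender; m is the datagram with a packet header */
	int error;

	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&udp_in,
	    m, opts, &error) == 0) {
		/* m and opts were freed for us; error is ENOBUFS or a
		 * socket-filter error */
		return;
	}
	sorwakeup(so);
#endif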

static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out)
		*error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0)
					m_freem(m0);
				if (control)
					m_freem(control);
				if (error_out)
					*error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0)
			m_freem(m0);
		if (control)
			m_freem(control);
		if (error_out)
			*error_out = ENOBUFS;
	}

	return result;
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 &&	/* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void) sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever.  Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */

}

/*
 * Drop data from (the front of) a sockbuf.  Use m_freem_list to free
 * the mbuf structures under a single lock.  This is done by pruning
 * the top of the tree from the body: we keep track of where we get to
 * in the tree, then zero the two pertinent pointers (m_nextpkt and
 * m_next).  The socket buffer is then updated to point at the new top
 * of the tree, and the pruned area is released via m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * Temporarily replacing this panic with a
				 * printf: it occurs occasionally when closing
				 * a socket, and there is no harm in ignoring
				 * it.  The problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/*
				 * Zero the counts.  If we have no mbufs,
				 * we have no data (PR-2986815).
				 */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
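
/*
 * Editor's illustration (not part of the original source): a reliable
 * protocol releasing acknowledged data from the send buffer, in the style
 * of TCP input processing.  The variable `acked` is hypothetical.
 */
#if 0
	/* the peer acknowledged `acked` bytes; drop them from the send queue */
	sbdrop(&so->so_snd, acked);
	sowwakeup(so);			/* space was freed; wake writers */
#endif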

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void) memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
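
/*
 * Editor's illustration (not part of the original source): building a
 * control mbuf that carries a timestamp, as a protocol might do when
 * SO_TIMESTAMP is set, for later delivery through sbappendaddr().  A
 * sketch under stated assumptions, not lifted from this file.
 */
#if 0
	struct timeval tv;
	struct mbuf *ctl;

	microtime(&tv);
	ctl = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
	    SCM_TIMESTAMP, SOL_SOCKET);
	/* ctl is NULL if no mbuf was available or CMSG_SPACE(size) > MLEN */
#endif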

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top, struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
    __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
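
/*
 * Editor's illustration (not part of the original source): a protocol that
 * implements only a few entry points can fill the rest of its pr_usrreqs
 * with the notsupp stubs above.  The my_proto_* functions are hypothetical,
 * and the field order shown is from the editor's recollection of this
 * era's struct pr_usrreqs; it may not match the actual declaration.
 */
#if 0
struct pr_usrreqs my_proto_usrreqs = {
	pru_abort_notsupp, pru_accept_notsupp, my_proto_attach,
	pru_bind_notsupp, pru_connect_notsupp, pru_connect2_notsupp,
	pru_control_notsupp, my_proto_detach, pru_disconnect_notsupp,
	pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp,
	pru_rcvoob_notsupp, my_proto_send, pru_sense_null,
	pru_shutdown_notsupp, pru_sockaddr_notsupp, sosend, soreceive,
	pru_sopoll_notsupp
};
#endif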

#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long)imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}

/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	     (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}

/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	if (sb->sb_flags & SB_LOCK)
		return ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK);
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr, lr_saved;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n",
			    sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n",
			    so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}

void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}

/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) +
	    sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "");
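
/*
 * Editor's illustration (not part of the original source): the kern.ipc
 * values declared above are visible from userland through sysctl(3); a
 * minimal sketch for reading the maximum socket buffer size.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int maxsockbuf;
	size_t len = sizeof (maxsockbuf);

	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsockbuf, &len,
	    NULL, 0) == 0)
		printf("kern.ipc.maxsockbuf = %d\n", maxsockbuf);
	return (0);
}
#endif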