1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
57 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/domain.h>
63 #include <sys/kernel.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/protosw.h>
69 #include <sys/stat.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/signalvar.h>
73 #include <sys/sysctl.h>
74 #include <sys/ev.h>
75 #include <kern/locks.h>
76 #include <net/route.h>
77 #include <netinet/in.h>
78 #include <netinet/in_pcb.h>
79 #include <sys/kdebug.h>
80
81 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
82 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
83
84
85 /*
86 * Primitive routines for operating on sockets and socket buffers
87 */
88
89 u_long sb_max = SB_MAX; /* XXX should be static */
90
91 static u_long sb_efficiency = 8; /* parameter for sbreserve() */
92
93 /*
94 * Procedures to manipulate state flags of socket
95 * and do appropriate wakeups. Normal sequence from the
96 * active (originating) side is that soisconnecting() is
97 * called during processing of connect() call,
98 * resulting in an eventual call to soisconnected() if/when the
99 * connection is established. When the connection is torn down
100 * soisdisconnecting() is called during processing of disconnect() call,
101 * and soisdisconnected() is called when the connection to the peer
102 * is totally severed. The semantics of these routines are such that
103 * connectionless protocols can call soisconnected() and soisdisconnected()
104 * only, bypassing the in-progress calls when setting up a ``connection''
105 * takes no time.
106 *
107 * From the passive side, a socket is created with
108 * two queues of sockets: so_incomp for connections in progress
109 * and so_comp for connections already made and awaiting user acceptance.
110 * As a protocol is preparing incoming connections, it creates a socket
111 * structure queued on so_incomp by calling sonewconn(). When the connection
112 * is established, soisconnected() is called, and transfers the
113 * socket structure to so_comp, making it available to accept().
114 *
115 * If a socket is closed with sockets on either
116 * so_incomp or so_comp, these sockets are dropped.
117 *
118 * If higher level protocols are implemented in
119 * the kernel, the wakeups done here will sometimes
120 * cause software-interrupt process scheduling.
121 */
122 void
123 soisconnecting(so)
124 register struct socket *so;
125 {
126
127 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
128 so->so_state |= SS_ISCONNECTING;
129
130 sflt_notify(so, sock_evt_connecting, NULL);
131 }
132
133 void
134 soisconnected(so)
135 struct socket *so;
136 {
137 struct socket *head = so->so_head;
138
139 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
140 so->so_state |= SS_ISCONNECTED;
141
142 sflt_notify(so, sock_evt_connected, NULL);
143
144 if (head && (so->so_state & SS_INCOMP)) {
145 so->so_state &= ~SS_INCOMP;
146 so->so_state |= SS_COMP;
147 if (head->so_proto->pr_getlock != NULL) {
148 socket_unlock(so, 0);
149 socket_lock(head, 1);
150 }
151 postevent(head, 0, EV_RCONN);
152 TAILQ_REMOVE(&head->so_incomp, so, so_list);
153 head->so_incqlen--;
154 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
155 sorwakeup(head);
156 wakeup_one((caddr_t)&head->so_timeo);
157 if (head->so_proto->pr_getlock != NULL) {
158 socket_unlock(head, 1);
159 socket_lock(so, 0);
160 }
161 } else {
162 postevent(so, 0, EV_WCONN);
163 wakeup((caddr_t)&so->so_timeo);
164 sorwakeup(so);
165 sowwakeup(so);
166 }
167 }
168
169 void
170 soisdisconnecting(so)
171 register struct socket *so;
172 {
173 so->so_state &= ~SS_ISCONNECTING;
174 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
175 sflt_notify(so, sock_evt_disconnecting, NULL);
176 wakeup((caddr_t)&so->so_timeo);
177 sowwakeup(so);
178 sorwakeup(so);
179 }
180
181 void
182 soisdisconnected(so)
183 register struct socket *so;
184 {
185 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
186 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
187 sflt_notify(so, sock_evt_disconnected, NULL);
188 wakeup((caddr_t)&so->so_timeo);
189 sowwakeup(so);
190 sorwakeup(so);
191 }
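
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the normal sequence of state-transition calls a
 * connection-oriented protocol drives, per the block comment above.
 * A connectionless protocol would skip the in-progress states and
 * call soisconnected()/soisdisconnected() directly.
 */
#if 0
static void
example_connection_lifecycle(struct socket *so)
{
	soisconnecting(so);	/* connect() issued, handshake under way */
	/* ... handshake completes ... */
	soisconnected(so);	/* wakes connect()/accept() sleepers */
	/* ... teardown begins ... */
	soisdisconnecting(so);	/* no more data may be sent or received */
	/* ... teardown completes ... */
	soisdisconnected(so);	/* connection to the peer fully severed */
}
#endif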
192
193 /*
194 * Return a random connection that hasn't been serviced yet and
195 * is eligible for discard. There is a one in qlen chance that
196  * we will return a null, saying that there are no droppable
197  * requests.  In this case, the protocol specific code should drop
198  * the new request.  This ensures fairness.
199 *
200 * This may be used in conjunction with protocol specific queue
201 * congestion routines.
202 */
203 struct socket *
204 sodropablereq(head)
205 register struct socket *head;
206 {
207 struct socket *so, *sonext = NULL;
208 unsigned int i, j, qlen;
209 static int rnd;
210 static struct timeval old_runtime;
211 static unsigned int cur_cnt, old_cnt;
212 struct timeval tv;
213
214 microtime(&tv);
215 if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
216 old_runtime = tv;
217 old_cnt = cur_cnt / i;
218 cur_cnt = 0;
219 }
220
221 so = TAILQ_FIRST(&head->so_incomp);
222 if (!so)
223 return (NULL);
224
225 qlen = head->so_incqlen;
226 if (++cur_cnt > qlen || old_cnt > qlen) {
227 rnd = (314159 * rnd + 66329) & 0xffff;
228 j = ((qlen + 1) * rnd) >> 16;
229 //###LD To clean up
230 while (j-- && so) {
231 // if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
232 socket_lock(so, 1);
233 sonext = TAILQ_NEXT(so, so_list);
234                         //      in_pcb_checkstate(so->so_pcb, WNT_RELEASE, 0);
235 socket_unlock(so, 1);
236 so = sonext;
237 }
238 }
239
240 // if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
241 // return (NULL);
242 // else
243 return (so);
244 }
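
/*
 * Sketch (editor's addition): how protocol-specific congestion code
 * might use sodropablereq() when the incomplete-connection queue
 * overflows.  example_drop() and the drop_new_request label are
 * hypothetical stand-ins for protocol-specific logic.
 */
#if 0
	if (head->so_qlen > 3 * head->so_qlimit / 2) {
		struct socket *sp = sodropablereq(head);
		if (sp != NULL)
			example_drop(sp);	/* evict a random embryonic connection */
		else
			goto drop_new_request;	/* the 1-in-qlen case: drop the new one */
	}
#endif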
245
246 /*
247 * When an attempt at a new connection is noted on a socket
248 * which accepts connections, sonewconn is called. If the
249 * connection is possible (subject to space constraints, etc.)
250  * then we allocate a new structure, properly linked into the
251  * data structure of the original socket, and return this.
252  * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
253 */
254 static struct socket *
255 sonewconn_internal(head, connstatus)
256 register struct socket *head;
257 int connstatus;
258 {
259 int error = 0;
260 register struct socket *so;
261 lck_mtx_t *mutex_held;
262
263 if (head->so_proto->pr_getlock != NULL)
264 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
265 else
266 mutex_held = head->so_proto->pr_domain->dom_mtx;
267 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
268
269 if (head->so_qlen > 3 * head->so_qlimit / 2)
270 return ((struct socket *)0);
271 so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
272 if (so == NULL)
273 return ((struct socket *)0);
274 /* check if head was closed during the soalloc */
275 if (head->so_proto == NULL) {
276 sodealloc(so);
277 return ((struct socket *)0);
278 }
279
280 so->so_head = head;
281 so->so_type = head->so_type;
282 so->so_options = head->so_options &~ SO_ACCEPTCONN;
283 so->so_linger = head->so_linger;
284 so->so_state = head->so_state | SS_NOFDREF;
285 so->so_proto = head->so_proto;
286 so->so_timeo = head->so_timeo;
287 so->so_pgid = head->so_pgid;
288 so->so_uid = head->so_uid;
289 so->so_usecount = 1;
290
291 #ifdef __APPLE__
292 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
293 so->so_rcv.sb_so = so->so_snd.sb_so = so;
294 TAILQ_INIT(&so->so_evlist);
295 #endif
296
297 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
298 sflt_termsock(so);
299 sodealloc(so);
300 return ((struct socket *)0);
301 }
302
303 /*
304      * Must be done with head unlocked to avoid deadlock with protocols that use per-socket mutexes.
305 */
306 if (head->so_proto->pr_unlock)
307 socket_unlock(head, 0);
308 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
309 sflt_termsock(so);
310 sodealloc(so);
311 if (head->so_proto->pr_unlock)
312 socket_lock(head, 0);
313 return ((struct socket *)0);
314 }
315 if (head->so_proto->pr_unlock)
316 socket_lock(head, 0);
317 #ifdef __APPLE__
318 so->so_proto->pr_domain->dom_refs++;
319 #endif
320
321 if (connstatus) {
322 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
323 so->so_state |= SS_COMP;
324 } else {
325 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
326 so->so_state |= SS_INCOMP;
327 head->so_incqlen++;
328 }
329 head->so_qlen++;
330
331 #ifdef __APPLE__
332 /* Attach socket filters for this protocol */
333 sflt_initsock(so);
334 #endif
335 if (connstatus) {
336 so->so_state |= connstatus;
337 sorwakeup(head);
338 wakeup((caddr_t)&head->so_timeo);
339 }
340 return (so);
341 }
342
343
344 struct socket *
345 sonewconn(
346 struct socket *head,
347 int connstatus,
348 const struct sockaddr *from)
349 {
350 int error = 0;
351 struct socket_filter_entry *filter;
352 int filtered = 0;
353
354 error = 0;
355 for (filter = head->so_filt; filter && (error == 0);
356 filter = filter->sfe_next_onsocket) {
357 if (filter->sfe_filter->sf_filter.sf_connect_in) {
358 if (filtered == 0) {
359 filtered = 1;
360 sflt_use(head);
361 socket_unlock(head, 0);
362 }
363 error = filter->sfe_filter->sf_filter.sf_connect_in(
364 filter->sfe_cookie, head, from);
365 }
366 }
367 if (filtered != 0) {
368 socket_lock(head, 0);
369 sflt_unuse(head);
370 }
371
372 if (error) {
373 return NULL;
374 }
375
376 return sonewconn_internal(head, connstatus);
377 }
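
/*
 * Sketch (editor's addition): the passive-open pattern described
 * above.  A hypothetical protocol input routine queues an embryonic
 * socket on so_incomp by passing connstatus 0, then promotes it with
 * soisconnected() once its handshake finishes.
 */
#if 0
	struct socket *so = sonewconn(head, 0, from);	/* lands on so_incomp */
	if (so == NULL)
		goto drop;	/* listen queue full, filter veto, or no memory */
	/* ... handshake completes ... */
	soisconnected(so);	/* moves so to so_comp for accept() */
#endif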
378
379 /*
380 * Socantsendmore indicates that no more data will be sent on the
381  * socket; it is normally applied to a socket by the protocol code
382  * (in the case of PRU_SHUTDOWN) when the user informs the system
383  * that no more data is to be sent.  Socantrcvmore indicates that no more data
384 * will be received, and will normally be applied to the socket by a
385 * protocol when it detects that the peer will send no more data.
386 * Data queued for reading in the socket may yet be read.
387 */
388
389 void
390 socantsendmore(so)
391 struct socket *so;
392 {
393 so->so_state |= SS_CANTSENDMORE;
394 sflt_notify(so, sock_evt_cantsendmore, NULL);
395 sowwakeup(so);
396 }
397
398 void
399 socantrcvmore(so)
400 struct socket *so;
401 {
402 so->so_state |= SS_CANTRCVMORE;
403 sflt_notify(so, sock_evt_cantrecvmore, NULL);
404 sorwakeup(so);
405 }
406
407 /*
408 * Wait for data to arrive at/drain from a socket buffer.
409 */
410 int
411 sbwait(sb)
412 struct sockbuf *sb;
413 {
414 int error = 0, lr, lr_saved;
415 struct socket *so = sb->sb_so;
416 lck_mtx_t *mutex_held;
417 struct timespec ts;
418
419 #ifdef __ppc__
420 __asm__ volatile("mflr %0" : "=r" (lr));
421 lr_saved = lr;
422 #endif
423
424
425 if (so->so_proto->pr_getlock != NULL)
426 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
427 else
428 mutex_held = so->so_proto->pr_domain->dom_mtx;
429
430 sb->sb_flags |= SB_WAIT;
431
432 if (so->so_usecount < 1)
433 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
434 ts.tv_sec = sb->sb_timeo.tv_sec;
435 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
436 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
437 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
438 &ts);
439
440 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
441
442 if (so->so_usecount < 1)
443 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
444
445 if ((so->so_state & SS_DRAINING)) {
446 error = EBADF;
447 }
448
449 return (error);
450 }
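
/*
 * Sketch (editor's addition): typical sbwait() usage in the style of
 * a receive path, sleeping with the socket lock held until data
 * arrives, the socket can't receive more, or sb_timeo expires.
 */
#if 0
	while (so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
	    (so->so_state & SS_CANTRCVMORE) == 0 && so->so_error == 0) {
		error = sbwait(&so->so_rcv);	/* drops and retakes the lock */
		if (error)
			return (error);
	}
#endif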
451
452 /*
453 * Lock a sockbuf already known to be locked;
454 * return any error returned from sleep (EINTR).
455 */
456 int
457 sb_lock(sb)
458 register struct sockbuf *sb;
459 {
460 struct socket *so = sb->sb_so;
461 lck_mtx_t * mutex_held;
462 int error = 0, lr, lr_saved;
463
464 #ifdef __ppc__
465 __asm__ volatile("mflr %0" : "=r" (lr));
466 lr_saved = lr;
467 #endif
468
469 if (so == NULL)
470 panic("sb_lock: null so back pointer sb=%x\n", sb);
471
472 while (sb->sb_flags & SB_LOCK) {
473 sb->sb_flags |= SB_WANT;
474 if (so->so_proto->pr_getlock != NULL)
475 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
476 else
477 mutex_held = so->so_proto->pr_domain->dom_mtx;
478 if (so->so_usecount < 1)
479 panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
480 error = msleep((caddr_t)&sb->sb_flags, mutex_held,
481 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
482 if (so->so_usecount < 1)
483 panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
484 if (error)
485 return (error);
486 }
487 sb->sb_flags |= SB_LOCK;
488 return (0);
489 }
490
491 /*
492 * Wakeup processes waiting on a socket buffer.
493 * Do asynchronous notification via SIGIO
494 * if the socket has the SS_ASYNC flag set.
495 */
496 void
497 sowakeup(so, sb)
498 register struct socket *so;
499 register struct sockbuf *sb;
500 {
501 struct proc *p = current_proc();
502 sb->sb_flags &= ~SB_SEL;
503 selwakeup(&sb->sb_sel);
504 if (sb->sb_flags & SB_WAIT) {
505 sb->sb_flags &= ~SB_WAIT;
506 wakeup((caddr_t)&sb->sb_cc);
507 }
508 if (so->so_state & SS_ASYNC) {
509 if (so->so_pgid < 0)
510 gsignal(-so->so_pgid, SIGIO);
511 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
512 psignal(p, SIGIO);
513 }
514 if (sb->sb_flags & SB_KNOTE) {
515 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
516 }
517 if (sb->sb_flags & SB_UPCALL) {
518 socket_unlock(so, 0);
519 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
520 socket_lock(so, 0);
521 }
522 }
523
524 /*
525 * Socket buffer (struct sockbuf) utility routines.
526 *
527 * Each socket contains two socket buffers: one for sending data and
528 * one for receiving data. Each buffer contains a queue of mbufs,
529 * information about the number of mbufs and amount of data in the
530 * queue, and other fields allowing select() statements and notification
531 * on data availability to be implemented.
532 *
533 * Data stored in a socket buffer is maintained as a list of records.
534 * Each record is a list of mbufs chained together with the m_next
535 * field. Records are chained together with the m_nextpkt field. The upper
536 * level routine soreceive() expects the following conventions to be
537 * observed when placing information in the receive buffer:
538 *
539 * 1. If the protocol requires each message be preceded by the sender's
540 * name, then a record containing that name must be present before
541 * any associated data (mbuf's must be of type MT_SONAME).
542 * 2. If the protocol supports the exchange of ``access rights'' (really
543 * just additional data associated with the message), and there are
544 * ``rights'' to be received, then a record containing this data
545 * should be present (mbuf's must be of type MT_RIGHTS).
546 * 3. If a name or rights record exists, then it must be followed by
547 * a data record, perhaps of zero length.
548 *
549 * Before using a new socket structure it is first necessary to reserve
550 * buffer space to the socket, by calling sbreserve(). This should commit
551 * some of the available buffer space in the system buffer pool for the
552 * socket (currently, it does nothing but enforce limits). The space
553 * should be released by calling sbrelease() when the socket is destroyed.
554 */
555  */
556 int
557 soreserve(so, sndcc, rcvcc)
558 register struct socket *so;
559 u_long sndcc, rcvcc;
560 {
561
562 if (sbreserve(&so->so_snd, sndcc) == 0)
563 goto bad;
564 if (sbreserve(&so->so_rcv, rcvcc) == 0)
565 goto bad2;
566 if (so->so_rcv.sb_lowat == 0)
567 so->so_rcv.sb_lowat = 1;
568 if (so->so_snd.sb_lowat == 0)
569 so->so_snd.sb_lowat = MCLBYTES;
570 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
571 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
572 return (0);
573 bad2:
574 #ifdef __APPLE__
575 selthreadclear(&so->so_snd.sb_sel);
576 #endif
577 sbrelease(&so->so_snd);
578 bad:
579 return (ENOBUFS);
580 }
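
/*
 * Sketch (editor's addition): soreserve() is normally called from a
 * protocol's pru_attach entry point; the 8192-byte high-water marks
 * here are illustrative, not a recommendation.
 */
#if 0
static int
example_pru_attach(struct socket *so, int proto, struct proc *p)
{
	int error;

	error = soreserve(so, 8192, 8192);	/* snd and rcv space */
	if (error)
		return (error);
	/* ... allocate and link the protocol control block ... */
	return (0);
}
#endif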
581
582 /*
583 * Allot mbufs to a sockbuf.
584 * Attempt to scale mbmax so that mbcnt doesn't become limiting
585 * if buffering efficiency is near the normal case.
586 */
587 int
588 sbreserve(sb, cc)
589 struct sockbuf *sb;
590 u_long cc;
591 {
592 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
593 return (0);
594 sb->sb_hiwat = cc;
595 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
596 if (sb->sb_lowat > sb->sb_hiwat)
597 sb->sb_lowat = sb->sb_hiwat;
598 return (1);
599 }
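
/*
 * Worked example (editor's addition): with the default sb_efficiency
 * of 8, sbreserve(sb, 32768) sets sb_hiwat = 32768 and sb_mbmax =
 * min(32768 * 8, sb_max) = 262144 (assuming sb_max is at its
 * traditional SB_MAX default of 256*1024), so mbuf accounting only
 * limits a buffer whose mbufs are, on average, less than 1/8 full.
 */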
600
601 /*
602 * Free mbufs held by a socket, and reserved mbuf space.
603 */
604 /* WARNING needs to do selthreadclear() before calling this */
605 void
606 sbrelease(sb)
607 struct sockbuf *sb;
608 {
609
610 sbflush(sb);
611 sb->sb_hiwat = 0;
612 sb->sb_mbmax = 0;
613
614 }
615
616 /*
617 * Routines to add and remove
618 * data from an mbuf queue.
619 *
620 * The routines sbappend() or sbappendrecord() are normally called to
621 * append new mbufs to a socket buffer, after checking that adequate
622 * space is available, comparing the function sbspace() with the amount
623 * of data to be added. sbappendrecord() differs from sbappend() in
624 * that data supplied is treated as the beginning of a new record.
625 * To place a sender's address, optional access rights, and data in a
626 * socket receive buffer, sbappendaddr() should be used. To place
627 * access rights and data in a socket receive buffer, sbappendrights()
628 * should be used. In either case, the new data begins a new record.
629 * Note that unlike sbappend() and sbappendrecord(), these routines check
630 * for the caller that there will be enough space to store the data.
631 * Each fails if there is not enough space, or if it cannot find mbufs
632 * to store additional information in.
633 *
634 * Reliable protocols may use the socket send buffer to hold data
635 * awaiting acknowledgement. Data is normally copied from a socket
636 * send buffer in a protocol with m_copy for output to a peer,
637 * and then removing the data from the socket buffer with sbdrop()
638 * or sbdroprecord() when the data is acknowledged by the peer.
639 */
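
/*
 * Sketch (editor's addition) of the reliable-protocol pattern in the
 * comment above: data waits in so_snd for acknowledgement, is copied
 * out with m_copy() for (re)transmission, and is released with
 * sbdrop() once the peer acknowledges it.  off, len and acked are
 * assumed protocol state.
 */
#if 0
	/* transmit: copy len bytes at offset off without consuming them */
	m = m_copy(so->so_snd.sb_mb, off, len);
	/* ... hand m to the output path ... */

	/* later, when the peer acknowledges acked bytes: */
	sbdrop(&so->so_snd, acked);
	sowwakeup(so);	/* the sender may have room again */
#endif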
640
641 /*
642 * Append mbuf chain m to the last record in the
643 * socket buffer sb. The additional space associated
644 * the mbuf chain is recorded in sb. Empty mbufs are
645 * discarded and mbufs are compacted where possible.
646 */
647 int
648 sbappend(sb, m)
649 struct sockbuf *sb;
650 struct mbuf *m;
651 {
652 register struct mbuf *n, *sb_first;
653 int result = 0;
654 int error = 0;
655 int filtered = 0;
656
657
658 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);
659
660 if (m == 0)
661 return 0;
662
663 again:
664 sb_first = n = sb->sb_mb;
665 if (n) {
666 while (n->m_nextpkt)
667 n = n->m_nextpkt;
668 do {
669 if (n->m_flags & M_EOR) {
670 result = sbappendrecord(sb, m); /* XXXXXX!!!! */
671 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
672 return result;
673 }
674 } while (n->m_next && (n = n->m_next));
675 }
676
677 if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
678 error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
679 if (error) {
680 /* no data was appended, caller should not call sowakeup */
681 return 0;
682 }
683
684         /*
685          * If we ran any filters, the socket lock was dropped.  n and sb_first
686          * cached data from the socket buffer.  This cache is not valid
687          * since we dropped the lock, so we must start over.  Since filtered
688          * is set we won't run through the filters a second time.  We just
689          * set n and sb_first again.
690          */
691 if (filtered)
692 goto again;
693 }
694
695 result = sbcompress(sb, m, n);
696
697 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
698
699 return result;
700 }
701
702 #ifdef SOCKBUF_DEBUG
703 void
704 sbcheck(sb)
705 register struct sockbuf *sb;
706 {
707 register struct mbuf *m;
708 register struct mbuf *n = 0;
709 register u_long len = 0, mbcnt = 0;
710 lck_mtx_t *mutex_held;
711
712 if (sb->sb_so->so_proto->pr_getlock != NULL)
713 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
714 else
715 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
716
717 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
718
719 if (sbchecking == 0)
720 return;
721
722 for (m = sb->sb_mb; m; m = n) {
723 n = m->m_nextpkt;
724 for (; m; m = m->m_next) {
725 len += m->m_len;
726 mbcnt += MSIZE;
727 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
728 mbcnt += m->m_ext.ext_size;
729 }
730 }
731 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
732 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
733 mbcnt, sb->sb_mbcnt);
734 }
735 }
736 #endif
737
738 /*
739 * As above, except the mbuf chain
740 * begins a new record.
741 */
742 int
743 sbappendrecord(sb, m0)
744 register struct sockbuf *sb;
745 register struct mbuf *m0;
746 {
747 register struct mbuf *m;
748 int result = 0;
749
750 if (m0 == 0)
751 return 0;
752
753 if ((sb->sb_flags & SB_RECV) != 0) {
754 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
755 if (error != 0) {
756 if (error != EJUSTRETURN)
757 m_freem(m0);
758 return 0;
759 }
760 }
761
762 m = sb->sb_mb;
763 if (m)
764 while (m->m_nextpkt)
765 m = m->m_nextpkt;
766 /*
767 * Put the first mbuf on the queue.
768 * Note this permits zero length records.
769 */
770 sballoc(sb, m0);
771 if (m)
772 m->m_nextpkt = m0;
773 else
774 sb->sb_mb = m0;
775 m = m0->m_next;
776 m0->m_next = 0;
777 if (m && (m0->m_flags & M_EOR)) {
778 m0->m_flags &= ~M_EOR;
779 m->m_flags |= M_EOR;
780 }
781 return sbcompress(sb, m, m0);
782 }
783
784 /*
785 * As above except that OOB data
786 * is inserted at the beginning of the sockbuf,
787 * but after any other OOB data.
788 */
789 int
790 sbinsertoob(sb, m0)
791 struct sockbuf *sb;
792 struct mbuf *m0;
793 {
794 struct mbuf *m;
795 struct mbuf **mp;
796
797 if (m0 == 0)
798 return 0;
799
800 if ((sb->sb_flags & SB_RECV) != 0) {
801 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
802 sock_data_filt_flag_oob, NULL);
803
804 if (error) {
805 if (error != EJUSTRETURN) {
806 m_freem(m0);
807 }
808 return 0;
809 }
810 }
811
812 for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
813 m = *mp;
814 again:
815 switch (m->m_type) {
816
817 case MT_OOBDATA:
818 continue; /* WANT next train */
819
820 case MT_CONTROL:
821 m = m->m_next;
822 if (m)
823 goto again; /* inspect THIS train further */
824 }
825 break;
826 }
827 /*
828 * Put the first mbuf on the queue.
829 * Note this permits zero length records.
830 */
831 sballoc(sb, m0);
832 m0->m_nextpkt = *mp;
833 *mp = m0;
834 m = m0->m_next;
835 m0->m_next = 0;
836 if (m && (m0->m_flags & M_EOR)) {
837 m0->m_flags &= ~M_EOR;
838 m->m_flags |= M_EOR;
839 }
840 return sbcompress(sb, m, m0);
841 }
842
843 /*
844 * Append address and data, and optionally, control (ancillary) data
845 * to the receive queue of a socket. If present,
846 * m0 must include a packet header with total length.
847 * Returns 0 if no space in sockbuf or insufficient mbufs.
848 */
849 static int
850 sbappendaddr_internal(sb, asa, m0, control)
851 register struct sockbuf *sb;
852 struct sockaddr *asa;
853 struct mbuf *m0, *control;
854 {
855 register struct mbuf *m, *n;
856 int space = asa->sa_len;
857
858 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
859 panic("sbappendaddr");
860
861 if (m0)
862 space += m0->m_pkthdr.len;
863 for (n = control; n; n = n->m_next) {
864 space += n->m_len;
865 if (n->m_next == 0) /* keep pointer to last control buf */
866 break;
867 }
868 if (space > sbspace(sb))
869 return (0);
870 if (asa->sa_len > MLEN)
871 return (0);
872 MGET(m, M_DONTWAIT, MT_SONAME);
873 if (m == 0)
874 return (0);
875 m->m_len = asa->sa_len;
876 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
877 if (n)
878 n->m_next = m0; /* concatenate data to control */
879 else
880 control = m0;
881 m->m_next = control;
882 for (n = m; n; n = n->m_next)
883 sballoc(sb, n);
884 n = sb->sb_mb;
885 if (n) {
886 while (n->m_nextpkt)
887 n = n->m_nextpkt;
888 n->m_nextpkt = m;
889 } else
890 sb->sb_mb = m;
891 postevent(0,sb,EV_RWBYTES);
892 return (1);
893 }
894
895 int
896 sbappendaddr(
897 struct sockbuf* sb,
898 struct sockaddr* asa,
899 struct mbuf *m0,
900 struct mbuf *control,
901 int *error_out)
902 {
903 int result = 0;
904
905 if (error_out) *error_out = 0;
906
907 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
908 panic("sbappendaddrorfree");
909
910 /* Call socket data in filters */
911 if ((sb->sb_flags & SB_RECV) != 0) {
912 int error;
913 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
914 if (error) {
915 if (error != EJUSTRETURN) {
916 if (m0) m_freem(m0);
917 if (control) m_freem(control);
918 if (error_out) *error_out = error;
919 }
920 return 0;
921 }
922 }
923
924 result = sbappendaddr_internal(sb, asa, m0, control);
925 if (result == 0) {
926 if (m0) m_freem(m0);
927 if (control) m_freem(control);
928 if (error_out) *error_out = ENOBUFS;
929 }
930
931 return result;
932 }
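
/*
 * Sketch (editor's addition): a datagram protocol's input path uses
 * sbappendaddr() so each record carries the sender's address, per the
 * receive-buffer conventions described before soreserve().  from_addr,
 * m and opts are assumed to come from the received packet.
 */
#if 0
	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from_addr,
	    m, opts, NULL) == 0)
		goto drop;	/* on failure the mbufs were consumed or freed */
	sorwakeup(so);		/* the record is now readable */
#endif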
933
934 static int
935 sbappendcontrol_internal(sb, m0, control)
936 struct sockbuf *sb;
937 struct mbuf *control, *m0;
938 {
939 register struct mbuf *m, *n;
940 int space = 0;
941
942 if (control == 0)
943 panic("sbappendcontrol");
944
945 for (m = control; ; m = m->m_next) {
946 space += m->m_len;
947 if (m->m_next == 0)
948 break;
949 }
950 n = m; /* save pointer to last control buffer */
951 for (m = m0; m; m = m->m_next)
952 space += m->m_len;
953 if (space > sbspace(sb))
954 return (0);
955 n->m_next = m0; /* concatenate data to control */
956 for (m = control; m; m = m->m_next)
957 sballoc(sb, m);
958 n = sb->sb_mb;
959 if (n) {
960 while (n->m_nextpkt)
961 n = n->m_nextpkt;
962 n->m_nextpkt = control;
963 } else
964 sb->sb_mb = control;
965 postevent(0,sb,EV_RWBYTES);
966 return (1);
967 }
968
969 int
970 sbappendcontrol(
971 struct sockbuf *sb,
972 struct mbuf *m0,
973 struct mbuf *control,
974 int *error_out)
975 {
976 int result = 0;
977
978 if (error_out) *error_out = 0;
979
980 if (sb->sb_flags & SB_RECV) {
981 int error;
982 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
983 if (error) {
984 if (error != EJUSTRETURN) {
985 if (m0) m_freem(m0);
986 if (control) m_freem(control);
987 if (error_out) *error_out = error;
988 }
989 return 0;
990 }
991 }
992
993 result = sbappendcontrol_internal(sb, m0, control);
994 if (result == 0) {
995 if (m0) m_freem(m0);
996 if (control) m_freem(control);
997 if (error_out) *error_out = ENOBUFS;
998 }
999
1000 return result;
1001 }
1002
1003 /*
1004 * Compress mbuf chain m into the socket
1005 * buffer sb following mbuf n. If n
1006 * is null, the buffer is presumed empty.
1007 */
1008 static int
1009 sbcompress(sb, m, n)
1010 register struct sockbuf *sb;
1011 register struct mbuf *m, *n;
1012 {
1013 register int eor = 0;
1014 register struct mbuf *o;
1015
1016 while (m) {
1017 eor |= m->m_flags & M_EOR;
1018 if (m->m_len == 0 &&
1019 (eor == 0 ||
1020 (((o = m->m_next) || (o = n)) &&
1021 o->m_type == m->m_type))) {
1022 m = m_free(m);
1023 continue;
1024 }
1025 if (n && (n->m_flags & M_EOR) == 0 &&
1026 #ifndef __APPLE__
1027 M_WRITABLE(n) &&
1028 #endif
1029 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1030 m->m_len <= M_TRAILINGSPACE(n) &&
1031 n->m_type == m->m_type) {
1032 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1033 (unsigned)m->m_len);
1034 n->m_len += m->m_len;
1035 sb->sb_cc += m->m_len;
1036 m = m_free(m);
1037 continue;
1038 }
1039 if (n)
1040 n->m_next = m;
1041 else
1042 sb->sb_mb = m;
1043 sballoc(sb, m);
1044 n = m;
1045 m->m_flags &= ~M_EOR;
1046 m = m->m_next;
1047 n->m_next = 0;
1048 }
1049 if (eor) {
1050 if (n)
1051 n->m_flags |= eor;
1052 else
1053 printf("semi-panic: sbcompress\n");
1054 }
1055 postevent(0,sb, EV_RWBYTES);
1056 return 1;
1057 }
1058
1059 /*
1060 * Free all mbufs in a sockbuf.
1061 * Check that all resources are reclaimed.
1062 */
1063 void
1064 sbflush(sb)
1065 register struct sockbuf *sb;
1066 {
1067 if (sb->sb_so == NULL)
1068 panic ("sbflush sb->sb_so already null sb=%x\n", sb);
1069 (void)sblock(sb, M_WAIT);
1070 while (sb->sb_mbcnt) {
1071 /*
1072 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1073 * we would loop forever. Panic instead.
1074 */
1075 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
1076 break;
1077 sbdrop(sb, (int)sb->sb_cc);
1078 }
1079 if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
1080 panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);
1081
1082 postevent(0, sb, EV_RWBYTES);
1083 sbunlock(sb, 1); /* keep socket locked */
1084
1085 }
1086
1087 /*
1088 * Drop data from (the front of) a sockbuf.
1089 * use m_freem_list to free the mbuf structures
1090 * under a single lock... this is done by pruning
1091 * the top of the tree from the body by keeping track
1092 * of where we get to in the tree and then zeroing the
1093 * two pertinent pointers m_nextpkt and m_next
1094 * the socket buffer is then updated to point at the new
1095 * top of the tree and the pruned area is released via
1096 * m_freem_list.
1097 */
1098 void
1099 sbdrop(sb, len)
1100 register struct sockbuf *sb;
1101 register int len;
1102 {
1103 register struct mbuf *m, *free_list, *ml;
1104 struct mbuf *next, *last;
1105
1106 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1107
1108 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
1109 free_list = last = m;
1110 ml = (struct mbuf *)0;
1111
1112 while (len > 0) {
1113 if (m == 0) {
1114 if (next == 0) {
1115                         /* temporarily replacing this panic with printf because
1116                          * it occurs occasionally when closing a socket, and in
1117                          * that case there is no harm in ignoring it.  This problem
1118                          * will be investigated further.
1119                          */
1120 /* panic("sbdrop"); */
1121 printf("sbdrop - count not zero\n");
1122 len = 0;
1123 /* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
1124 sb->sb_cc = 0;
1125 sb->sb_mbcnt = 0;
1126 break;
1127 }
1128 m = last = next;
1129 next = m->m_nextpkt;
1130 continue;
1131 }
1132 if (m->m_len > len) {
1133 m->m_len -= len;
1134 m->m_data += len;
1135 sb->sb_cc -= len;
1136 break;
1137 }
1138 len -= m->m_len;
1139 sbfree(sb, m);
1140
1141 ml = m;
1142 m = m->m_next;
1143 }
1144 while (m && m->m_len == 0) {
1145 sbfree(sb, m);
1146
1147 ml = m;
1148 m = m->m_next;
1149 }
1150 if (ml) {
1151 ml->m_next = (struct mbuf *)0;
1152 last->m_nextpkt = (struct mbuf *)0;
1153 m_freem_list(free_list);
1154 }
1155 if (m) {
1156 sb->sb_mb = m;
1157 m->m_nextpkt = next;
1158 } else
1159 sb->sb_mb = next;
1160
1161 postevent(0, sb, EV_RWBYTES);
1162
1163 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1164 }
1165
1166 /*
1167 * Drop a record off the front of a sockbuf
1168 * and move the next record to the front.
1169 */
1170 void
1171 sbdroprecord(sb)
1172 register struct sockbuf *sb;
1173 {
1174 register struct mbuf *m, *mn;
1175
1176 m = sb->sb_mb;
1177 if (m) {
1178 sb->sb_mb = m->m_nextpkt;
1179 do {
1180 sbfree(sb, m);
1181 MFREE(m, mn);
1182 m = mn;
1183 } while (m);
1184 }
1185 postevent(0, sb, EV_RWBYTES);
1186 }
1187
1188 /*
1189 * Create a "control" mbuf containing the specified data
1190 * with the specified type for presentation on a socket buffer.
1191 */
1192 struct mbuf *
1193 sbcreatecontrol(p, size, type, level)
1194 caddr_t p;
1195 register int size;
1196 int type, level;
1197 {
1198 register struct cmsghdr *cp;
1199 struct mbuf *m;
1200
1201 if (CMSG_SPACE((u_int)size) > MLEN)
1202 return ((struct mbuf *) NULL);
1203 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
1204 return ((struct mbuf *) NULL);
1205 cp = mtod(m, struct cmsghdr *);
1206 /* XXX check size? */
1207 (void)memcpy(CMSG_DATA(cp), p, size);
1208 m->m_len = CMSG_SPACE(size);
1209 cp->cmsg_len = CMSG_LEN(size);
1210 cp->cmsg_level = level;
1211 cp->cmsg_type = type;
1212 return (m);
1213 }
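
/*
 * Sketch (editor's addition): building ancillary data with
 * sbcreatecontrol() and queueing it ahead of the payload, in the
 * style of a protocol delivering a receive timestamp.  from and m
 * are assumed from the input path.
 */
#if 0
	struct timeval tv;
	struct mbuf *control;

	microtime(&tv);
	control = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
	    SCM_TIMESTAMP, SOL_SOCKET);
	if (control != NULL &&
	    sbappendaddr(&so->so_rcv, from, m, control, NULL) != 0)
		sorwakeup(so);
#endif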
1214
1215 /*
1216 * Some routines that return EOPNOTSUPP for entry points that are not
1217 * supported by a protocol. Fill in as needed.
1218 */
1219 int
1220 pru_abort_notsupp(struct socket *so)
1221 {
1222 return EOPNOTSUPP;
1223 }
1224
1225
1226 int
1227 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
1228 {
1229 return EOPNOTSUPP;
1230 }
1231
1232 int
1233 pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
1234 {
1235 return EOPNOTSUPP;
1236 }
1237
1238 int
1239 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1240 {
1241 return EOPNOTSUPP;
1242 }
1243
1244 int
1245 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1246 {
1247 return EOPNOTSUPP;
1248 }
1249
1250 int
1251 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
1252 {
1253 return EOPNOTSUPP;
1254 }
1255
1256 int
1257 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
1258 struct ifnet *ifp, struct proc *p)
1259 {
1260 return EOPNOTSUPP;
1261 }
1262
1263 int
1264 pru_detach_notsupp(struct socket *so)
1265 {
1266 return EOPNOTSUPP;
1267 }
1268
1269 int
1270 pru_disconnect_notsupp(struct socket *so)
1271 {
1272 return EOPNOTSUPP;
1273 }
1274
1275 int
1276 pru_listen_notsupp(struct socket *so, struct proc *p)
1277 {
1278 return EOPNOTSUPP;
1279 }
1280
1281 int
1282 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
1283 {
1284 return EOPNOTSUPP;
1285 }
1286
1287 int
1288 pru_rcvd_notsupp(struct socket *so, int flags)
1289 {
1290 return EOPNOTSUPP;
1291 }
1292
1293 int
1294 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
1295 {
1296 return EOPNOTSUPP;
1297 }
1298
1299 int
1300 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
1301 struct sockaddr *addr, struct mbuf *control,
1302 struct proc *p)
1303
1304 {
1305 return EOPNOTSUPP;
1306 }
1307
1308
1309 /*
1310 * This isn't really a ``null'' operation, but it's the default one
1311 * and doesn't do anything destructive.
1312 */
1313 int
1314 pru_sense_null(struct socket *so, struct stat *sb)
1315 {
1316 sb->st_blksize = so->so_snd.sb_hiwat;
1317 return 0;
1318 }
1319
1320
1321 int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
1322 struct uio *uio, struct mbuf *top,
1323 struct mbuf *control, int flags)
1324
1325 {
1326 return EOPNOTSUPP;
1327 }
1328
1329 int pru_soreceive_notsupp(struct socket *so,
1330 struct sockaddr **paddr,
1331 struct uio *uio, struct mbuf **mp0,
1332 struct mbuf **controlp, int *flagsp)
1333 {
1334 return EOPNOTSUPP;
1335 }
1336
1337 int
1338
1339 pru_shutdown_notsupp(struct socket *so)
1340 {
1341 return EOPNOTSUPP;
1342 }
1343
1344 int
1345 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
1346 {
1347 return EOPNOTSUPP;
1348 }
1349
1350 int pru_sosend(struct socket *so, struct sockaddr *addr,
1351 struct uio *uio, struct mbuf *top,
1352 struct mbuf *control, int flags)
1353 {
1354 return EOPNOTSUPP;
1355 }
1356
1357 int pru_soreceive(struct socket *so,
1358 struct sockaddr **paddr,
1359 struct uio *uio, struct mbuf **mp0,
1360 struct mbuf **controlp, int *flagsp)
1361 {
1362 return EOPNOTSUPP;
1363 }
1364
1365
1366 int
1367 pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
1368 __unused kauth_cred_t cred, __unused void *wql)
1369 {
1370 return EOPNOTSUPP;
1371 }
1372
1373
1374 #ifdef __APPLE__
1375 /*
1376 * The following are macros on BSD and functions on Darwin
1377 */
1378
1379 /*
1380 * Do we need to notify the other side when I/O is possible?
1381 */
1382
1383 int
1384 sb_notify(struct sockbuf *sb)
1385 {
1386 return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
1387 }
1388
1389 /*
1390 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1391 * This is problematical if the fields are unsigned, as the space might
1392 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1393 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1394 */
1395 long
1396 sbspace(struct sockbuf *sb)
1397 {
1398 return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
1399 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
1400 }
1401
1402 /* do we have to send all at once on a socket? */
1403 int
1404 sosendallatonce(struct socket *so)
1405 {
1406 return (so->so_proto->pr_flags & PR_ATOMIC);
1407 }
1408
1409 /* can we read something from so? */
1410 int
1411 soreadable(struct socket *so)
1412 {
1413 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
1414 (so->so_state & SS_CANTRCVMORE) ||
1415 so->so_comp.tqh_first || so->so_error);
1416 }
1417
1418 /* can we write something to so? */
1419
1420 int
1421 sowriteable(struct socket *so)
1422 {
1423 return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
1424 ((so->so_state&SS_ISCONNECTED) ||
1425 (so->so_proto->pr_flags&PR_CONNREQUIRED)==0)) ||
1426 (so->so_state & SS_CANTSENDMORE) ||
1427 so->so_error);
1428 }
1429
1430 /* adjust counters in sb reflecting allocation of m */
1431
1432 void
1433 sballoc(struct sockbuf *sb, struct mbuf *m)
1434 {
1435 sb->sb_cc += m->m_len;
1436 sb->sb_mbcnt += MSIZE;
1437 if (m->m_flags & M_EXT)
1438 sb->sb_mbcnt += m->m_ext.ext_size;
1439 }
1440
1441 /* adjust counters in sb reflecting freeing of m */
1442 void
1443 sbfree(struct sockbuf *sb, struct mbuf *m)
1444 {
1445 sb->sb_cc -= m->m_len;
1446 sb->sb_mbcnt -= MSIZE;
1447 if (m->m_flags & M_EXT)
1448 sb->sb_mbcnt -= m->m_ext.ext_size;
1449 }
1450
1451 /*
1452 * Set lock on sockbuf sb; sleep if lock is already held.
1453 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1454 * Returns error without lock if sleep is interrupted.
1455 */
1456 int
1457 sblock(struct sockbuf *sb, int wf)
1458 {
1459         return(sb->sb_flags & SB_LOCK ?
1460                 ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
1461                 (sb->sb_flags |= SB_LOCK, 0));	/* take the lock, return 0 */
1462 }
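
/*
 * Sketch (editor's addition): the usual sblock()/sbunlock() pairing
 * around operations that need a stable buffer, as sbflush() does
 * above with M_WAIT.
 */
#if 0
	if ((error = sblock(sb, M_WAIT)) != 0)
		return (error);	/* sleep interrupted, lock not held */
	/* ... walk or modify sb->sb_mb safely ... */
	sbunlock(sb, 0);	/* clears SB_LOCK and drops the socket lock */
#endif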
1463
1464 /* release lock on sockbuf sb */
1465 void
1466 sbunlock(struct sockbuf *sb, int keeplocked)
1467 {
1468 struct socket *so = sb->sb_so;
1469 int lr, lr_saved;
1470 lck_mtx_t *mutex_held;
1471
1472 #ifdef __ppc__
1473 __asm__ volatile("mflr %0" : "=r" (lr));
1474 lr_saved = lr;
1475 #endif
1476 sb->sb_flags &= ~SB_LOCK;
1477
1478 if (so->so_proto->pr_getlock != NULL)
1479 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1480 else
1481 mutex_held = so->so_proto->pr_domain->dom_mtx;
1482
1483 if (keeplocked == 0)
1484 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1485
1486 if (sb->sb_flags & SB_WANT) {
1487 sb->sb_flags &= ~SB_WANT;
1488 if (so->so_usecount < 0)
1489 panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);
1490
1491 wakeup((caddr_t)&(sb)->sb_flags);
1492 }
1493 if (keeplocked == 0) { /* unlock on exit */
1494 so->so_usecount--;
1495 if (so->so_usecount < 0)
1496                         panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
1497 so->reserved4= lr_saved;
1498 lck_mtx_unlock(mutex_held);
1499 }
1500 }
1501
1502 void
1503 sorwakeup(struct socket * so)
1504 {
1505 if (sb_notify(&so->so_rcv))
1506 sowakeup(so, &so->so_rcv);
1507 }
1508
1509 void
1510 sowwakeup(struct socket * so)
1511 {
1512 if (sb_notify(&so->so_snd))
1513 sowakeup(so, &so->so_snd);
1514 }
1515 #endif /* __APPLE__ */
1516
1517 /*
1518 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1519 */
1520 struct sockaddr *
1521 dup_sockaddr(sa, canwait)
1522 struct sockaddr *sa;
1523 int canwait;
1524 {
1525 struct sockaddr *sa2;
1526
1527 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1528 canwait ? M_WAITOK : M_NOWAIT);
1529 if (sa2)
1530 bcopy(sa, sa2, sa->sa_len);
1531 return sa2;
1532 }
1533
1534 /*
1535 * Create an external-format (``xsocket'') structure using the information
1536 * in the kernel-format socket structure pointed to by so. This is done
1537 * to reduce the spew of irrelevant information over this interface,
1538 * to isolate user code from changes in the kernel structure, and
1539 * potentially to provide information-hiding if we decide that
1540 * some of this information should be hidden from users.
1541 */
1542 void
1543 sotoxsocket(struct socket *so, struct xsocket *xso)
1544 {
1545 xso->xso_len = sizeof *xso;
1546 xso->xso_so = so;
1547 xso->so_type = so->so_type;
1548 xso->so_options = so->so_options;
1549 xso->so_linger = so->so_linger;
1550 xso->so_state = so->so_state;
1551 xso->so_pcb = so->so_pcb;
1552 if (so->so_proto) {
1553 xso->xso_protocol = so->so_proto->pr_protocol;
1554 xso->xso_family = so->so_proto->pr_domain->dom_family;
1555 }
1556 else
1557 xso->xso_protocol = xso->xso_family = 0;
1558 xso->so_qlen = so->so_qlen;
1559 xso->so_incqlen = so->so_incqlen;
1560 xso->so_qlimit = so->so_qlimit;
1561 xso->so_timeo = so->so_timeo;
1562 xso->so_error = so->so_error;
1563 xso->so_pgid = so->so_pgid;
1564 xso->so_oobmark = so->so_oobmark;
1565 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
1566 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
1567 xso->so_uid = so->so_uid;
1568 }
1569
1570 /*
1571 * This does the same for sockbufs. Note that the xsockbuf structure,
1572 * since it is always embedded in a socket, does not include a self
1573 * pointer nor a length. We make this entry point public in case
1574 * some other mechanism needs it.
1575 */
1576 void
1577 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1578 {
1579 xsb->sb_cc = sb->sb_cc;
1580 xsb->sb_hiwat = sb->sb_hiwat;
1581 xsb->sb_mbcnt = sb->sb_mbcnt;
1582 xsb->sb_mbmax = sb->sb_mbmax;
1583 xsb->sb_lowat = sb->sb_lowat;
1584 xsb->sb_flags = sb->sb_flags;
1585 xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
1586 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
1587 xsb->sb_timeo = 1;
1588 }
1589
1590 /*
1591 * Here is the definition of some of the basic objects in the kern.ipc
1592 * branch of the MIB.
1593 */
1594 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
1595
1596 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1597 static int dummy;
1598 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1599
1600 SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
1601 &sb_max, 0, "Maximum socket buffer size");
1602 SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
1603            &maxsockets, 0, "Maximum number of sockets available");
1604 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
1605 &sb_efficiency, 0, "");
1606 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
1607