1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
31 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
65 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
66 */
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc_internal.h>
72 #include <sys/kauth.h>
73 #include <sys/file_internal.h>
74 #include <sys/fcntl.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/domain.h>
78 #include <sys/kernel.h>
79 #include <sys/event.h>
80 #include <sys/poll.h>
81 #include <sys/protosw.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/resourcevar.h>
85 #include <sys/signalvar.h>
86 #include <sys/sysctl.h>
87 #include <sys/uio.h>
88 #include <sys/ev.h>
89 #include <sys/kdebug.h>
90 #include <net/route.h>
91 #include <netinet/in.h>
92 #include <netinet/in_pcb.h>
93 #include <kern/zalloc.h>
94 #include <kern/locks.h>
95 #include <machine/limits.h>
96
97 int so_cache_hw = 0;
98 int so_cache_timeouts = 0;
99 int so_cache_max_freed = 0;
100 int cached_sock_count = 0;
101 struct socket *socket_cache_head = 0;
102 struct socket *socket_cache_tail = 0;
103 u_long so_cache_time = 0;
104 int so_cache_init_done = 0;
105 struct zone *so_cache_zone;
106 extern int get_inpcb_str_size();
107 extern int get_tcp_str_size();
108
109 static lck_grp_t *so_cache_mtx_grp;
110 static lck_attr_t *so_cache_mtx_attr;
111 static lck_grp_attr_t *so_cache_mtx_grp_attr;
112 lck_mtx_t *so_cache_mtx;
113
114 #include <machine/limits.h>
115
116 static void filt_sordetach(struct knote *kn);
117 static int filt_soread(struct knote *kn, long hint);
118 static void filt_sowdetach(struct knote *kn);
119 static int filt_sowrite(struct knote *kn, long hint);
120 static int filt_solisten(struct knote *kn, long hint);
121
122 static struct filterops solisten_filtops =
123 { 1, NULL, filt_sordetach, filt_solisten };
124 static struct filterops soread_filtops =
125 { 1, NULL, filt_sordetach, filt_soread };
126 static struct filterops sowrite_filtops =
127 { 1, NULL, filt_sowdetach, filt_sowrite };
128
129 #define EVEN_MORE_LOCKING_DEBUG 0
130 int socket_debug = 0;
131 int socket_zone = M_SOCKET;
132 so_gen_t so_gencnt; /* generation count for sockets */
133
134 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
135 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
136
137 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
138 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
139 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
140 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
141 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
142 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
143 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
144
145 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
146
147
148 SYSCTL_DECL(_kern_ipc);
149
150 static int somaxconn = SOMAXCONN;
151 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
152 0, "");
153
154 /* Should we get a maximum also ??? */
155 static int sosendmaxchain = 65536;
156 static int sosendminchain = 16384;
157 static int sorecvmincopy = 16384;
158 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
159 0, "");
160 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
161 0, "");
162
163 void so_cache_timer();
164
165 /*
166 * Socket operation routines.
167 * These routines are called by the routines in
168 * sys_socket.c or from a system process, and
169 * implement the semantics of socket operations by
170 * switching out to the protocol specific routines.
171 */
172
173 #ifdef __APPLE__
174
175 vm_size_t so_cache_zone_element_size;
176
177 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
178
179
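/*
 * One-time initialization of the socket layer cache: set up the lock
 * group, attributes and mutex protecting the cached-socket list, create
 * the zone used for cached PF_INET/SOCK_STREAM sockets (socket + inpcb +
 * tcpcb carved from a single allocation), arm the periodic cache flush
 * timer and initialize the socket filter subsystem.
 */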
180 void socketinit()
181 {
182 vm_size_t str_size;
183
184 if (so_cache_init_done) {
185 printf("socketinit: already called...\n");
186 return;
187 }
188
189 /*
190 * allocate lock group attribute and group for socket cache mutex
191 */
192 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
193 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
194
195 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
196
197 /*
198 * allocate the lock attribute for socket cache mutex
199 */
200 so_cache_mtx_attr = lck_attr_alloc_init();
201 lck_attr_setdefault(so_cache_mtx_attr);
202
203 so_cache_init_done = 1;
204
205 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
206
207 if (so_cache_mtx == NULL)
208 return; /* we're hosed... */
209
210 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
211 get_inpcb_str_size() + 4 +
212 get_tcp_str_size());
213 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
214 #if TEMPDEBUG
215 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
216 #endif
217 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
218
219 so_cache_zone_element_size = str_size;
220
221 sflt_init();
222
223 }
224
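/*
 * Allocate a socket for the PF_INET/SOCK_STREAM fast path.  Reuse an
 * entry from the socket cache when one is available (preserving its
 * saved pcb pointer); otherwise carve a fresh socket out of
 * so_cache_zone and lay out the inpcb and tcpcb storage, longword
 * aligned, within the same block.
 */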
225 void cached_sock_alloc(so, waitok)
226 struct socket **so;
227 int waitok;
228
229 {
230 caddr_t temp;
231 register u_long offset;
232
233
234 lck_mtx_lock(so_cache_mtx);
235
236 if (cached_sock_count) {
237 cached_sock_count--;
238 *so = socket_cache_head;
239 if (*so == 0)
240 panic("cached_sock_alloc: cached sock is null");
241
242 socket_cache_head = socket_cache_head->cache_next;
243 if (socket_cache_head)
244 socket_cache_head->cache_prev = 0;
245 else
246 socket_cache_tail = 0;
247
248 lck_mtx_unlock(so_cache_mtx);
249
250 temp = (*so)->so_saved_pcb;
251 bzero((caddr_t)*so, sizeof(struct socket));
252 #if TEMPDEBUG
253 kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so,
254 cached_sock_count);
255 #endif
256 (*so)->so_saved_pcb = temp;
257 (*so)->cached_in_sock_layer = 1;
258
259 }
260 else {
261 #if TEMPDEBUG
262 kprintf("Allocating cached sock %x from memory\n", *so);
263 #endif
264
265 lck_mtx_unlock(so_cache_mtx);
266
267 if (waitok)
268 *so = (struct socket *) zalloc(so_cache_zone);
269 else
270 *so = (struct socket *) zalloc_noblock(so_cache_zone);
271
272 if (*so == 0)
273 return;
274
275 bzero((caddr_t)*so, sizeof(struct socket));
276
277 /*
278 * Define offsets for extra structures into our single block of
279 * memory. Align extra structures on longword boundaries.
280 */
281
282
283 offset = (u_long) *so;
284 offset += sizeof(struct socket);
285 if (offset & 0x3) {
286 offset += 4;
287 offset &= 0xfffffffc;
288 }
289 (*so)->so_saved_pcb = (caddr_t) offset;
290 offset += get_inpcb_str_size();
291 if (offset & 0x3) {
292 offset += 4;
293 offset &= 0xfffffffc;
294 }
295
296 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
297 #if TEMPDEBUG
298 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
299 (*so)->so_saved_pcb,
300 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
301 #endif
302 }
303
304 (*so)->cached_in_sock_layer = 1;
305 }
306
307
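/*
 * Return a socket allocated by cached_sock_alloc() to the cache, or give
 * it back to the zone if the cache already holds MAX_CACHED_SOCKETS
 * entries.  The cache timestamp is recorded so that so_cache_timer()
 * can age idle entries out.
 */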
308 void cached_sock_free(so)
309 struct socket *so;
310 {
311
312 lck_mtx_lock(so_cache_mtx);
313
314 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
315 --cached_sock_count;
316 lck_mtx_unlock(so_cache_mtx);
317 #if TEMPDEBUG
318 kprintf("Freeing overflowed cached socket %x\n", so);
319 #endif
320 zfree(so_cache_zone, so);
321 }
322 else {
323 #if TEMPDEBUG
324 kprintf("Freeing socket %x into cache\n", so);
325 #endif
326 if (so_cache_hw < cached_sock_count)
327 so_cache_hw = cached_sock_count;
328
329 so->cache_next = socket_cache_head;
330 so->cache_prev = 0;
331 if (socket_cache_head)
332 socket_cache_head->cache_prev = so;
333 else
334 socket_cache_tail = so;
335
336 so->cache_timestamp = so_cache_time;
337 socket_cache_head = so;
338 lck_mtx_unlock(so_cache_mtx);
339 }
340
341 #if TEMPDEBUG
342 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
343 #endif
344
345
346 }
347
348
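/*
 * Periodic cache reaper: free cached sockets idle for longer than
 * SO_CACHE_TIME_LIMIT, at most SO_CACHE_MAX_FREE_BATCH per pass, then
 * re-arm the timeout for the next SO_CACHE_FLUSH_INTERVAL.
 */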
349 void so_cache_timer()
350 {
351 register struct socket *p;
352 register int n_freed = 0;
353
354
355 lck_mtx_lock(so_cache_mtx);
356
357 ++so_cache_time;
358
359 while ( (p = socket_cache_tail) )
360 {
361 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
362 break;
363
364 so_cache_timeouts++;
365
366 if ( (socket_cache_tail = p->cache_prev) )
367 p->cache_prev->cache_next = 0;
368 if (--cached_sock_count == 0)
369 socket_cache_head = 0;
370
371
372 zfree(so_cache_zone, p);
373
374 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
375 {
376 so_cache_max_freed++;
377 break;
378 }
379 }
380 lck_mtx_unlock(so_cache_mtx);
381
382 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
383
384
385 }
386 #endif /* __APPLE__ */
387
388 /*
389 * Get a socket structure from our zone, and initialize it.
390 * We don't implement `waitok' yet (see comments in uipc_domain.c).
391 * Note that it would probably be better to allocate socket
392 * and PCB at the same time, but I'm not convinced that all
393 * the protocols can be easily modified to do this.
394 */
395 struct socket *
396 soalloc(waitok, dom, type)
397 int waitok;
398 int dom;
399 int type;
400 {
401 struct socket *so;
402
403 if ((dom == PF_INET) && (type == SOCK_STREAM))
404 cached_sock_alloc(&so, waitok);
405 else
406 {
407 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
408 if (so)
409 bzero(so, sizeof *so);
410 }
411 /* XXX race condition for reentrant kernel */
412 //###LD Atomic add for so_gencnt
413 if (so) {
414 so->so_gencnt = ++so_gencnt;
415 so->so_zone = socket_zone;
416 }
417
418 return so;
419 }
420
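/*
 * Create a socket of the given domain, type and protocol: look up the
 * protocol switch entry, allocate the socket, record the creator's uid
 * and privilege, take a use count for the creation, and let the
 * protocol's pru_attach allocate the pcb.  On attach failure the socket
 * is released via sofreelastref().
 */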
421 int
422 socreate(dom, aso, type, proto)
423 int dom;
424 struct socket **aso;
425 register int type;
426 int proto;
427 {
428 struct proc *p = current_proc();
429 register struct protosw *prp;
430 register struct socket *so;
431 register int error = 0;
432 #if TCPDEBUG
433 extern int tcpconsdebug;
434 #endif
435 if (proto)
436 prp = pffindproto(dom, proto, type);
437 else
438 prp = pffindtype(dom, type);
439
440 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
441 return (EPROTONOSUPPORT);
442 #ifndef __APPLE__
443
444 if (p->p_prison && jail_socket_unixiproute_only &&
445 prp->pr_domain->dom_family != PF_LOCAL &&
446 prp->pr_domain->dom_family != PF_INET &&
447 prp->pr_domain->dom_family != PF_ROUTE) {
448 return (EPROTONOSUPPORT);
449 }
450
451 #endif
452 if (prp->pr_type != type)
453 return (EPROTOTYPE);
454 so = soalloc(p != 0, dom, type);
455 if (so == 0)
456 return (ENOBUFS);
457
458 TAILQ_INIT(&so->so_incomp);
459 TAILQ_INIT(&so->so_comp);
460 so->so_type = type;
461
462 #ifdef __APPLE__
463 if (p != 0) {
464 so->so_uid = kauth_cred_getuid(kauth_cred_get());
465 if (!suser(kauth_cred_get(),NULL))
466 so->so_state = SS_PRIV;
467 }
468 #else
469 so->so_cred = kauth_cred_get_with_ref();
470 #endif
471 so->so_proto = prp;
472 #ifdef __APPLE__
473 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
474 so->so_rcv.sb_so = so->so_snd.sb_so = so;
475 #endif
476
 477 //### Attachment will create the per-pcb lock if necessary and increase the refcount
478 so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */
479
480 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
481 if (error) {
482 /*
483 * Warning:
484 * If so_pcb is not zero, the socket will be leaked,
 485 * so the protocol attachment handler must be coded carefully
486 */
487 so->so_state |= SS_NOFDREF;
488 so->so_usecount--;
489 sofreelastref(so, 1); /* will deallocate the socket */
490 return (error);
491 }
492 #ifdef __APPLE__
493 prp->pr_domain->dom_refs++;
494 TAILQ_INIT(&so->so_evlist);
495
496 /* Attach socket filters for this protocol */
497 sflt_initsock(so);
498 #if TCPDEBUG
499 if (tcpconsdebug == 2)
500 so->so_options |= SO_DEBUG;
501 #endif
502 #endif
503
504 *aso = so;
505 return (0);
506 }
507
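/*
 * Bind the socket to a local address.  Attached socket filters get a
 * chance to intercept (or consume, via EJUSTRETURN) the request before
 * it is handed to the protocol's pru_bind.
 */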
508 int
509 sobind(so, nam)
510 struct socket *so;
511 struct sockaddr *nam;
512
513 {
514 struct proc *p = current_proc();
515 int error = 0;
516 struct socket_filter_entry *filter;
517 int filtered = 0;
518
519 socket_lock(so, 1);
520
521 /* Socket filter */
522 error = 0;
523 for (filter = so->so_filt; filter && (error == 0);
524 filter = filter->sfe_next_onsocket) {
525 if (filter->sfe_filter->sf_filter.sf_bind) {
526 if (filtered == 0) {
527 filtered = 1;
528 sflt_use(so);
529 socket_unlock(so, 0);
530 }
531 error = filter->sfe_filter->sf_filter.sf_bind(
532 filter->sfe_cookie, so, nam);
533 }
534 }
535 if (filtered != 0) {
536 socket_lock(so, 0);
537 sflt_unuse(so);
538 }
539 /* End socket filter */
540
541 if (error == 0)
542 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
543
544 socket_unlock(so, 1);
545
546 if (error == EJUSTRETURN)
547 error = 0;
548
549 return (error);
550 }
551
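/*
 * Final release of a socket's memory: bump the generation count, then
 * either return the socket to the socket-layer cache or free it back to
 * its zone, panicking on a double deallocation.
 */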
552 void
553 sodealloc(so)
554 struct socket *so;
555 {
556 so->so_gencnt = ++so_gencnt;
557
558 #ifndef __APPLE__
559 if (so->so_rcv.sb_hiwat)
560 (void)chgsbsize(so->so_cred->cr_uidinfo,
561 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
562 if (so->so_snd.sb_hiwat)
563 (void)chgsbsize(so->so_cred->cr_uidinfo,
564 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
565 #ifdef INET
566 if (so->so_accf != NULL) {
567 if (so->so_accf->so_accept_filter != NULL &&
568 so->so_accf->so_accept_filter->accf_destroy != NULL) {
569 so->so_accf->so_accept_filter->accf_destroy(so);
570 }
571 if (so->so_accf->so_accept_filter_str != NULL)
572 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
573 FREE(so->so_accf, M_ACCF);
574 }
575 #endif /* INET */
576 kauth_cred_rele(so->so_cred);
577 zfreei(so->so_zone, so);
578 #else
579 if (so->cached_in_sock_layer == 1)
580 cached_sock_free(so);
581 else {
582 if (so->cached_in_sock_layer == -1)
583 panic("sodealloc: double dealloc: so=%x\n", so);
584 so->cached_in_sock_layer = -1;
585 FREE_ZONE(so, sizeof(*so), so->so_zone);
586 }
587 #endif /* __APPLE__ */
588 }
589
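/*
 * Mark the socket as willing to accept connections.  Socket filters may
 * veto the request; otherwise the protocol's pru_listen runs,
 * SO_ACCEPTCONN is set if the completed-connection queue is empty, and
 * an out-of-range backlog is replaced by somaxconn.
 */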
590 int
591 solisten(so, backlog)
592 register struct socket *so;
593 int backlog;
594
595 {
596 struct proc *p = current_proc();
597 int error;
598
599 socket_lock(so, 1);
600
601 {
602 struct socket_filter_entry *filter;
603 int filtered = 0;
604 error = 0;
605 for (filter = so->so_filt; filter && (error == 0);
606 filter = filter->sfe_next_onsocket) {
607 if (filter->sfe_filter->sf_filter.sf_listen) {
608 if (filtered == 0) {
609 filtered = 1;
610 sflt_use(so);
611 socket_unlock(so, 0);
612 }
613 error = filter->sfe_filter->sf_filter.sf_listen(
614 filter->sfe_cookie, so);
615 }
616 }
617 if (filtered != 0) {
618 socket_lock(so, 0);
619 sflt_unuse(so);
620 }
621 }
622
623 if (error == 0) {
624 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
625 }
626
627 if (error) {
628 socket_unlock(so, 1);
629 if (error == EJUSTRETURN)
630 error = 0;
631 return (error);
632 }
633
634 if (TAILQ_EMPTY(&so->so_comp))
635 so->so_options |= SO_ACCEPTCONN;
636 if (backlog < 0 || backlog > somaxconn)
637 backlog = somaxconn;
638 so->so_qlimit = backlog;
639
640 socket_unlock(so, 1);
641 return (0);
642 }
643
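/*
 * Drop the last reference on a socket: detach its filters, remove it
 * from a listening parent's incomplete queue if needed, flush the send
 * and receive buffers and, if requested, deallocate it.  Sockets whose
 * pcb is not yet cleared, that are still referenced by a file
 * descriptor, or that sit on an accept(2) completion queue are left in
 * place.
 */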
644 void
645 sofreelastref(so, dealloc)
646 register struct socket *so;
647 int dealloc;
648 {
649 int error;
650 struct socket *head = so->so_head;
651
652 /*### Assume socket is locked */
653
654 /* Remove any filters - may be called more than once */
655 sflt_termsock(so);
656
657 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
658 #ifdef __APPLE__
659 selthreadclear(&so->so_snd.sb_sel);
660 selthreadclear(&so->so_rcv.sb_sel);
661 so->so_rcv.sb_flags &= ~SB_UPCALL;
662 so->so_snd.sb_flags &= ~SB_UPCALL;
663 #endif
664 return;
665 }
666 if (head != NULL) {
667 socket_lock(head, 1);
668 if (so->so_state & SS_INCOMP) {
669 TAILQ_REMOVE(&head->so_incomp, so, so_list);
670 head->so_incqlen--;
671 } else if (so->so_state & SS_COMP) {
672 /*
673 * We must not decommission a socket that's
674 * on the accept(2) queue. If we do, then
675 * accept(2) may hang after select(2) indicated
676 * that the listening socket was ready.
677 */
678 #ifdef __APPLE__
679 selthreadclear(&so->so_snd.sb_sel);
680 selthreadclear(&so->so_rcv.sb_sel);
681 so->so_rcv.sb_flags &= ~SB_UPCALL;
682 so->so_snd.sb_flags &= ~SB_UPCALL;
683 #endif
684 socket_unlock(head, 1);
685 return;
686 } else {
687 panic("sofree: not queued");
688 }
689 head->so_qlen--;
690 so->so_state &= ~SS_INCOMP;
691 so->so_head = NULL;
692 socket_unlock(head, 1);
693 }
694 #ifdef __APPLE__
695 selthreadclear(&so->so_snd.sb_sel);
696 sbrelease(&so->so_snd);
697 #endif
698 sorflush(so);
699
700 /* 3932268: disable upcall */
701 so->so_rcv.sb_flags &= ~SB_UPCALL;
702 so->so_snd.sb_flags &= ~SB_UPCALL;
703
704 if (dealloc)
705 sodealloc(so);
706 }
707
708 /*
709 * Close a socket on last file table reference removal.
710 * Initiate disconnect if connected.
711 * Free socket when disconnect complete.
712 */
713 int
714 soclose_locked(so)
715 register struct socket *so;
716 {
717 int error = 0;
718 lck_mtx_t * mutex_held;
719 struct timespec ts;
720
721 if (so->so_usecount == 0) {
722 panic("soclose: so=%x refcount=0\n", so);
723 }
724
725 sflt_notify(so, sock_evt_closing, NULL);
726
727 if ((so->so_options & SO_ACCEPTCONN)) {
728 struct socket *sp;
729
 730 /* We do not want new connections to be added to the connection queues */
731 so->so_options &= ~SO_ACCEPTCONN;
732
733 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
 734 /* A bit tricky here. If the protocol uses a single
 735 * global lock, we simply keep holding it; but with
 736 * per-socket locks we must drop the head's lock and
 737 * lock the incoming socket instead...
 738 */
739 if (so->so_proto->pr_getlock != NULL) {
740 socket_unlock(so, 0);
741 socket_lock(sp, 1);
742 }
743 (void) soabort(sp);
744 if (so->so_proto->pr_getlock != NULL) {
745 socket_unlock(sp, 1);
746 socket_lock(so, 0);
747 }
748 }
749
750 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
751 /* Dequeue from so_comp since sofree() won't do it */
752 TAILQ_REMOVE(&so->so_comp, sp, so_list);
753 so->so_qlen--;
754
755 if (so->so_proto->pr_getlock != NULL) {
756 socket_unlock(so, 0);
757 socket_lock(sp, 1);
758 }
759
760 sp->so_state &= ~SS_COMP;
761 sp->so_head = NULL;
762
763 (void) soabort(sp);
764 if (so->so_proto->pr_getlock != NULL) {
765 socket_unlock(sp, 1);
766 socket_lock(so, 0);
767 }
768 }
769 }
770 if (so->so_pcb == 0) {
771 /* 3915887: mark the socket as ready for dealloc */
772 so->so_flags |= SOF_PCBCLEARING;
773 goto discard;
774 }
775 if (so->so_state & SS_ISCONNECTED) {
776 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
777 error = sodisconnectlocked(so);
778 if (error)
779 goto drop;
780 }
781 if (so->so_options & SO_LINGER) {
782 if ((so->so_state & SS_ISDISCONNECTING) &&
783 (so->so_state & SS_NBIO))
784 goto drop;
785 if (so->so_proto->pr_getlock != NULL)
786 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
787 else
788 mutex_held = so->so_proto->pr_domain->dom_mtx;
789 while (so->so_state & SS_ISCONNECTED) {
790 ts.tv_sec = (so->so_linger/100);
791 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
792 error = msleep((caddr_t)&so->so_timeo, mutex_held,
793 PSOCK | PCATCH, "soclos", &ts);
794 if (error) {
 795 /* It's OK when the timer fires, don't report an error */
796 if (error == EWOULDBLOCK)
797 error = 0;
798 break;
799 }
800 }
801 }
802 }
803 drop:
804 if (so->so_usecount == 0)
805 panic("soclose: usecount is zero so=%x\n", so);
806 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
807 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
808 if (error == 0)
809 error = error2;
810 }
811 if (so->so_usecount <= 0)
812 panic("soclose: usecount is zero so=%x\n", so);
813 discard:
814 if (so->so_pcb && so->so_state & SS_NOFDREF)
815 panic("soclose: NOFDREF");
816 so->so_state |= SS_NOFDREF;
817 #ifdef __APPLE__
818 so->so_proto->pr_domain->dom_refs--;
819 evsofree(so);
820 #endif
821 so->so_usecount--;
822 sofree(so);
823 return (error);
824 }
825
826 int
827 soclose(so)
828 register struct socket *so;
829 {
830 int error = 0;
831 socket_lock(so, 1);
832 if (so->so_retaincnt == 0)
833 error = soclose_locked(so);
 834 else { /* if the FD is going away but the socket is retained in the kernel, remove its reference */
835 so->so_usecount--;
836 if (so->so_usecount < 2)
837 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount);
838 }
839 socket_unlock(so, 1);
840 return (error);
841 }
842
843
844 /*
845 * Must be called at splnet...
846 */
847 //#### Should already be locked
848 int
849 soabort(so)
850 struct socket *so;
851 {
852 int error;
853
854 #ifdef MORE_LOCKING_DEBUG
855 lck_mtx_t * mutex_held;
856
857 if (so->so_proto->pr_getlock != NULL)
858 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
859 else
860 mutex_held = so->so_proto->pr_domain->dom_mtx;
861 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
862 #endif
863
864 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
865 if (error) {
866 sofree(so);
867 return error;
868 }
869 return (0);
870 }
871
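/*
 * Accept a connection that has already been taken off a listener's
 * completed queue: clear SS_NOFDREF and fetch the peer address through
 * the protocol's pru_accept.  `dolock' selects whether the socket lock
 * is taken here or is already held by the caller.
 */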
872 int
873 soacceptlock(so, nam, dolock)
874 register struct socket *so;
875 struct sockaddr **nam;
876 int dolock;
877 {
878 int error;
879
880 if (dolock) socket_lock(so, 1);
881
882 if ((so->so_state & SS_NOFDREF) == 0)
883 panic("soaccept: !NOFDREF");
884 so->so_state &= ~SS_NOFDREF;
885 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
886
887 if (dolock) socket_unlock(so, 1);
888 return (error);
889 }
890 int
891 soaccept(so, nam)
892 register struct socket *so;
893 struct sockaddr **nam;
894 {
895 return (soacceptlock(so, nam, 1));
896 }
897
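/*
 * Initiate a connection to `nam'.  A listening socket cannot connect; a
 * connection-based socket may connect only once, while a connectionless
 * socket is implicitly disconnected first.  Connect-out filters run
 * before the protocol's pru_connect.
 */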
898 int
899 soconnectlock(so, nam, dolock)
900 register struct socket *so;
901 struct sockaddr *nam;
902 int dolock;
903
904 {
905 int s;
906 int error;
907 struct proc *p = current_proc();
908
909 if (dolock) socket_lock(so, 1);
910
911 if (so->so_options & SO_ACCEPTCONN) {
912 if (dolock) socket_unlock(so, 1);
913 return (EOPNOTSUPP);
914 }
915 /*
916 * If protocol is connection-based, can only connect once.
917 * Otherwise, if connected, try to disconnect first.
918 * This allows user to disconnect by connecting to, e.g.,
919 * a null address.
920 */
921 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
922 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
923 (error = sodisconnectlocked(so))))
924 error = EISCONN;
925 else {
926 /*
927 * Run connect filter before calling protocol:
928 * - non-blocking connect returns before completion;
929 */
930 {
931 struct socket_filter_entry *filter;
932 int filtered = 0;
933 error = 0;
934 for (filter = so->so_filt; filter && (error == 0);
935 filter = filter->sfe_next_onsocket) {
936 if (filter->sfe_filter->sf_filter.sf_connect_out) {
937 if (filtered == 0) {
938 filtered = 1;
939 sflt_use(so);
940 socket_unlock(so, 0);
941 }
942 error = filter->sfe_filter->sf_filter.sf_connect_out(
943 filter->sfe_cookie, so, nam);
944 }
945 }
946 if (filtered != 0) {
947 socket_lock(so, 0);
948 sflt_unuse(so);
949 }
950 }
951 if (error) {
952 if (error == EJUSTRETURN)
953 error = 0;
954 if (dolock) socket_unlock(so, 1);
955 return error;
956 }
957
958 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
959 }
960 if (dolock) socket_unlock(so, 1);
961 return (error);
962 }
963
964 int
965 soconnect(so, nam)
966 register struct socket *so;
967 struct sockaddr *nam;
968 {
969 return (soconnectlock(so, nam, 1));
970 }
971
972 int
973 soconnect2(so1, so2)
974 register struct socket *so1;
975 struct socket *so2;
976 {
977 int error;
 978 //####### Assumes so1 is already locked
979
980 socket_lock(so2, 1);
981
982 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
983
984 socket_unlock(so2, 1);
985 return (error);
986 }
987
988
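/*
 * Disconnect a connected socket; the caller holds the socket lock.
 * Returns ENOTCONN if the socket is not connected and EALREADY if a
 * disconnect is already in progress; on success the socket filters are
 * notified of the disconnect.
 */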
989 int
990 sodisconnectlocked(so)
991 register struct socket *so;
992 {
993 int error;
994
995 if ((so->so_state & SS_ISCONNECTED) == 0) {
996 error = ENOTCONN;
997 goto bad;
998 }
999 if (so->so_state & SS_ISDISCONNECTING) {
1000 error = EALREADY;
1001 goto bad;
1002 }
1003
1004 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1005
1006 if (error == 0) {
1007 sflt_notify(so, sock_evt_disconnected, NULL);
1008 }
1009
1010 bad:
1011 return (error);
1012 }
1013 //### Locking version
1014 int
1015 sodisconnect(so)
1016 register struct socket *so;
1017 {
1018 int error;
1019
1020 socket_lock(so, 1);
1021 error = sodisconnectlocked(so);
1022 socket_unlock(so, 1);
1023 return(error);
1024 }
1025
1026 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1027
1028 /*
1029 * sosendcheck will lock the socket buffer if it isn't locked and
1030 * verify that there is space for the data being inserted.
1031 */
1032
1033 static int
1034 sosendcheck(
1035 struct socket *so,
1036 struct sockaddr *addr,
1037 long resid,
1038 long clen,
1039 long atomic,
1040 int flags,
1041 int *sblocked)
1042 {
1043 int error = 0;
1044 long space;
1045 int assumelock = 0;
1046
1047 restart:
1048 if (*sblocked == 0) {
1049 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1050 so->so_send_filt_thread != 0 &&
1051 so->so_send_filt_thread == current_thread()) {
1052 /*
1053 * We're being called recursively from a filter,
1054 * allow this to continue. Radar 4150520.
1055 * Don't set sblocked because we don't want
1056 * to perform an unlock later.
1057 */
1058 assumelock = 1;
1059 }
1060 else {
1061 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1062 if (error) {
1063 return error;
1064 }
1065 *sblocked = 1;
1066 }
1067 }
1068
1069 if (so->so_state & SS_CANTSENDMORE)
1070 return EPIPE;
1071
1072 if (so->so_error) {
1073 error = so->so_error;
1074 so->so_error = 0;
1075 return error;
1076 }
1077
1078 if ((so->so_state & SS_ISCONNECTED) == 0) {
1079 /*
 1080 * `sendto' and `sendmsg' are allowed on a connection-
1081 * based socket if it supports implied connect.
1082 * Return ENOTCONN if not connected and no address is
1083 * supplied.
1084 */
1085 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1086 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1087 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1088 !(resid == 0 && clen != 0))
1089 return ENOTCONN;
1090 } else if (addr == 0 && !(flags&MSG_HOLD))
1091 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1092 }
1093 space = sbspace(&so->so_snd);
1094 if (flags & MSG_OOB)
1095 space += 1024;
1096 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1097 clen > so->so_snd.sb_hiwat)
1098 return EMSGSIZE;
1099 if (space < resid + clen &&
1100 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1101 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1102 return EWOULDBLOCK;
1103 }
1104 sbunlock(&so->so_snd, 1);
1105 error = sbwait(&so->so_snd);
1106 if (error) {
1107 return error;
1108 }
1109 goto restart;
1110 }
1111
1112 return 0;
1113 }
1114
1115 /*
1116 * Send on a socket.
1117 * If send must go all at once and message is larger than
1118 * send buffering, then hard error.
1119 * Lock against other senders.
1120 * If must go all at once and not enough room now, then
1121 * inform user that this would block and do nothing.
1122 * Otherwise, if nonblocking, send as much as possible.
1123 * The data to be sent is described by "uio" if nonzero,
1124 * otherwise by the mbuf chain "top" (which must be null
1125 * if uio is not). Data provided in mbuf chain must be small
1126 * enough to send all at once.
1127 *
1128 * Returns nonzero on error, timeout or signal; callers
1129 * must check for short counts if EINTR/ERESTART are returned.
1130 * Data and control buffers are freed on return.
1131 * Experiment:
1132 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1133 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1134 * point at the mbuf chain being constructed and go from there.
1135 */
1136 int
1137 sosend(so, addr, uio, top, control, flags)
1138 register struct socket *so;
1139 struct sockaddr *addr;
1140 struct uio *uio;
1141 struct mbuf *top;
1142 struct mbuf *control;
1143 int flags;
1144
1145 {
1146 struct mbuf **mp;
1147 register struct mbuf *m, *freelist = NULL;
1148 register long space, len, resid;
1149 int clen = 0, error, dontroute, mlen, sendflags;
1150 int atomic = sosendallatonce(so) || top;
1151 int sblocked = 0;
1152 struct proc *p = current_proc();
1153
1154 if (uio)
1155 // LP64todo - fix this!
1156 resid = uio_resid(uio);
1157 else
1158 resid = top->m_pkthdr.len;
1159
1160 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1161 so,
1162 resid,
1163 so->so_snd.sb_cc,
1164 so->so_snd.sb_lowat,
1165 so->so_snd.sb_hiwat);
1166
1167 socket_lock(so, 1);
1168
1169 /*
1170 * In theory resid should be unsigned.
1171 * However, space must be signed, as it might be less than 0
1172 * if we over-committed, and we must use a signed comparison
1173 * of space and resid. On the other hand, a negative resid
1174 * causes us to loop sending 0-length segments to the protocol.
1175 *
1176 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1177 * type sockets since that's an error.
1178 */
1179 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1180 error = EINVAL;
1181 socket_unlock(so, 1);
1182 goto out;
1183 }
1184
1185 dontroute =
1186 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1187 (so->so_proto->pr_flags & PR_ATOMIC);
1188 if (p)
1189 p->p_stats->p_ru.ru_msgsnd++;
1190 if (control)
1191 clen = control->m_len;
1192
1193 do {
1194 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1195 if (error) {
1196 goto release;
1197 }
1198 mp = &top;
1199 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1200
1201 do {
1202
1203 if (uio == NULL) {
1204 /*
1205 * Data is prepackaged in "top".
1206 */
1207 resid = 0;
1208 if (flags & MSG_EOR)
1209 top->m_flags |= M_EOR;
1210 } else {
1211 int chainlength;
1212 int bytes_to_copy;
1213
1214 bytes_to_copy = min(resid, space);
1215
1216 if (sosendminchain > 0) {
1217 chainlength = 0;
1218 } else
1219 chainlength = sosendmaxchain;
1220
1221 socket_unlock(so, 0);
1222
1223 do {
1224 int num_needed;
1225 int hdrs_needed = (top == 0) ? 1 : 0;
1226
1227 /*
1228 * try to maintain a local cache of mbuf clusters needed to complete this write
1229 * the list is further limited to the number that are currently needed to fill the socket
1230 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
 1231 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
 1232 * if we fail early (or miscalculate the number needed) make sure to release any clusters
1233 * we haven't yet consumed.
1234 */
1235 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1236 num_needed = bytes_to_copy / NBPG;
1237
1238 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1239 num_needed++;
1240
1241 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1242 /* Fall back to cluster size if allocation failed */
1243 }
1244
1245 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1246 num_needed = bytes_to_copy / MCLBYTES;
1247
1248 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1249 num_needed++;
1250
1251 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1252 /* Fall back to a single mbuf if allocation failed */
1253 }
1254
1255 if (freelist == NULL) {
1256 if (top == 0)
1257 MGETHDR(freelist, M_WAIT, MT_DATA);
1258 else
1259 MGET(freelist, M_WAIT, MT_DATA);
1260
1261 if (freelist == NULL) {
1262 error = ENOBUFS;
1263 socket_lock(so, 0);
1264 goto release;
1265 }
1266 /*
1267 * For datagram protocols, leave room
1268 * for protocol headers in first mbuf.
1269 */
1270 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1271 MH_ALIGN(freelist, bytes_to_copy);
1272 }
1273 m = freelist;
1274 freelist = m->m_next;
1275 m->m_next = NULL;
1276
1277 if ((m->m_flags & M_EXT))
1278 mlen = m->m_ext.ext_size;
1279 else if ((m->m_flags & M_PKTHDR))
1280 mlen = MHLEN - m_leadingspace(m);
1281 else
1282 mlen = MLEN;
1283 len = min(mlen, bytes_to_copy);
1284
1285 chainlength += len;
1286
1287 space -= len;
1288
1289 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1290
1291 // LP64todo - fix this!
1292 resid = uio_resid(uio);
1293
1294 m->m_len = len;
1295 *mp = m;
1296 top->m_pkthdr.len += len;
1297 if (error)
1298 break;
1299 mp = &m->m_next;
1300 if (resid <= 0) {
1301 if (flags & MSG_EOR)
1302 top->m_flags |= M_EOR;
1303 break;
1304 }
1305 bytes_to_copy = min(resid, space);
1306
1307 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1308
1309 socket_lock(so, 0);
1310
1311 if (error)
1312 goto release;
1313 }
1314
1315 if (flags & (MSG_HOLD|MSG_SEND))
1316 {
1317 /* Enqueue for later, go away if HOLD */
1318 register struct mbuf *mb1;
1319 if (so->so_temp && (flags & MSG_FLUSH))
1320 {
1321 m_freem(so->so_temp);
1322 so->so_temp = NULL;
1323 }
1324 if (so->so_temp)
1325 so->so_tail->m_next = top;
1326 else
1327 so->so_temp = top;
1328 mb1 = top;
1329 while (mb1->m_next)
1330 mb1 = mb1->m_next;
1331 so->so_tail = mb1;
1332 if (flags & MSG_HOLD)
1333 {
1334 top = NULL;
1335 goto release;
1336 }
1337 top = so->so_temp;
1338 }
1339 if (dontroute)
1340 so->so_options |= SO_DONTROUTE;
1341 /* Compute flags here, for pru_send and NKEs */
1342 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1343 /*
 1344 * If the user set MSG_EOF, the protocol
 1345 * understands this flag, and there is nothing left to
 1346 * send, then use PRU_SEND_EOF instead of PRU_SEND.
1347 */
1348 ((flags & MSG_EOF) &&
1349 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1350 (resid <= 0)) ?
1351 PRUS_EOF :
1352 /* If there is more to send set PRUS_MORETOCOME */
1353 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1354
1355 /*
1356 * Socket filter processing
1357 */
1358 {
1359 struct socket_filter_entry *filter;
1360 int filtered;
1361
1362 filtered = 0;
1363 error = 0;
1364 for (filter = so->so_filt; filter && (error == 0);
1365 filter = filter->sfe_next_onsocket) {
1366 if (filter->sfe_filter->sf_filter.sf_data_out) {
1367 int so_flags = 0;
1368 if (filtered == 0) {
1369 filtered = 1;
1370 so->so_send_filt_thread = current_thread();
1371 sflt_use(so);
1372 socket_unlock(so, 0);
1373 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1374 }
1375 error = filter->sfe_filter->sf_filter.sf_data_out(
1376 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1377 }
1378 }
1379
1380 if (filtered) {
1381 /*
1382 * At this point, we've run at least one filter.
1383 * The socket is unlocked as is the socket buffer.
1384 */
1385 socket_lock(so, 0);
1386 sflt_unuse(so);
1387 so->so_send_filt_thread = 0;
1388 if (error) {
1389 if (error == EJUSTRETURN) {
1390 error = 0;
1391 clen = 0;
1392 control = 0;
1393 top = 0;
1394 }
1395
1396 goto release;
1397 }
1398 }
1399 }
1400 /*
1401 * End Socket filter processing
1402 */
1403
1404 if (error == EJUSTRETURN) {
1405 /* A socket filter handled this data */
1406 error = 0;
1407 }
1408 else {
1409 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1410 sendflags, top, addr, control, p);
1411 }
1412 #ifdef __APPLE__
1413 if (flags & MSG_SEND)
1414 so->so_temp = NULL;
1415 #endif
1416 if (dontroute)
1417 so->so_options &= ~SO_DONTROUTE;
1418 clen = 0;
1419 control = 0;
1420 top = 0;
1421 mp = &top;
1422 if (error)
1423 goto release;
1424 } while (resid && space > 0);
1425 } while (resid);
1426
1427 release:
1428 if (sblocked)
1429 sbunlock(&so->so_snd, 0); /* will unlock socket */
1430 else
1431 socket_unlock(so, 1);
1432 out:
1433 if (top)
1434 m_freem(top);
1435 if (control)
1436 m_freem(control);
1437 if (freelist)
1438 m_freem_list(freelist);
1439
1440 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1441 so,
1442 resid,
1443 so->so_snd.sb_cc,
1444 space,
1445 error);
1446
1447 return (error);
1448 }
1449
1450 /*
1451 * Implement receive operations on a socket.
1452 * We depend on the way that records are added to the sockbuf
1453 * by sbappend*. In particular, each record (mbufs linked through m_next)
1454 * must begin with an address if the protocol so specifies,
1455 * followed by an optional mbuf or mbufs containing ancillary data,
1456 * and then zero or more mbufs of data.
1457 * In order to avoid blocking network interrupts for the entire time here,
1458 * we splx() while doing the actual copy to user space.
1459 * Although the sockbuf is locked, new data may still be appended,
1460 * and thus we must maintain consistency of the sockbuf during that time.
1461 *
1462 * The caller may receive the data as a single mbuf chain by supplying
1463 * an mbuf **mp0 for use in returning the chain. The uio is then used
1464 * only for the count in uio_resid.
1465 */
1466 int
1467 soreceive(so, psa, uio, mp0, controlp, flagsp)
1468 register struct socket *so;
1469 struct sockaddr **psa;
1470 struct uio *uio;
1471 struct mbuf **mp0;
1472 struct mbuf **controlp;
1473 int *flagsp;
1474 {
1475 register struct mbuf *m, **mp, *ml = NULL;
1476 register int flags, len, error, offset;
1477 struct protosw *pr = so->so_proto;
1478 struct mbuf *nextrecord;
1479 int moff, type = 0;
1480 // LP64todo - fix this!
1481 int orig_resid = uio_resid(uio);
1482 volatile struct mbuf *free_list;
1483 volatile int delayed_copy_len;
1484 int can_delay;
1485 int need_event;
1486 struct proc *p = current_proc();
1487
1488
1489 // LP64todo - fix this!
1490 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1491 so,
1492 uio_resid(uio),
1493 so->so_rcv.sb_cc,
1494 so->so_rcv.sb_lowat,
1495 so->so_rcv.sb_hiwat);
1496
1497 socket_lock(so, 1);
1498
1499 #ifdef MORE_LOCKING_DEBUG
1500 if (so->so_usecount == 1)
1501 panic("soreceive: so=%x no other reference on socket\n", so);
1502 #endif
1503 mp = mp0;
1504 if (psa)
1505 *psa = 0;
1506 if (controlp)
1507 *controlp = 0;
1508 if (flagsp)
1509 flags = *flagsp &~ MSG_EOR;
1510 else
1511 flags = 0;
1512 /*
1513 * When SO_WANTOOBFLAG is set we try to get out-of-band data
 1514 * regardless of the flags argument. Here is the case where
1515 * out-of-band data is not inline.
1516 */
1517 if ((flags & MSG_OOB) ||
1518 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1519 (so->so_options & SO_OOBINLINE) == 0 &&
1520 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1521 m = m_get(M_WAIT, MT_DATA);
1522 if (m == NULL) {
1523 socket_unlock(so, 1);
1524 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1525 return (ENOBUFS);
1526 }
1527 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1528 if (error)
1529 goto bad;
1530 socket_unlock(so, 0);
1531 do {
1532 // LP64todo - fix this!
1533 error = uiomove(mtod(m, caddr_t),
1534 (int) min(uio_resid(uio), m->m_len), uio);
1535 m = m_free(m);
1536 } while (uio_resid(uio) && error == 0 && m);
1537 socket_lock(so, 0);
1538 bad:
1539 if (m)
1540 m_freem(m);
1541 #ifdef __APPLE__
1542 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1543 if (error == EWOULDBLOCK || error == EINVAL) {
1544 /*
1545 * Let's try to get normal data:
 1546 * EWOULDBLOCK: out-of-band data not received yet;
1547 * EINVAL: out-of-band data already read.
1548 */
1549 error = 0;
1550 goto nooob;
1551 } else if (error == 0 && flagsp)
1552 *flagsp |= MSG_OOB;
1553 }
1554 socket_unlock(so, 1);
1555 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1556 #endif
1557 return (error);
1558 }
1559 nooob:
1560 if (mp)
1561 *mp = (struct mbuf *)0;
1562 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1563 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1564
1565
1566 free_list = (struct mbuf *)0;
1567 delayed_copy_len = 0;
1568 restart:
1569 #ifdef MORE_LOCKING_DEBUG
1570 if (so->so_usecount <= 1)
1571 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1572 #endif
1573 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1574 if (error) {
1575 socket_unlock(so, 1);
1576 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1577 return (error);
1578 }
1579
1580 m = so->so_rcv.sb_mb;
1581 /*
1582 * If we have less data than requested, block awaiting more
1583 * (subject to any timeout) if:
1584 * 1. the current count is less than the low water mark, or
1585 * 2. MSG_WAITALL is set, and it is possible to do the entire
1586 * receive operation at once if we block (resid <= hiwat).
1587 * 3. MSG_DONTWAIT is not set
1588 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1589 * we have to do the receive in sections, and thus risk returning
1590 * a short count if a timeout or signal occurs after we start.
1591 */
1592 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1593 so->so_rcv.sb_cc < uio_resid(uio)) &&
1594 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1595 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1596 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1597
1598 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1599 if (so->so_error) {
1600 if (m)
1601 goto dontblock;
1602 error = so->so_error;
1603 if ((flags & MSG_PEEK) == 0)
1604 so->so_error = 0;
1605 goto release;
1606 }
1607 if (so->so_state & SS_CANTRCVMORE) {
1608 if (m)
1609 goto dontblock;
1610 else
1611 goto release;
1612 }
1613 for (; m; m = m->m_next)
1614 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1615 m = so->so_rcv.sb_mb;
1616 goto dontblock;
1617 }
1618 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1619 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1620 error = ENOTCONN;
1621 goto release;
1622 }
1623 if (uio_resid(uio) == 0)
1624 goto release;
1625 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1626 error = EWOULDBLOCK;
1627 goto release;
1628 }
1629 sbunlock(&so->so_rcv, 1);
1630 #ifdef EVEN_MORE_LOCKING_DEBUG
1631 if (socket_debug)
1632 printf("Waiting for socket data\n");
1633 #endif
1634
1635 error = sbwait(&so->so_rcv);
1636 #ifdef EVEN_MORE_LOCKING_DEBUG
1637 if (socket_debug)
1638 printf("SORECEIVE - sbwait returned %d\n", error);
1639 #endif
1640 if (so->so_usecount < 1)
1641 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1642 if (error) {
1643 socket_unlock(so, 1);
1644 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1645 return (error);
1646 }
1647 goto restart;
1648 }
1649 dontblock:
1650 #ifndef __APPLE__
1651 if (uio->uio_procp)
1652 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1653 #else /* __APPLE__ */
1654 /*
1655 * 2207985
 1656 * This should be uio->uio_procp; however, some callers of this
1657 * function use auto variables with stack garbage, and fail to
1658 * fill out the uio structure properly.
1659 */
1660 if (p)
1661 p->p_stats->p_ru.ru_msgrcv++;
1662 #endif /* __APPLE__ */
1663 nextrecord = m->m_nextpkt;
1664 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1665 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1666 orig_resid = 0;
1667 if (psa) {
1668 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1669 mp0 == 0);
1670 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1671 error = EWOULDBLOCK;
1672 goto release;
1673 }
1674 }
1675 if (flags & MSG_PEEK) {
1676 m = m->m_next;
1677 } else {
1678 sbfree(&so->so_rcv, m);
1679 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1680 panic("soreceive: about to create invalid socketbuf");
1681 MFREE(m, so->so_rcv.sb_mb);
1682 m = so->so_rcv.sb_mb;
1683 }
1684 }
1685 while (m && m->m_type == MT_CONTROL && error == 0) {
1686 if (flags & MSG_PEEK) {
1687 if (controlp)
1688 *controlp = m_copy(m, 0, m->m_len);
1689 m = m->m_next;
1690 } else {
1691 sbfree(&so->so_rcv, m);
1692 if (controlp) {
1693 if (pr->pr_domain->dom_externalize &&
1694 mtod(m, struct cmsghdr *)->cmsg_type ==
1695 SCM_RIGHTS) {
1696 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1697 error = (*pr->pr_domain->dom_externalize)(m);
1698 socket_lock(so, 0);
1699 }
1700 *controlp = m;
1701 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1702 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1703 so->so_rcv.sb_mb = m->m_next;
1704 m->m_next = 0;
1705 m = so->so_rcv.sb_mb;
1706 } else {
1707 MFREE(m, so->so_rcv.sb_mb);
1708 m = so->so_rcv.sb_mb;
1709 }
1710 }
1711 if (controlp) {
1712 orig_resid = 0;
1713 controlp = &(*controlp)->m_next;
1714 }
1715 }
1716 if (m) {
1717 if ((flags & MSG_PEEK) == 0)
1718 m->m_nextpkt = nextrecord;
1719 type = m->m_type;
1720 if (type == MT_OOBDATA)
1721 flags |= MSG_OOB;
1722 }
1723 moff = 0;
1724 offset = 0;
1725
1726 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1727 can_delay = 1;
1728 else
1729 can_delay = 0;
1730
1731 need_event = 0;
1732
1733 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1734 if (m->m_type == MT_OOBDATA) {
1735 if (type != MT_OOBDATA)
1736 break;
1737 } else if (type == MT_OOBDATA)
1738 break;
1739 #ifndef __APPLE__
1740 /*
 1741 * This assertion needs rework. The trouble is AppleTalk uses many
1742 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1743 * For now just remove the assertion... CSM 9/98
1744 */
1745 else
1746 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1747 ("receive 3"));
1748 #else
1749 /*
 1750 * Make sure to always set the MSG_OOB event when getting
1751 * out of band data inline.
1752 */
1753 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1754 (so->so_options & SO_OOBINLINE) != 0 &&
1755 (so->so_state & SS_RCVATMARK) != 0) {
1756 flags |= MSG_OOB;
1757 }
1758 #endif
1759 so->so_state &= ~SS_RCVATMARK;
1760 // LP64todo - fix this!
1761 len = uio_resid(uio) - delayed_copy_len;
1762 if (so->so_oobmark && len > so->so_oobmark - offset)
1763 len = so->so_oobmark - offset;
1764 if (len > m->m_len - moff)
1765 len = m->m_len - moff;
1766 /*
1767 * If mp is set, just pass back the mbufs.
1768 * Otherwise copy them out via the uio, then free.
1769 * Sockbuf must be consistent here (points to current mbuf,
1770 * it points to next record) when we drop priority;
1771 * we must note any additions to the sockbuf when we
1772 * block interrupts again.
1773 */
1774 if (mp == 0) {
1775 if (can_delay && len == m->m_len) {
1776 /*
1777 * only delay the copy if we're consuming the
1778 * mbuf and we're NOT in MSG_PEEK mode
 1779 * and we have enough data to make it worthwhile
 1780 * to drop and retake the funnel... can_delay
 1781 * reflects the state of the latter two constraints;
1782 * moff should always be zero in these cases
1783 */
1784 delayed_copy_len += len;
1785 } else {
1786
1787 if (delayed_copy_len) {
1788 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1789
1790 if (error) {
1791 goto release;
1792 }
1793 if (m != so->so_rcv.sb_mb) {
1794 /*
1795 * can only get here if MSG_PEEK is not set
1796 * therefore, m should point at the head of the rcv queue...
1797 * if it doesn't, it means something drastically changed
1798 * while we were out from behind the funnel in sodelayed_copy...
1799 * perhaps a RST on the stream... in any event, the stream has
1800 * been interrupted... it's probably best just to return
1801 * whatever data we've moved and let the caller sort it out...
1802 */
1803 break;
1804 }
1805 }
1806 socket_unlock(so, 0);
1807 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1808 socket_lock(so, 0);
1809
1810 if (error)
1811 goto release;
1812 }
1813 } else
1814 uio_setresid(uio, (uio_resid(uio) - len));
1815
1816 if (len == m->m_len - moff) {
1817 if (m->m_flags & M_EOR)
1818 flags |= MSG_EOR;
1819 if (flags & MSG_PEEK) {
1820 m = m->m_next;
1821 moff = 0;
1822 } else {
1823 nextrecord = m->m_nextpkt;
1824 sbfree(&so->so_rcv, m);
1825 m->m_nextpkt = NULL;
1826
1827 if (mp) {
1828 *mp = m;
1829 mp = &m->m_next;
1830 so->so_rcv.sb_mb = m = m->m_next;
1831 *mp = (struct mbuf *)0;
1832 } else {
1833 if (free_list == NULL)
1834 free_list = m;
1835 else
1836 ml->m_next = m;
1837 ml = m;
1838 so->so_rcv.sb_mb = m = m->m_next;
1839 ml->m_next = 0;
1840 }
1841 if (m)
1842 m->m_nextpkt = nextrecord;
1843 }
1844 } else {
1845 if (flags & MSG_PEEK)
1846 moff += len;
1847 else {
1848 if (mp)
1849 *mp = m_copym(m, 0, len, M_WAIT);
1850 m->m_data += len;
1851 m->m_len -= len;
1852 so->so_rcv.sb_cc -= len;
1853 }
1854 }
1855 if (so->so_oobmark) {
1856 if ((flags & MSG_PEEK) == 0) {
1857 so->so_oobmark -= len;
1858 if (so->so_oobmark == 0) {
1859 so->so_state |= SS_RCVATMARK;
1860 /*
1861 * delay posting the actual event until after
1862 * any delayed copy processing has finished
1863 */
1864 need_event = 1;
1865 break;
1866 }
1867 } else {
1868 offset += len;
1869 if (offset == so->so_oobmark)
1870 break;
1871 }
1872 }
1873 if (flags & MSG_EOR)
1874 break;
1875 /*
1876 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1877 * we must not quit until "uio->uio_resid == 0" or an error
1878 * termination. If a signal/timeout occurs, return
1879 * with a short count but without error.
1880 * Keep sockbuf locked against other readers.
1881 */
1882 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1883 !sosendallatonce(so) && !nextrecord) {
1884 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1885 goto release;
1886
1887 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1888 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1889 if (sbwait(&so->so_rcv)) {
1890 error = 0;
1891 goto release;
1892 }
1893 /*
1894 * have to wait until after we get back from the sbwait to do the copy because
1895 * we will drop the funnel if we have enough data that has been delayed... by dropping
1896 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1897 * and to change the state of this socket... we're issuing the sbwait because
1898 * the socket is empty and we're expecting the netisr thread to wake us up when more
1899 * packets arrive... if we allow that processing to happen and then sbwait, we
1900 * could stall forever with packets sitting in the socket if no further packets
1901 * arrive from the remote side.
1902 *
1903 * we want to copy before we've collected all the data to satisfy this request to
1904 * allow the copy to overlap the incoming packet processing on an MP system
1905 */
1906 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1907
1908 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1909
1910 if (error)
1911 goto release;
1912 }
1913 m = so->so_rcv.sb_mb;
1914 if (m) {
1915 nextrecord = m->m_nextpkt;
1916 }
1917 }
1918 }
1919 #ifdef MORE_LOCKING_DEBUG
1920 if (so->so_usecount <= 1)
1921 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1922 #endif
1923
1924 if (m && pr->pr_flags & PR_ATOMIC) {
1925 #ifdef __APPLE__
1926 if (so->so_options & SO_DONTTRUNC)
1927 flags |= MSG_RCVMORE;
1928 else {
1929 #endif
1930 flags |= MSG_TRUNC;
1931 if ((flags & MSG_PEEK) == 0)
1932 (void) sbdroprecord(&so->so_rcv);
1933 #ifdef __APPLE__
1934 }
1935 #endif
1936 }
1937 if ((flags & MSG_PEEK) == 0) {
1938 if (m == 0)
1939 so->so_rcv.sb_mb = nextrecord;
1940 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1941 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1942 }
1943 #ifdef __APPLE__
1944 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1945 flags |= MSG_HAVEMORE;
1946
1947 if (delayed_copy_len) {
1948 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1949
1950 if (error)
1951 goto release;
1952 }
1953 if (free_list) {
1954 m_freem_list((struct mbuf *)free_list);
1955 free_list = (struct mbuf *)0;
1956 }
1957 if (need_event)
1958 postevent(so, 0, EV_OOB);
1959 #endif
1960 if (orig_resid == uio_resid(uio) && orig_resid &&
1961 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1962 sbunlock(&so->so_rcv, 1);
1963 goto restart;
1964 }
1965
1966 if (flagsp)
1967 *flagsp |= flags;
1968 release:
1969 #ifdef MORE_LOCKING_DEBUG
1970 if (so->so_usecount <= 1)
1971 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1972 #endif
1973 if (delayed_copy_len) {
1974 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1975 }
1976 if (free_list) {
1977 m_freem_list((struct mbuf *)free_list);
1978 }
1979 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1980
1981 // LP64todo - fix this!
1982 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1983 so,
1984 uio_resid(uio),
1985 so->so_rcv.sb_cc,
1986 0,
1987 error);
1988
1989 return (error);
1990 }
1991
1992
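/*
 * Flush mbufs whose copy to user space was deferred by soreceive():
 * drop the socket lock, uiomove each mbuf's data, free the chain, and
 * reset the caller's deferred-byte count before relocking the socket.
 */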
1993 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1994 {
1995 int error = 0;
1996 struct mbuf *m;
1997
1998 m = *free_list;
1999
2000 socket_unlock(so, 0);
2001
2002 while (m && error == 0) {
2003
2004 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2005
2006 m = m->m_next;
2007 }
2008 m_freem_list(*free_list);
2009
2010 *free_list = (struct mbuf *)NULL;
2011 *resid = 0;
2012
2013 socket_lock(so, 0);
2014
2015 return (error);
2016 }
2017
2018
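/*
 * Shut down one or both halves of a connection: notify the socket
 * filters, flush the receive side unless `how' is SHUT_WR, and call the
 * protocol's pru_shutdown unless `how' is SHUT_RD.
 */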
2019 int
2020 soshutdown(so, how)
2021 register struct socket *so;
2022 register int how;
2023 {
2024 register struct protosw *pr = so->so_proto;
2025 int ret;
2026
2027 socket_lock(so, 1);
2028
2029 sflt_notify(so, sock_evt_shutdown, &how);
2030
2031 if (how != SHUT_WR) {
2032 sorflush(so);
2033 postevent(so, 0, EV_RCLOSED);
2034 }
2035 if (how != SHUT_RD) {
2036 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2037 postevent(so, 0, EV_WCLOSED);
2038 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2039 socket_unlock(so, 1);
2040 return(ret);
2041 }
2042
2043 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2044 socket_unlock(so, 1);
2045 return (0);
2046 }
2047
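/*
 * Flush the receive buffer: mark the socket unable to receive more,
 * clear any selecting threads, preserve knote state, let the domain
 * dispose of any in-flight rights (such as passed file descriptors),
 * and release the queued mbufs.
 */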
2048 void
2049 sorflush(so)
2050 register struct socket *so;
2051 {
2052 register struct sockbuf *sb = &so->so_rcv;
2053 register struct protosw *pr = so->so_proto;
2054 struct sockbuf asb;
2055
2056 #ifdef MORE_LOCKING_DEBUG
2057 lck_mtx_t * mutex_held;
2058
2059 if (so->so_proto->pr_getlock != NULL)
2060 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2061 else
2062 mutex_held = so->so_proto->pr_domain->dom_mtx;
2063 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2064 #endif
2065
2066 sflt_notify(so, sock_evt_flush_read, NULL);
2067
2068 sb->sb_flags |= SB_NOINTR;
2069 (void) sblock(sb, M_WAIT);
2070 socantrcvmore(so);
2071 sbunlock(sb, 1);
2072 #ifdef __APPLE__
2073 selthreadclear(&sb->sb_sel);
2074 #endif
2075 asb = *sb;
2076 bzero((caddr_t)sb, sizeof (*sb));
2077 sb->sb_so = so; /* reestablish link to socket */
2078 if (asb.sb_flags & SB_KNOTE) {
2079 sb->sb_sel.si_note = asb.sb_sel.si_note;
2080 sb->sb_flags = SB_KNOTE;
2081 }
2082 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2083 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2084 sbrelease(&asb);
2085 }
2086
2087 /*
2088 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2089 * an additional variant to handle the case where the option value needs
2090 * to be some kind of integer, but not a specific size.
2091 * In addition to their use here, these functions are also called by the
2092 * protocol-level pr_ctloutput() routines.
2093 */
2094 int
2095 sooptcopyin(sopt, buf, len, minlen)
2096 struct sockopt *sopt;
2097 void *buf;
2098 size_t len;
2099 size_t minlen;
2100 {
2101 size_t valsize;
2102
2103 /*
2104 * If the user gives us more than we wanted, we ignore it,
2105 * but if we don't get the minimum length the caller
2106 * wants, we return EINVAL. On success, sopt->sopt_valsize
2107 * is set to however much we actually retrieved.
2108 */
2109 if ((valsize = sopt->sopt_valsize) < minlen)
2110 return EINVAL;
2111 if (valsize > len)
2112 sopt->sopt_valsize = valsize = len;
2113
2114 if (sopt->sopt_p != 0)
2115 return (copyin(sopt->sopt_val, buf, valsize));
2116
2117 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2118 return 0;
2119 }
2120
2121 int
2122 sosetopt(so, sopt)
2123 struct socket *so;
2124 struct sockopt *sopt;
2125 {
2126 int error, optval;
2127 struct linger l;
2128 struct timeval tv;
2129 short val;
2130
2131 socket_lock(so, 1);
2132
2133 if (sopt->sopt_dir != SOPT_SET) {
2134 sopt->sopt_dir = SOPT_SET;
2135 }
2136
2137 {
2138 struct socket_filter_entry *filter;
2139 int filtered = 0;
2140 error = 0;
2141 for (filter = so->so_filt; filter && (error == 0);
2142 filter = filter->sfe_next_onsocket) {
2143 if (filter->sfe_filter->sf_filter.sf_setoption) {
2144 if (filtered == 0) {
2145 filtered = 1;
2146 sflt_use(so);
2147 socket_unlock(so, 0);
2148 }
2149 error = filter->sfe_filter->sf_filter.sf_setoption(
2150 filter->sfe_cookie, so, sopt);
2151 }
2152 }
2153
2154 if (filtered != 0) {
2155 socket_lock(so, 0);
2156 sflt_unuse(so);
2157
2158 if (error) {
2159 if (error == EJUSTRETURN)
2160 error = 0;
2161 goto bad;
2162 }
2163 }
2164 }
2165
2166 error = 0;
2167 if (sopt->sopt_level != SOL_SOCKET) {
2168 if (so->so_proto && so->so_proto->pr_ctloutput) {
2169 error = (*so->so_proto->pr_ctloutput)
2170 (so, sopt);
2171 socket_unlock(so, 1);
2172 return (error);
2173 }
2174 error = ENOPROTOOPT;
2175 } else {
2176 switch (sopt->sopt_name) {
2177 case SO_LINGER:
2178 case SO_LINGER_SEC:
2179 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2180 if (error)
2181 goto bad;
2182
2183 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2184 if (l.l_onoff)
2185 so->so_options |= SO_LINGER;
2186 else
2187 so->so_options &= ~SO_LINGER;
2188 break;
2189
2190 case SO_DEBUG:
2191 case SO_KEEPALIVE:
2192 case SO_DONTROUTE:
2193 case SO_USELOOPBACK:
2194 case SO_BROADCAST:
2195 case SO_REUSEADDR:
2196 case SO_REUSEPORT:
2197 case SO_OOBINLINE:
2198 case SO_TIMESTAMP:
2199 #ifdef __APPLE__
2200 case SO_DONTTRUNC:
2201 case SO_WANTMORE:
2202 case SO_WANTOOBFLAG:
2203 #endif
2204 error = sooptcopyin(sopt, &optval, sizeof optval,
2205 sizeof optval);
2206 if (error)
2207 goto bad;
2208 if (optval)
2209 so->so_options |= sopt->sopt_name;
2210 else
2211 so->so_options &= ~sopt->sopt_name;
2212 break;
2213
2214 case SO_SNDBUF:
2215 case SO_RCVBUF:
2216 case SO_SNDLOWAT:
2217 case SO_RCVLOWAT:
2218 error = sooptcopyin(sopt, &optval, sizeof optval,
2219 sizeof optval);
2220 if (error)
2221 goto bad;
2222
2223 /*
2224 * Values < 1 make no sense for any of these
2225 * options, so disallow them.
2226 */
2227 if (optval < 1) {
2228 error = EINVAL;
2229 goto bad;
2230 }
2231
2232 switch (sopt->sopt_name) {
2233 case SO_SNDBUF:
2234 case SO_RCVBUF:
2235 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2236 &so->so_snd : &so->so_rcv,
2237 (u_long) optval) == 0) {
2238 error = ENOBUFS;
2239 goto bad;
2240 }
2241 break;
2242
2243 /*
2244 * Make sure the low-water is never greater than
2245 * the high-water.
2246 */
2247 case SO_SNDLOWAT:
2248 so->so_snd.sb_lowat =
2249 (optval > so->so_snd.sb_hiwat) ?
2250 so->so_snd.sb_hiwat : optval;
2251 break;
2252 case SO_RCVLOWAT:
2253 so->so_rcv.sb_lowat =
2254 (optval > so->so_rcv.sb_hiwat) ?
2255 so->so_rcv.sb_hiwat : optval;
2256 break;
2257 }
2258 break;
2259
2260 case SO_SNDTIMEO:
2261 case SO_RCVTIMEO:
2262 error = sooptcopyin(sopt, &tv, sizeof tv,
2263 sizeof tv);
2264 if (error)
2265 goto bad;
2266
2267 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2268 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2269 error = EDOM;
2270 goto bad;
2271 }
2272
2273 switch (sopt->sopt_name) {
2274 case SO_SNDTIMEO:
2275 so->so_snd.sb_timeo = tv;
2276 break;
2277 case SO_RCVTIMEO:
2278 so->so_rcv.sb_timeo = tv;
2279 break;
2280 }
2281 break;
2282
2283 case SO_NKE:
2284 {
2285 struct so_nke nke;
2286
2287 error = sooptcopyin(sopt, &nke,
2288 sizeof nke, sizeof nke);
2289 if (error)
2290 goto bad;
2291
2292 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2293 break;
2294 }
2295
2296 case SO_NOSIGPIPE:
2297 error = sooptcopyin(sopt, &optval, sizeof optval,
2298 sizeof optval);
2299 if (error)
2300 goto bad;
2301 if (optval)
2302 so->so_flags |= SOF_NOSIGPIPE;
2303 else
2304 so->so_flags &= ~SOF_NOSIGPIPE;
2305
2306 break;
2307
2308 case SO_NOADDRERR:
2309 error = sooptcopyin(sopt, &optval, sizeof optval,
2310 sizeof optval);
2311 if (error)
2312 goto bad;
2313 if (optval)
2314 so->so_flags |= SOF_NOADDRAVAIL;
2315 else
2316 so->so_flags &= ~SOF_NOADDRAVAIL;
2317
2318 break;
2319
2320 default:
2321 error = ENOPROTOOPT;
2322 break;
2323 }
2324 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2325 (void) ((*so->so_proto->pr_ctloutput)
2326 (so, sopt));
2327 }
2328 }
2329 bad:
2330 socket_unlock(so, 1);
2331 return (error);
2332 }
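
/*
 * Illustrative userland sketch (not part of this file) exercising options
 * handled by sosetopt() above. The buffer size, low-water mark, timeout and
 * descriptor 'fd' are arbitrary example values; SO_LINGER_SEC is the
 * Apple-specific variant whose interval is given in seconds (note the
 * '* hz' conversion above).
 */
#include <sys/socket.h>
#include <sys/time.h>

static int
tune_receive_side(int fd)
{
	int rcvbuf = 64 * 1024;		/* SO_RCVBUF: rejected with EINVAL if < 1 */
	int lowat = 512;		/* SO_RCVLOWAT: clamped to sb_hiwat above */
	struct timeval tv = { 5, 0 };	/* SO_RCVTIMEO: tv_usec must be < 1000000 */
	struct linger l = { 1, 2 };	/* SO_LINGER_SEC: linger up to 2 seconds */

	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof (rcvbuf)) < 0)
		return (-1);
	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof (lowat)) < 0)
		return (-1);
	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv)) < 0)
		return (-1);
	return setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof (l));
}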
2333
2334 /* Helper routine for getsockopt */
2335 int
2336 sooptcopyout(sopt, buf, len)
2337 struct sockopt *sopt;
2338 void *buf;
2339 size_t len;
2340 {
2341 int error;
2342 size_t valsize;
2343
2344 error = 0;
2345
2346 /*
2347 * Documented get behavior is that we always return a value,
2348 * possibly truncated to fit in the user's buffer.
2349 * Traditional behavior is that we always tell the user
2350 * precisely how much we copied, rather than something useful
2351 * like the total amount we had available for her.
2352 * Note that this interface is not idempotent; the entire answer must
2353 * be generated ahead of time.
2354 */
2355 valsize = min(len, sopt->sopt_valsize);
2356 sopt->sopt_valsize = valsize;
2357 if (sopt->sopt_val != USER_ADDR_NULL) {
2358 if (sopt->sopt_p != 0)
2359 error = copyout(buf, sopt->sopt_val, valsize);
2360 else
2361 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2362 }
2363 return error;
2364 }
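
/*
 * Hedged sketch (not from this file) of how a protocol-level pr_ctloutput()
 * routine -- the other caller mentioned in the comment above sooptcopyin()
 * -- might use sooptcopyin()/sooptcopyout() for an integer-valued option.
 * The handler name and the control-block accesses are hypothetical; only
 * the two helper calls follow the code above.
 */
static int
myproto_ctloutput(__unused struct socket *so, struct sockopt *sopt)
{
	int error = 0, optval = 0;

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		/* EINVAL if shorter than an int; longer values are truncated. */
		error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval));
		if (error != 0)
			break;
		/* store optval in the protocol control block here */
		break;
	case SOPT_GET:
		/* fetch the current value from the protocol control block here */
		error = sooptcopyout(sopt, &optval, sizeof (optval));
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}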
2365
2366 int
2367 sogetopt(so, sopt)
2368 struct socket *so;
2369 struct sockopt *sopt;
2370 {
2371 int error, optval;
2372 struct linger l;
2373 struct timeval tv;
2374
2375 if (sopt->sopt_dir != SOPT_GET) {
2376 sopt->sopt_dir = SOPT_GET;
2377 }
2378
2379 socket_lock(so, 1);
2380
2381 {
2382 struct socket_filter_entry *filter;
2383 int filtered = 0;
2384 error = 0;
2385 for (filter = so->so_filt; filter && (error == 0);
2386 filter = filter->sfe_next_onsocket) {
2387 if (filter->sfe_filter->sf_filter.sf_getoption) {
2388 if (filtered == 0) {
2389 filtered = 1;
2390 sflt_use(so);
2391 socket_unlock(so, 0);
2392 }
2393 error = filter->sfe_filter->sf_filter.sf_getoption(
2394 filter->sfe_cookie, so, sopt);
2395 }
2396 }
2397 if (filtered != 0) {
2398 socket_lock(so, 0);
2399 sflt_unuse(so);
2400
2401 if (error) {
2402 if (error == EJUSTRETURN)
2403 error = 0;
2404 socket_unlock(so, 1);
2405 return error;
2406 }
2407 }
2408 }
2409
2410 error = 0;
2411 if (sopt->sopt_level != SOL_SOCKET) {
2412 if (so->so_proto && so->so_proto->pr_ctloutput) {
2413 error = (*so->so_proto->pr_ctloutput)
2414 (so, sopt);
2415 socket_unlock(so, 1);
2416 return (error);
2417 } else {
2418 socket_unlock(so, 1);
2419 return (ENOPROTOOPT);
2420 }
2421 } else {
2422 switch (sopt->sopt_name) {
2423 case SO_LINGER:
2424 case SO_LINGER_SEC:
2425 l.l_onoff = so->so_options & SO_LINGER;
2426 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2427 so->so_linger / hz;
2428 error = sooptcopyout(sopt, &l, sizeof l);
2429 break;
2430
2431 case SO_USELOOPBACK:
2432 case SO_DONTROUTE:
2433 case SO_DEBUG:
2434 case SO_KEEPALIVE:
2435 case SO_REUSEADDR:
2436 case SO_REUSEPORT:
2437 case SO_BROADCAST:
2438 case SO_OOBINLINE:
2439 case SO_TIMESTAMP:
2440 #ifdef __APPLE__
2441 case SO_DONTTRUNC:
2442 case SO_WANTMORE:
2443 case SO_WANTOOBFLAG:
2444 #endif
2445 optval = so->so_options & sopt->sopt_name;
2446 integer:
2447 error = sooptcopyout(sopt, &optval, sizeof optval);
2448 break;
2449
2450 case SO_TYPE:
2451 optval = so->so_type;
2452 goto integer;
2453
2454 #ifdef __APPLE__
2455 case SO_NREAD:
2456 {
2457 int pkt_total;
2458 struct mbuf *m1;
2459
2460 pkt_total = 0;
2461 m1 = so->so_rcv.sb_mb;
2462 if (so->so_proto->pr_flags & PR_ATOMIC)
2463 {
2464 while (m1) {
2465 if (m1->m_type == MT_DATA)
2466 pkt_total += m1->m_len;
2467 m1 = m1->m_next;
2468 }
2469 optval = pkt_total;
2470 } else
2471 optval = so->so_rcv.sb_cc;
2472 goto integer;
2473 }
2474 case SO_NWRITE:
2475 optval = so->so_snd.sb_cc;
2476 goto integer;
2477 #endif
2478 case SO_ERROR:
2479 optval = so->so_error;
2480 so->so_error = 0;
2481 goto integer;
2482
2483 case SO_SNDBUF:
2484 optval = so->so_snd.sb_hiwat;
2485 goto integer;
2486
2487 case SO_RCVBUF:
2488 optval = so->so_rcv.sb_hiwat;
2489 goto integer;
2490
2491 case SO_SNDLOWAT:
2492 optval = so->so_snd.sb_lowat;
2493 goto integer;
2494
2495 case SO_RCVLOWAT:
2496 optval = so->so_rcv.sb_lowat;
2497 goto integer;
2498
2499 case SO_SNDTIMEO:
2500 case SO_RCVTIMEO:
2501 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2502 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2503
2504 error = sooptcopyout(sopt, &tv, sizeof tv);
2505 break;
2506
2507 case SO_NOSIGPIPE:
2508 optval = (so->so_flags & SOF_NOSIGPIPE);
2509 goto integer;
2510
2511 case SO_NOADDRERR:
2512 optval = (so->so_flags & SOF_NOADDRAVAIL);
2513 goto integer;
2514
2515 default:
2516 error = ENOPROTOOPT;
2517 break;
2518 }
2519 socket_unlock(so, 1);
2520 return (error);
2521 }
2522 }
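
/*
 * Illustrative userland counterpart (not part of this file) to the
 * Apple-specific SO_NREAD branch above: on a datagram (PR_ATOMIC) socket it
 * reports the data bytes of the first pending record, on a stream socket the
 * total bytes buffered. 'fd' is assumed to be an open socket.
 */
#include <sys/socket.h>

/* Number of bytes the next read on 'fd' could consume, or -1 on error. */
static int
pending_bytes(int fd)
{
	int nread = 0;
	socklen_t len = sizeof (nread);

	if (getsockopt(fd, SOL_SOCKET, SO_NREAD, &nread, &len) < 0)
		return (-1);
	return (nread);
}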
2523
2524 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2525 int
2526 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2527 {
2528 struct mbuf *m, *m_prev;
2529 int sopt_size = sopt->sopt_valsize;
2530
2531 if (sopt_size > MAX_SOOPTGETM_SIZE)
2532 return EMSGSIZE;
2533
2534 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2535 if (m == 0)
2536 return ENOBUFS;
2537 if (sopt_size > MLEN) {
2538 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2539 if ((m->m_flags & M_EXT) == 0) {
2540 m_free(m);
2541 return ENOBUFS;
2542 }
2543 m->m_len = min(MCLBYTES, sopt_size);
2544 } else {
2545 m->m_len = min(MLEN, sopt_size);
2546 }
2547 sopt_size -= m->m_len;
2548 *mp = m;
2549 m_prev = m;
2550
2551 while (sopt_size) {
2552 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2553 if (m == 0) {
2554 m_freem(*mp);
2555 return ENOBUFS;
2556 }
2557 if (sopt_size > MLEN) {
2558 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2559 if ((m->m_flags & M_EXT) == 0) {
2560 m_freem(*mp);
2561 return ENOBUFS;
2562 }
2563 m->m_len = min(MCLBYTES, sopt_size);
2564 } else {
2565 m->m_len = min(MLEN, sopt_size);
2566 }
2567 sopt_size -= m->m_len;
2568 m_prev->m_next = m;
2569 m_prev = m;
2570 }
2571 return 0;
2572 }
2573
2574 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2575 int
2576 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2577 {
2578 struct mbuf *m0 = m;
2579
2580 if (sopt->sopt_val == USER_ADDR_NULL)
2581 return 0;
2582 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2583 if (sopt->sopt_p != NULL) {
2584 int error;
2585
2586 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2587 if (error != 0) {
2588 m_freem(m0);
2589 return(error);
2590 }
2591 } else
2592 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2593 sopt->sopt_valsize -= m->m_len;
2594 sopt->sopt_val += m->m_len;
2595 m = m->m_next;
2596 }
2597 if (m != NULL) /* ip6_sooptmcopyin() should have allocated enough space */
2598 panic("soopt_mcopyin");
2599 return 0;
2600 }
2601
2602 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2603 int
2604 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2605 {
2606 struct mbuf *m0 = m;
2607 size_t valsize = 0;
2608
2609 if (sopt->sopt_val == USER_ADDR_NULL)
2610 return 0;
2611 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2612 if (sopt->sopt_p != NULL) {
2613 int error;
2614
2615 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2616 if (error != 0) {
2617 m_freem(m0);
2618 return(error);
2619 }
2620 } else
2621 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2622 sopt->sopt_valsize -= m->m_len;
2623 sopt->sopt_val += m->m_len;
2624 valsize += m->m_len;
2625 m = m->m_next;
2626 }
2627 if (m != NULL) {
2628 /* the user should have supplied a large enough option buffer */
2629 m_freem(m0);
2630 return(EINVAL);
2631 }
2632 sopt->sopt_valsize = valsize;
2633 return 0;
2634 }
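
/*
 * Hedged sketch (not from this file) of the calling sequence the three
 * compatibility helpers above appear intended for: soopt_getm() sizes an
 * mbuf chain from sopt_valsize, soopt_mcopyin() fills it from the user's
 * buffer for a SET, and soopt_mcopyout() copies a chain back for a GET.
 * The handler name and the processing placeholders are hypothetical.
 */
static int
myproto_ctloutput_mbuf(__unused struct socket *so, struct sockopt *sopt)
{
	struct mbuf *m = NULL;
	int error;

	error = soopt_getm(sopt, &m);		/* allocate the chain */
	if (error != 0)
		return (error);

	if (sopt->sopt_dir == SOPT_SET) {
		error = soopt_mcopyin(sopt, m);	/* frees the chain itself on failure */
		if (error != 0)
			return (error);
		/* ... legacy mbuf-based SET processing would go here ... */
		m_freem(m);
		return (0);
	}

	/* SOPT_GET: fill the chain with the current value here, then copy out. */
	error = soopt_mcopyout(sopt, m);	/* frees the chain only on failure */
	if (error == 0)
		m_freem(m);
	return (error);
}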
2635
2636 void
2637 sohasoutofband(so)
2638 register struct socket *so;
2639 {
2640 struct proc *p;
2641
2642 if (so->so_pgid < 0)
2643 gsignal(-so->so_pgid, SIGURG);
2644 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2645 psignal(p, SIGURG);
2646 selwakeup(&so->so_rcv.sb_sel);
2647 }
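
/*
 * Illustrative userland sketch (not part of this file) of receiving the
 * SIGURG raised by sohasoutofband(): the signal is only delivered once the
 * process has claimed the socket with F_SETOWN (which sets so_pgid), after
 * which the urgent byte can be pulled with MSG_OOB. Function names are
 * illustrative.
 */
#include <sys/socket.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t got_urgent;

static void
on_sigurg(int sig)
{
	(void)sig;
	got_urgent = 1;
}

/* Arrange for SIGURG from sohasoutofband() to reach this process. */
static int
watch_oob(int fd)
{
	signal(SIGURG, on_sigurg);
	return fcntl(fd, F_SETOWN, getpid());
}

/* Typically called after got_urgent fires: fetch the out-of-band byte. */
static ssize_t
read_urgent_byte(int fd, char *byte)
{
	return recv(fd, byte, 1, MSG_OOB);
}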
2648
2649 int
2650 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2651 {
2652 struct proc *p = current_proc();
2653 int revents = 0;
2654
2655 socket_lock(so, 1);
2656
2657 if (events & (POLLIN | POLLRDNORM))
2658 if (soreadable(so))
2659 revents |= events & (POLLIN | POLLRDNORM);
2660
2661 if (events & (POLLOUT | POLLWRNORM))
2662 if (sowriteable(so))
2663 revents |= events & (POLLOUT | POLLWRNORM);
2664
2665 if (events & (POLLPRI | POLLRDBAND))
2666 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2667 revents |= events & (POLLPRI | POLLRDBAND);
2668
2669 if (revents == 0) {
2670 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2671 /* Darwin sets the flag first, BSD calls selrecord first */
2672 so->so_rcv.sb_flags |= SB_SEL;
2673 selrecord(p, &so->so_rcv.sb_sel, wql);
2674 }
2675
2676 if (events & (POLLOUT | POLLWRNORM)) {
2677 /* Darwin sets the flag first, BSD calls selrecord first */
2678 so->so_snd.sb_flags |= SB_SEL;
2679 selrecord(p, &so->so_snd.sb_sel, wql);
2680 }
2681 }
2682
2683 socket_unlock(so, 1);
2684 return (revents);
2685 }
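
/*
 * Illustrative userland view (not part of this file) of the event classes
 * sopoll() checks above: POLLIN/POLLRDNORM when soreadable(), POLLOUT/
 * POLLWRNORM when sowriteable(), and POLLPRI/POLLRDBAND when an out-of-band
 * mark is pending. 'fd' and the timeout are example values.
 */
#include <poll.h>

/* Wait up to 'ms' milliseconds for normal data or an OOB mark on 'fd'. */
static int
wait_readable_or_oob(int fd, int ms)
{
	struct pollfd pfd;

	pfd.fd = fd;
	pfd.events = POLLIN | POLLPRI;	/* soreadable() / so_oobmark paths above */
	pfd.revents = 0;

	return poll(&pfd, 1, ms);	/* > 0: ready, 0: timeout, -1: error */
}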
2686
2687 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2688
2689 int
2690 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2691 {
2692 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2693 struct sockbuf *sb;
2694 socket_lock(so, 1);
2695
2696 switch (kn->kn_filter) {
2697 case EVFILT_READ:
2698 if (so->so_options & SO_ACCEPTCONN)
2699 kn->kn_fop = &solisten_filtops;
2700 else
2701 kn->kn_fop = &soread_filtops;
2702 sb = &so->so_rcv;
2703 break;
2704 case EVFILT_WRITE:
2705 kn->kn_fop = &sowrite_filtops;
2706 sb = &so->so_snd;
2707 break;
2708 default:
2709 socket_unlock(so, 1);
2710 return (1);
2711 }
2712
2713 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2714 sb->sb_flags |= SB_KNOTE;
2715 socket_unlock(so, 1);
2716 return (0);
2717 }
2718
2719 static void
2720 filt_sordetach(struct knote *kn)
2721 {
2722 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2723
2724 socket_lock(so, 1);
2725 if (so->so_rcv.sb_flags & SB_KNOTE)
2726 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2727 so->so_rcv.sb_flags &= ~SB_KNOTE;
2728 socket_unlock(so, 1);
2729 }
2730
2731 /*ARGSUSED*/
2732 static int
2733 filt_soread(struct knote *kn, long hint)
2734 {
2735 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2736
2737 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2738 socket_lock(so, 1);
2739
2740 if (so->so_oobmark) {
2741 if (kn->kn_flags & EV_OOBAND) {
2742 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2743 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2744 socket_unlock(so, 1);
2745 return (1);
2746 }
2747 kn->kn_data = so->so_oobmark;
2748 kn->kn_flags |= EV_OOBAND;
2749 } else {
2750 kn->kn_data = so->so_rcv.sb_cc;
2751 if (so->so_state & SS_CANTRCVMORE) {
2752 kn->kn_flags |= EV_EOF;
2753 kn->kn_fflags = so->so_error;
2754 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2755 socket_unlock(so, 1);
2756 return (1);
2757 }
2758 }
2759
2760 if (so->so_state & SS_RCVATMARK) {
2761 if (kn->kn_flags & EV_OOBAND) {
2762 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2763 socket_unlock(so, 1);
2764 return (1);
2765 }
2766 kn->kn_flags |= EV_OOBAND;
2767 } else if (kn->kn_flags & EV_OOBAND) {
2768 kn->kn_data = 0;
2769 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2770 socket_unlock(so, 1);
2771 return (0);
2772 }
2773
2774 if (so->so_error) { /* temporary udp error */
2775 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2776 socket_unlock(so, 1);
2777 return (1);
2778 }
2779
2780 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2781 socket_unlock(so, 1);
2782
2783 return( kn->kn_flags & EV_OOBAND ||
2784 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2785 kn->kn_sdata : so->so_rcv.sb_lowat));
2786 }
2787
2788 static void
2789 filt_sowdetach(struct knote *kn)
2790 {
2791 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2792 socket_lock(so, 1);
2793
2794 if(so->so_snd.sb_flags & SB_KNOTE)
2795 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2796 so->so_snd.sb_flags &= ~SB_KNOTE;
2797 socket_unlock(so, 1);
2798 }
2799
2800 /*ARGSUSED*/
2801 static int
2802 filt_sowrite(struct knote *kn, long hint)
2803 {
2804 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2805
2806 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2807 socket_lock(so, 1);
2808
2809 kn->kn_data = sbspace(&so->so_snd);
2810 if (so->so_state & SS_CANTSENDMORE) {
2811 kn->kn_flags |= EV_EOF;
2812 kn->kn_fflags = so->so_error;
2813 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2814 socket_unlock(so, 1);
2815 return (1);
2816 }
2817 if (so->so_error) { /* temporary udp error */
2818 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2819 socket_unlock(so, 1);
2820 return (1);
2821 }
2822 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2823 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2824 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2825 socket_unlock(so, 1);
2826 return (0);
2827 }
2828 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2829 socket_unlock(so, 1);
2830 if (kn->kn_sfflags & NOTE_LOWAT)
2831 return (kn->kn_data >= kn->kn_sdata);
2832 return (kn->kn_data >= so->so_snd.sb_lowat);
2833 }
2834
2835 /*ARGSUSED*/
2836 static int
2837 filt_solisten(struct knote *kn, long hint)
2838 {
2839 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2840 int isempty;
2841
2842 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2843 socket_lock(so, 1);
2844 kn->kn_data = so->so_qlen;
2845 isempty = ! TAILQ_EMPTY(&so->so_comp); /* despite the name: non-zero when completed connections are pending */
2846 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2847 socket_unlock(so, 1);
2848 return (isempty);
2849 }
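
/*
 * Illustrative userland sketch (not part of this file) of the knote filters
 * above: EVFILT_READ on a listening socket is routed to solisten_filtops, so
 * kevent() reports the completed-connection count as the event's data, while
 * NOTE_LOWAT on a connected socket makes filt_soread() compare against
 * kn_sdata instead of sb_lowat. 'kq', 'lfd', 'cfd' and 'lowat' are assumed
 * to be set up by the caller.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
register_sockets(int kq, int lfd, int cfd, int lowat)
{
	struct kevent kev[2];

	/* Listening socket: fires when at least one connection is queued. */
	EV_SET(&kev[0], lfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	/* Connected socket: fires only once 'lowat' bytes are buffered. */
	EV_SET(&kev[1], cfd, EVFILT_READ, EV_ADD, NOTE_LOWAT, lowat, NULL);

	return kevent(kq, kev, 2, NULL, 0, NULL);
}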
2850
2851
/*
 * Acquire the socket's lock -- the protocol's pr_lock if it provides one,
 * otherwise the domain mutex -- and, when 'refcount' is non-zero, take a
 * use-count reference; on PowerPC the caller's link register is recorded
 * for debugging.
 */
2852 int
2853 socket_lock(so, refcount)
2854 struct socket *so;
2855 int refcount;
2856 {
2857 int error = 0, lr, lr_saved;
2858 #ifdef __ppc__
2859 __asm__ volatile("mflr %0" : "=r" (lr));
2860 lr_saved = lr;
2861 #endif
2862
2863 if (so->so_proto->pr_lock) {
2864 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2865 }
2866 else {
2867 #ifdef MORE_LOCKING_DEBUG
2868 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2869 #endif
2870 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2871 if (refcount)
2872 so->so_usecount++;
2873 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2874 }
2875
2876 return(error);
2877
2878 }
2879
/*
 * Release the socket's lock and, when 'refcount' is non-zero, drop a
 * use-count reference; dropping the final reference calls sofreelastref().
 */
2880 int
2881 socket_unlock(so, refcount)
2882 struct socket *so;
2883 int refcount;
2884 {
2885 int error = 0, lr, lr_saved;
2886 lck_mtx_t * mutex_held;
2887
2888 #ifdef __ppc__
2889 __asm__ volatile("mflr %0" : "=r" (lr));
2890 lr_saved = lr;
2891 #endif
2892
2893
2894
2895 if (so->so_proto == NULL)
2896 panic("socket_unlock null so_proto so=%x\n", so);
2897
2898 if (so && so->so_proto->pr_unlock)
2899 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2900 else {
2901 mutex_held = so->so_proto->pr_domain->dom_mtx;
2902 #ifdef MORE_LOCKING_DEBUG
2903 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2904 #endif
2905 if (refcount) {
2906 if (so->so_usecount <= 0)
2907 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2908 so->so_usecount--;
2909 if (so->so_usecount == 0) {
2910 sofreelastref(so, 1);
2911 }
2912 else
2913 so->reserved4 = (void*)lr_saved; /* save caller */
2914 }
2915 lck_mtx_unlock(mutex_held);
2916 }
2917
2918 return(error);
2919 }
2920 //### Called with socket locked, will unlock socket
2921 void
2922 sofree(so)
2923 struct socket *so;
2924 {
2925
2926 int lr, lr_saved;
2927 lck_mtx_t * mutex_held;
2928 #ifdef __ppc__
2929 __asm__ volatile("mflr %0" : "=r" (lr));
2930 lr_saved = lr;
2931 #endif
2932 if (so->so_proto->pr_getlock != NULL)
2933 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2934 else
2935 mutex_held = so->so_proto->pr_domain->dom_mtx;
2936 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2937
2938 sofreelastref(so, 0);
2939 }
2940
2941 void
2942 soreference(so)
2943 struct socket *so;
2944 {
2945 socket_lock(so, 1); /* locks & take one reference on socket */
2946 socket_unlock(so, 0); /* unlock only */
2947 }
2948
2949 void
2950 sodereference(so)
2951 struct socket *so;
2952 {
2953 socket_lock(so, 0);
2954 socket_unlock(so, 1);
2955 }
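
/*
 * Hedged kernel-side sketch (not from this file) of the pattern that
 * soreference()/sodereference() above encapsulate: take the lock together
 * with a use-count reference, drop only the lock while blocking, then
 * reacquire the lock and give the reference back so the last release can
 * reach sofreelastref(). The blocking step is a placeholder.
 */
static void
do_blocking_work(struct socket *so)
{
	socket_lock(so, 1);	/* lock and take a use-count reference */
	socket_unlock(so, 0);	/* drop only the lock; so_usecount keeps 'so' alive */

	/* ... blocking or long-running work that must not hold the mutex ... */

	socket_lock(so, 0);	/* reacquire the lock without a new reference */
	socket_unlock(so, 1);	/* unlock and release the reference taken above */
}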