1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
29 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 /*
31 * Copyright (c) 1982, 1986, 1988, 1990, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the University of
45 * California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
63 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
64 */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/filedesc.h>
69 #include <sys/proc_internal.h>
70 #include <sys/kauth.h>
71 #include <sys/file_internal.h>
72 #include <sys/fcntl.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/domain.h>
76 #include <sys/kernel.h>
77 #include <sys/event.h>
78 #include <sys/poll.h>
79 #include <sys/protosw.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/resourcevar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/uio.h>
86 #include <sys/ev.h>
87 #include <sys/kdebug.h>
88 #include <net/route.h>
89 #include <netinet/in.h>
90 #include <netinet/in_pcb.h>
91 #include <kern/zalloc.h>
92 #include <kern/locks.h>
93 #include <machine/limits.h>
94
95 int so_cache_hw = 0;
96 int so_cache_timeouts = 0;
97 int so_cache_max_freed = 0;
98 int cached_sock_count = 0;
99 struct socket *socket_cache_head = 0;
100 struct socket *socket_cache_tail = 0;
101 u_long so_cache_time = 0;
102 int so_cache_init_done = 0;
103 struct zone *so_cache_zone;
104 extern int get_inpcb_str_size();
105 extern int get_tcp_str_size();
106
107 static lck_grp_t *so_cache_mtx_grp;
108 static lck_attr_t *so_cache_mtx_attr;
109 static lck_grp_attr_t *so_cache_mtx_grp_attr;
110 lck_mtx_t *so_cache_mtx;
111
112
113
114 static void filt_sordetach(struct knote *kn);
115 static int filt_soread(struct knote *kn, long hint);
116 static void filt_sowdetach(struct knote *kn);
117 static int filt_sowrite(struct knote *kn, long hint);
118 static int filt_solisten(struct knote *kn, long hint);
119
120 static struct filterops solisten_filtops =
121 { 1, NULL, filt_sordetach, filt_solisten };
122 static struct filterops soread_filtops =
123 { 1, NULL, filt_sordetach, filt_soread };
124 static struct filterops sowrite_filtops =
125 { 1, NULL, filt_sowdetach, filt_sowrite };
126
127 #define EVEN_MORE_LOCKING_DEBUG 0
128 int socket_debug = 0;
129 int socket_zone = M_SOCKET;
130 so_gen_t so_gencnt; /* generation count for sockets */
131
132 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
133 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
134
135 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
136 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
137 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
138 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
139 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
140 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
141 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
142
143 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
144
145
146 SYSCTL_DECL(_kern_ipc);
147
148 static int somaxconn = SOMAXCONN;
149 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
150 0, "");
151
152 /* Should we get a maximum also ??? */
153 static int sosendmaxchain = 65536;
154 static int sosendminchain = 16384;
155 static int sorecvmincopy = 16384;
156 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
157 0, "");
158 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
159 0, "");
160
161 void so_cache_timer();
162
163 /*
164 * Socket operation routines.
165 * These routines are called by the routines in
166 * sys_socket.c or from a system process, and
167 * implement the semantics of socket operations by
168 * switching out to the protocol specific routines.
169 */
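/*
 * A minimal sketch of the dispatch idiom these wrappers use, assuming the
 * socket `so' is already locked; pru_bind may be pru_attach, pru_connect,
 * pru_send, etc., depending on the operation being implemented:
 *
 *	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
 */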
170
171 #ifdef __APPLE__
172
173 vm_size_t so_cache_zone_element_size;
174
175 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
176
177
178 void socketinit()
179 {
180 vm_size_t str_size;
181
182 if (so_cache_init_done) {
183 printf("socketinit: already called...\n");
184 return;
185 }
186
187 /*
188 * allocate lock group attribute and group for socket cache mutex
189 */
190 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
191
192 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
193
194 /*
195 * allocate the lock attribute for socket cache mutex
196 */
197 so_cache_mtx_attr = lck_attr_alloc_init();
198
199 so_cache_init_done = 1;
200
201 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
202
203 if (so_cache_mtx == NULL)
204 return; /* we're hosed... */
205
206 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
207 get_inpcb_str_size() + 4 +
208 get_tcp_str_size());
209 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
210 #if TEMPDEBUG
211 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
212 #endif
213 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
214
215 so_cache_zone_element_size = str_size;
216
217 sflt_init();
218
219 }
220
221 void cached_sock_alloc(so, waitok)
222 struct socket **so;
223 int waitok;
224
225 {
226 caddr_t temp;
227 register u_long offset;
228
229
230 lck_mtx_lock(so_cache_mtx);
231
232 if (cached_sock_count) {
233 cached_sock_count--;
234 *so = socket_cache_head;
235 if (*so == 0)
236 panic("cached_sock_alloc: cached sock is null");
237
238 socket_cache_head = socket_cache_head->cache_next;
239 if (socket_cache_head)
240 socket_cache_head->cache_prev = 0;
241 else
242 socket_cache_tail = 0;
243
244 lck_mtx_unlock(so_cache_mtx);
245
246 temp = (*so)->so_saved_pcb;
247 bzero((caddr_t)*so, sizeof(struct socket));
248 #if TEMPDEBUG
249 kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so,
250 cached_sock_count);
251 #endif
252 (*so)->so_saved_pcb = temp;
253 (*so)->cached_in_sock_layer = 1;
254
255 }
256 else {
257 #if TEMPDEBUG
258 kprintf("Allocating cached sock %x from memory\n", *so);
259 #endif
260
261 lck_mtx_unlock(so_cache_mtx);
262
263 if (waitok)
264 *so = (struct socket *) zalloc(so_cache_zone);
265 else
266 *so = (struct socket *) zalloc_noblock(so_cache_zone);
267
268 if (*so == 0)
269 return;
270
271 bzero((caddr_t)*so, sizeof(struct socket));
272
273 /*
274 * Define offsets for extra structures into our single block of
275 * memory. Align extra structures on longword boundaries.
276 */
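/*
 * The rounding below is the usual "round up to a multiple of 4" idiom;
 * a common equivalent spelling of the same adjustment would be:
 *
 *	offset = (offset + 3) & ~((u_long)3);
 */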
277
278
279 offset = (u_long) *so;
280 offset += sizeof(struct socket);
281 if (offset & 0x3) {
282 offset += 4;
283 offset &= 0xfffffffc;
284 }
285 (*so)->so_saved_pcb = (caddr_t) offset;
286 offset += get_inpcb_str_size();
287 if (offset & 0x3) {
288 offset += 4;
289 offset &= 0xfffffffc;
290 }
291
292 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
293 #if TEMPDEBUG
294 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
295 (*so)->so_saved_pcb,
296 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
297 #endif
298 }
299
300 (*so)->cached_in_sock_layer = 1;
301 }
302
303
304 void cached_sock_free(so)
305 struct socket *so;
306 {
307
308 lck_mtx_lock(so_cache_mtx);
309
310 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
311 --cached_sock_count;
312 lck_mtx_unlock(so_cache_mtx);
313 #if TEMPDEBUG
314 kprintf("Freeing overflowed cached socket %x\n", so);
315 #endif
316 zfree(so_cache_zone, so);
317 }
318 else {
319 #if TEMPDEBUG
320 kprintf("Freeing socket %x into cache\n", so);
321 #endif
322 if (so_cache_hw < cached_sock_count)
323 so_cache_hw = cached_sock_count;
324
325 so->cache_next = socket_cache_head;
326 so->cache_prev = 0;
327 if (socket_cache_head)
328 socket_cache_head->cache_prev = so;
329 else
330 socket_cache_tail = so;
331
332 so->cache_timestamp = so_cache_time;
333 socket_cache_head = so;
334 lck_mtx_unlock(so_cache_mtx);
335 }
336
337 #if TEMPDEBUG
338 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
339 #endif
340
341
342 }
343
344
345 void so_cache_timer()
346 {
347 register struct socket *p;
348 register int n_freed = 0;
349
350
351 lck_mtx_lock(so_cache_mtx);
352
353 ++so_cache_time;
354
355 while ( (p = socket_cache_tail) )
356 {
357 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
358 break;
359
360 so_cache_timeouts++;
361
362 if ( (socket_cache_tail = p->cache_prev) )
363 p->cache_prev->cache_next = 0;
364 if (--cached_sock_count == 0)
365 socket_cache_head = 0;
366
367
368 zfree(so_cache_zone, p);
369
370 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
371 {
372 so_cache_max_freed++;
373 break;
374 }
375 }
376 lck_mtx_unlock(so_cache_mtx);
377
378 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
379
380
381 }
382 #endif /* __APPLE__ */
383
384 /*
385 * Get a socket structure from our zone, and initialize it.
386 * We don't implement `waitok' yet (see comments in uipc_domain.c).
387 * Note that it would probably be better to allocate socket
388 * and PCB at the same time, but I'm not convinced that all
389 * the protocols can be easily modified to do this.
390 */
391 struct socket *
392 soalloc(waitok, dom, type)
393 int waitok;
394 int dom;
395 int type;
396 {
397 struct socket *so;
398
399 if ((dom == PF_INET) && (type == SOCK_STREAM))
400 cached_sock_alloc(&so, waitok);
401 else
402 {
403 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
404 if (so)
405 bzero(so, sizeof *so);
406 }
407 /* XXX race condition for reentrant kernel */
408 //###LD Atomic add for so_gencnt
409 if (so) {
410 so->so_gencnt = ++so_gencnt;
411 so->so_zone = socket_zone;
412 }
413
414 return so;
415 }
416
417 int
418 socreate(dom, aso, type, proto)
419 int dom;
420 struct socket **aso;
421 register int type;
422 int proto;
423 {
424 struct proc *p = current_proc();
425 register struct protosw *prp;
426 register struct socket *so;
427 register int error = 0;
428 #if TCPDEBUG
429 extern int tcpconsdebug;
430 #endif
431 if (proto)
432 prp = pffindproto(dom, proto, type);
433 else
434 prp = pffindtype(dom, type);
435
436 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
437 return (EPROTONOSUPPORT);
438 #ifndef __APPLE__
439
440 if (p->p_prison && jail_socket_unixiproute_only &&
441 prp->pr_domain->dom_family != PF_LOCAL &&
442 prp->pr_domain->dom_family != PF_INET &&
443 prp->pr_domain->dom_family != PF_ROUTE) {
444 return (EPROTONOSUPPORT);
445 }
446
447 #endif
448 if (prp->pr_type != type)
449 return (EPROTOTYPE);
450 so = soalloc(p != 0, dom, type);
451 if (so == 0)
452 return (ENOBUFS);
453
454 TAILQ_INIT(&so->so_incomp);
455 TAILQ_INIT(&so->so_comp);
456 so->so_type = type;
457
458 #ifdef __APPLE__
459 if (p != 0) {
460 so->so_uid = kauth_cred_getuid(kauth_cred_get());
461 if (!suser(kauth_cred_get(),NULL))
462 so->so_state = SS_PRIV;
463 }
464 #else
465 so->so_cred = kauth_cred_get_with_ref();
466 #endif
467 so->so_proto = prp;
468 #ifdef __APPLE__
469 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
470 so->so_rcv.sb_so = so->so_snd.sb_so = so;
471 #endif
472 so->next_lock_lr = 0;
473 so->next_unlock_lr = 0;
474
475
476 //### Attachment will create the per-pcb lock if necessary and increase the refcount
477 so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */
478
479 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
480 if (error) {
481 /*
482 * Warning:
483 * If so_pcb is not zero, the socket will be leaked,
484 * so the protocol attachment handler must be coded carefully
485 */
486 so->so_state |= SS_NOFDREF;
487 so->so_usecount--;
488 sofreelastref(so, 1); /* will deallocate the socket */
489 return (error);
490 }
491 #ifdef __APPLE__
492 prp->pr_domain->dom_refs++;
493 TAILQ_INIT(&so->so_evlist);
494
495 /* Attach socket filters for this protocol */
496 sflt_initsock(so);
497 #if TCPDEBUG
498 if (tcpconsdebug == 2)
499 so->so_options |= SO_DEBUG;
500 #endif
501 #endif
502
503 *aso = so;
504 return (0);
505 }
506
507 int
508 sobind(so, nam)
509 struct socket *so;
510 struct sockaddr *nam;
511
512 {
513 struct proc *p = current_proc();
514 int error = 0;
515 struct socket_filter_entry *filter;
516 int filtered = 0;
517
518 socket_lock(so, 1);
519
520 /* Socket filter */
521 error = 0;
522 for (filter = so->so_filt; filter && (error == 0);
523 filter = filter->sfe_next_onsocket) {
524 if (filter->sfe_filter->sf_filter.sf_bind) {
525 if (filtered == 0) {
526 filtered = 1;
527 sflt_use(so);
528 socket_unlock(so, 0);
529 }
530 error = filter->sfe_filter->sf_filter.sf_bind(
531 filter->sfe_cookie, so, nam);
532 }
533 }
534 if (filtered != 0) {
535 socket_lock(so, 0);
536 sflt_unuse(so);
537 }
538 /* End socket filter */
539
540 if (error == 0)
541 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
542
543 socket_unlock(so, 1);
544
545 if (error == EJUSTRETURN)
546 error = 0;
547
548 return (error);
549 }
550
551 void
552 sodealloc(so)
553 struct socket *so;
554 {
555 so->so_gencnt = ++so_gencnt;
556
557 #ifndef __APPLE__
558 if (so->so_rcv.sb_hiwat)
559 (void)chgsbsize(so->so_cred->cr_uidinfo,
560 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
561 if (so->so_snd.sb_hiwat)
562 (void)chgsbsize(so->so_cred->cr_uidinfo,
563 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
564 #ifdef INET
565 if (so->so_accf != NULL) {
566 if (so->so_accf->so_accept_filter != NULL &&
567 so->so_accf->so_accept_filter->accf_destroy != NULL) {
568 so->so_accf->so_accept_filter->accf_destroy(so);
569 }
570 if (so->so_accf->so_accept_filter_str != NULL)
571 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
572 FREE(so->so_accf, M_ACCF);
573 }
574 #endif /* INET */
575 kauth_cred_unref(&so->so_cred);
576 zfreei(so->so_zone, so);
577 #else
578 if (so->cached_in_sock_layer == 1)
579 cached_sock_free(so);
580 else {
581 if (so->cached_in_sock_layer == -1)
582 panic("sodealloc: double dealloc: so=%x\n", so);
583 so->cached_in_sock_layer = -1;
584 FREE_ZONE(so, sizeof(*so), so->so_zone);
585 }
586 #endif /* __APPLE__ */
587 }
588
589 int
590 solisten(so, backlog)
591 register struct socket *so;
592 int backlog;
593
594 {
595 struct proc *p = current_proc();
596 int error;
597
598 socket_lock(so, 1);
599
600 {
601 struct socket_filter_entry *filter;
602 int filtered = 0;
603 error = 0;
604 for (filter = so->so_filt; filter && (error == 0);
605 filter = filter->sfe_next_onsocket) {
606 if (filter->sfe_filter->sf_filter.sf_listen) {
607 if (filtered == 0) {
608 filtered = 1;
609 sflt_use(so);
610 socket_unlock(so, 0);
611 }
612 error = filter->sfe_filter->sf_filter.sf_listen(
613 filter->sfe_cookie, so);
614 }
615 }
616 if (filtered != 0) {
617 socket_lock(so, 0);
618 sflt_unuse(so);
619 }
620 }
621
622 if (error == 0) {
623 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
624 }
625
626 if (error) {
627 socket_unlock(so, 1);
628 if (error == EJUSTRETURN)
629 error = 0;
630 return (error);
631 }
632
633 if (TAILQ_EMPTY(&so->so_comp))
634 so->so_options |= SO_ACCEPTCONN;
635 if (backlog < 0 || backlog > somaxconn)
636 backlog = somaxconn;
637 so->so_qlimit = backlog;
638
639 socket_unlock(so, 1);
640 return (0);
641 }
642
643 void
644 sofreelastref(so, dealloc)
645 register struct socket *so;
646 int dealloc;
647 {
648 int error;
649 struct socket *head = so->so_head;
650
651 /*### Assume socket is locked */
652
653 /* Remove any filters - may be called more than once */
654 sflt_termsock(so);
655
656 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
657 #ifdef __APPLE__
658 selthreadclear(&so->so_snd.sb_sel);
659 selthreadclear(&so->so_rcv.sb_sel);
660 so->so_rcv.sb_flags &= ~SB_UPCALL;
661 so->so_snd.sb_flags &= ~SB_UPCALL;
662 #endif
663 return;
664 }
665 if (head != NULL) {
666 socket_lock(head, 1);
667 if (so->so_state & SS_INCOMP) {
668 TAILQ_REMOVE(&head->so_incomp, so, so_list);
669 head->so_incqlen--;
670 } else if (so->so_state & SS_COMP) {
671 /*
672 * We must not decommission a socket that's
673 * on the accept(2) queue. If we do, then
674 * accept(2) may hang after select(2) indicated
675 * that the listening socket was ready.
676 */
677 #ifdef __APPLE__
678 selthreadclear(&so->so_snd.sb_sel);
679 selthreadclear(&so->so_rcv.sb_sel);
680 so->so_rcv.sb_flags &= ~SB_UPCALL;
681 so->so_snd.sb_flags &= ~SB_UPCALL;
682 #endif
683 socket_unlock(head, 1);
684 return;
685 } else {
686 panic("sofree: not queued");
687 }
688 head->so_qlen--;
689 so->so_state &= ~SS_INCOMP;
690 so->so_head = NULL;
691 socket_unlock(head, 1);
692 }
693 #ifdef __APPLE__
694 selthreadclear(&so->so_snd.sb_sel);
695 sbrelease(&so->so_snd);
696 #endif
697 sorflush(so);
698
699 /* 3932268: disable upcall */
700 so->so_rcv.sb_flags &= ~SB_UPCALL;
701 so->so_snd.sb_flags &= ~SB_UPCALL;
702
703 if (dealloc)
704 sodealloc(so);
705 }
706
707 /*
708 * Close a socket on last file table reference removal.
709 * Initiate disconnect if connected.
710 * Free socket when disconnect complete.
711 */
712 int
713 soclose_locked(so)
714 register struct socket *so;
715 {
716 int error = 0;
717 lck_mtx_t * mutex_held;
718 struct timespec ts;
719
720 if (so->so_usecount == 0) {
721 panic("soclose: so=%x refcount=0\n", so);
722 }
723
724 sflt_notify(so, sock_evt_closing, NULL);
725
726 if ((so->so_options & SO_ACCEPTCONN)) {
727 struct socket *sp;
728
729 /* We do not want new connections to be added to the connection queues */
730 so->so_options &= ~SO_ACCEPTCONN;
731
732 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
733 /* A bit tricky here. We need to keep
734 * a lock if it's a protocol global lock
735 * but we want the head, not the socket locked
736 * in the case of per-socket lock...
737 */
738 if (so->so_proto->pr_getlock != NULL) {
739 socket_unlock(so, 0);
740 socket_lock(sp, 1);
741 }
742 (void) soabort(sp);
743 if (so->so_proto->pr_getlock != NULL) {
744 socket_unlock(sp, 1);
745 socket_lock(so, 0);
746 }
747 }
748
749 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
750 /* Dequeue from so_comp since sofree() won't do it */
751 TAILQ_REMOVE(&so->so_comp, sp, so_list);
752 so->so_qlen--;
753
754 if (so->so_proto->pr_getlock != NULL) {
755 socket_unlock(so, 0);
756 socket_lock(sp, 1);
757 }
758
759 sp->so_state &= ~SS_COMP;
760 sp->so_head = NULL;
761
762 (void) soabort(sp);
763 if (so->so_proto->pr_getlock != NULL) {
764 socket_unlock(sp, 1);
765 socket_lock(so, 0);
766 }
767 }
768 }
769 if (so->so_pcb == 0) {
770 /* 3915887: mark the socket as ready for dealloc */
771 so->so_flags |= SOF_PCBCLEARING;
772 goto discard;
773 }
774 if (so->so_state & SS_ISCONNECTED) {
775 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
776 error = sodisconnectlocked(so);
777 if (error)
778 goto drop;
779 }
780 if (so->so_options & SO_LINGER) {
781 if ((so->so_state & SS_ISDISCONNECTING) &&
782 (so->so_state & SS_NBIO))
783 goto drop;
784 if (so->so_proto->pr_getlock != NULL)
785 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
786 else
787 mutex_held = so->so_proto->pr_domain->dom_mtx;
788 while (so->so_state & SS_ISCONNECTED) {
789 ts.tv_sec = (so->so_linger/100);
790 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
791 error = msleep((caddr_t)&so->so_timeo, mutex_held,
792 PSOCK | PCATCH, "soclos", &ts);
793 if (error) {
794 /* It's OK when the timer fires, don't report an error */
795 if (error == EWOULDBLOCK)
796 error = 0;
797 break;
798 }
799 }
800 }
801 }
802 drop:
803 if (so->so_usecount == 0)
804 panic("soclose: usecount is zero so=%x\n", so);
805 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
806 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
807 if (error == 0)
808 error = error2;
809 }
810 if (so->so_usecount <= 0)
811 panic("soclose: usecount is zero so=%x\n", so);
812 discard:
813 if (so->so_pcb && so->so_state & SS_NOFDREF)
814 panic("soclose: NOFDREF");
815 so->so_state |= SS_NOFDREF;
816 #ifdef __APPLE__
817 so->so_proto->pr_domain->dom_refs--;
818 evsofree(so);
819 #endif
820 so->so_usecount--;
821 sofree(so);
822 return (error);
823 }
824
825 int
826 soclose(so)
827 register struct socket *so;
828 {
829 int error = 0;
830 socket_lock(so, 1);
831 if (so->so_retaincnt == 0)
832 error = soclose_locked(so);
833 else { /* if the FD is going away but the socket is retained in the kernel, remove its reference */
834 so->so_usecount--;
835 if (so->so_usecount < 2)
836 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount);
837 }
838 socket_unlock(so, 1);
839 return (error);
840 }
841
842
843 /*
844 * Must be called at splnet...
845 */
846 //#### Should already be locked
847 int
848 soabort(so)
849 struct socket *so;
850 {
851 int error;
852
853 #ifdef MORE_LOCKING_DEBUG
854 lck_mtx_t * mutex_held;
855
856 if (so->so_proto->pr_getlock != NULL)
857 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
858 else
859 mutex_held = so->so_proto->pr_domain->dom_mtx;
860 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
861 #endif
862
863 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
864 if (error) {
865 sofree(so);
866 return error;
867 }
868 return (0);
869 }
870
871 int
872 soacceptlock(so, nam, dolock)
873 register struct socket *so;
874 struct sockaddr **nam;
875 int dolock;
876 {
877 int error;
878
879 if (dolock) socket_lock(so, 1);
880
881 if ((so->so_state & SS_NOFDREF) == 0)
882 panic("soaccept: !NOFDREF");
883 so->so_state &= ~SS_NOFDREF;
884 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
885
886 if (dolock) socket_unlock(so, 1);
887 return (error);
888 }
889 int
890 soaccept(so, nam)
891 register struct socket *so;
892 struct sockaddr **nam;
893 {
894 return (soacceptlock(so, nam, 1));
895 }
896
897 int
898 soconnectlock(so, nam, dolock)
899 register struct socket *so;
900 struct sockaddr *nam;
901 int dolock;
902
903 {
904 int s;
905 int error;
906 struct proc *p = current_proc();
907
908 if (dolock) socket_lock(so, 1);
909
910 if (so->so_options & SO_ACCEPTCONN) {
911 if (dolock) socket_unlock(so, 1);
912 return (EOPNOTSUPP);
913 }
914 /*
915 * If protocol is connection-based, can only connect once.
916 * Otherwise, if connected, try to disconnect first.
917 * This allows user to disconnect by connecting to, e.g.,
918 * a null address.
919 */
920 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
921 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
922 (error = sodisconnectlocked(so))))
923 error = EISCONN;
924 else {
925 /*
926 * Run connect filter before calling protocol:
927 * - non-blocking connect returns before completion;
928 */
929 {
930 struct socket_filter_entry *filter;
931 int filtered = 0;
932 error = 0;
933 for (filter = so->so_filt; filter && (error == 0);
934 filter = filter->sfe_next_onsocket) {
935 if (filter->sfe_filter->sf_filter.sf_connect_out) {
936 if (filtered == 0) {
937 filtered = 1;
938 sflt_use(so);
939 socket_unlock(so, 0);
940 }
941 error = filter->sfe_filter->sf_filter.sf_connect_out(
942 filter->sfe_cookie, so, nam);
943 }
944 }
945 if (filtered != 0) {
946 socket_lock(so, 0);
947 sflt_unuse(so);
948 }
949 }
950 if (error) {
951 if (error == EJUSTRETURN)
952 error = 0;
953 if (dolock) socket_unlock(so, 1);
954 return error;
955 }
956
957 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
958 }
959 if (dolock) socket_unlock(so, 1);
960 return (error);
961 }
962
963 int
964 soconnect(so, nam)
965 register struct socket *so;
966 struct sockaddr *nam;
967 {
968 return (soconnectlock(so, nam, 1));
969 }
970
971 int
972 soconnect2(so1, so2)
973 register struct socket *so1;
974 struct socket *so2;
975 {
976 int error;
977
978 socket_lock(so1, 1);
979 if (so2->so_proto->pr_lock)
980 socket_lock(so2, 1);
981
982 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
983
984 socket_unlock(so1, 1);
985 if (so2->so_proto->pr_lock)
986 socket_unlock(so2, 1);
987 return (error);
988 }
989
990
991 int
992 sodisconnectlocked(so)
993 register struct socket *so;
994 {
995 int error;
996
997 if ((so->so_state & SS_ISCONNECTED) == 0) {
998 error = ENOTCONN;
999 goto bad;
1000 }
1001 if (so->so_state & SS_ISDISCONNECTING) {
1002 error = EALREADY;
1003 goto bad;
1004 }
1005
1006 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1007
1008 if (error == 0) {
1009 sflt_notify(so, sock_evt_disconnected, NULL);
1010 }
1011
1012 bad:
1013 return (error);
1014 }
1015 //### Locking version
1016 int
1017 sodisconnect(so)
1018 register struct socket *so;
1019 {
1020 int error;
1021
1022 socket_lock(so, 1);
1023 error = sodisconnectlocked(so);
1024 socket_unlock(so, 1);
1025 return(error);
1026 }
1027
1028 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1029
1030 /*
1031 * sosendcheck will lock the socket buffer if it isn't locked and
1032 * verify that there is space for the data being inserted.
1033 */
1034
1035 static int
1036 sosendcheck(
1037 struct socket *so,
1038 struct sockaddr *addr,
1039 long resid,
1040 long clen,
1041 long atomic,
1042 int flags,
1043 int *sblocked)
1044 {
1045 int error = 0;
1046 long space;
1047 int assumelock = 0;
1048
1049 restart:
1050 if (*sblocked == 0) {
1051 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1052 so->so_send_filt_thread != 0 &&
1053 so->so_send_filt_thread == current_thread()) {
1054 /*
1055 * We're being called recursively from a filter,
1056 * allow this to continue. Radar 4150520.
1057 * Don't set sblocked because we don't want
1058 * to perform an unlock later.
1059 */
1060 assumelock = 1;
1061 }
1062 else {
1063 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1064 if (error) {
1065 return error;
1066 }
1067 *sblocked = 1;
1068 }
1069 }
1070
1071 if (so->so_state & SS_CANTSENDMORE)
1072 return EPIPE;
1073
1074 if (so->so_error) {
1075 error = so->so_error;
1076 so->so_error = 0;
1077 return error;
1078 }
1079
1080 if ((so->so_state & SS_ISCONNECTED) == 0) {
1081 /*
1082 * `sendto' and `sendmsg' are allowed on a connection-
1083 * based socket if it supports implied connect.
1084 * Return ENOTCONN if not connected and no address is
1085 * supplied.
1086 */
1087 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1088 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1089 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1090 !(resid == 0 && clen != 0))
1091 return ENOTCONN;
1092 } else if (addr == 0 && !(flags&MSG_HOLD))
1093 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1094 }
1095 space = sbspace(&so->so_snd);
1096 if (flags & MSG_OOB)
1097 space += 1024;
1098 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1099 clen > so->so_snd.sb_hiwat)
1100 return EMSGSIZE;
1101 if (space < resid + clen &&
1102 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1103 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1104 return EWOULDBLOCK;
1105 }
1106 sbunlock(&so->so_snd, 1);
1107 error = sbwait(&so->so_snd);
1108 if (error) {
1109 return error;
1110 }
1111 goto restart;
1112 }
1113
1114 return 0;
1115 }
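/*
 * A minimal sketch of the calling convention sosendcheck() expects,
 * mirroring how sosend() below uses it: `sblocked' starts at 0, the check
 * runs on every pass through the send loop, and at release time the send
 * buffer is unlocked only if sosendcheck() actually took the lock:
 *
 *	int sblocked = 0;
 *	...
 *	error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
 *	if (error)
 *		goto release;
 *	...
 * release:
 *	if (sblocked)
 *		sbunlock(&so->so_snd, 0);	(also unlocks the socket)
 *	else
 *		socket_unlock(so, 1);
 */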
1116
1117 /*
1118 * Send on a socket.
1119 * If send must go all at once and message is larger than
1120 * send buffering, then hard error.
1121 * Lock against other senders.
1122 * If must go all at once and not enough room now, then
1123 * inform user that this would block and do nothing.
1124 * Otherwise, if nonblocking, send as much as possible.
1125 * The data to be sent is described by "uio" if nonzero,
1126 * otherwise by the mbuf chain "top" (which must be null
1127 * if uio is not). Data provided in mbuf chain must be small
1128 * enough to send all at once.
1129 *
1130 * Returns nonzero on error, timeout or signal; callers
1131 * must check for short counts if EINTR/ERESTART are returned.
1132 * Data and control buffers are freed on return.
1133 * Experiment:
1134 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1135 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1136 * point at the mbuf chain being constructed and go from there.
1137 */
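/*
 * A sketch of a hypothetical in-kernel caller honoring the short-count
 * rule above: if EINTR/ERESTART comes back after some data has already
 * been moved, the caller reports the partial write rather than the error.
 *
 *	int orig_resid = uio_resid(uio);
 *
 *	error = sosend(so, addr, uio, NULL, NULL, flags);
 *	if ((error == EINTR || error == ERESTART) &&
 *	    uio_resid(uio) != orig_resid)
 *		error = 0;		(short count already reflected in uio)
 */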
1138 int
1139 sosend(so, addr, uio, top, control, flags)
1140 register struct socket *so;
1141 struct sockaddr *addr;
1142 struct uio *uio;
1143 struct mbuf *top;
1144 struct mbuf *control;
1145 int flags;
1146
1147 {
1148 struct mbuf **mp;
1149 register struct mbuf *m, *freelist = NULL;
1150 register long space, len, resid;
1151 int clen = 0, error, dontroute, mlen, sendflags;
1152 int atomic = sosendallatonce(so) || top;
1153 int sblocked = 0;
1154 struct proc *p = current_proc();
1155
1156 if (uio)
1157 // LP64todo - fix this!
1158 resid = uio_resid(uio);
1159 else
1160 resid = top->m_pkthdr.len;
1161
1162 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1163 so,
1164 resid,
1165 so->so_snd.sb_cc,
1166 so->so_snd.sb_lowat,
1167 so->so_snd.sb_hiwat);
1168
1169 socket_lock(so, 1);
1170
1171 /*
1172 * In theory resid should be unsigned.
1173 * However, space must be signed, as it might be less than 0
1174 * if we over-committed, and we must use a signed comparison
1175 * of space and resid. On the other hand, a negative resid
1176 * causes us to loop sending 0-length segments to the protocol.
1177 *
1178 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1179 * type sockets since that's an error.
1180 */
1181 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1182 error = EINVAL;
1183 socket_unlock(so, 1);
1184 goto out;
1185 }
1186
1187 dontroute =
1188 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1189 (so->so_proto->pr_flags & PR_ATOMIC);
1190 if (p)
1191 p->p_stats->p_ru.ru_msgsnd++;
1192 if (control)
1193 clen = control->m_len;
1194
1195 do {
1196 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1197 if (error) {
1198 goto release;
1199 }
1200 mp = &top;
1201 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1202
1203 do {
1204
1205 if (uio == NULL) {
1206 /*
1207 * Data is prepackaged in "top".
1208 */
1209 resid = 0;
1210 if (flags & MSG_EOR)
1211 top->m_flags |= M_EOR;
1212 } else {
1213 int chainlength;
1214 int bytes_to_copy;
1215
1216 bytes_to_copy = min(resid, space);
1217
1218 if (sosendminchain > 0) {
1219 chainlength = 0;
1220 } else
1221 chainlength = sosendmaxchain;
1222
1223 socket_unlock(so, 0);
1224
1225 do {
1226 int num_needed;
1227 int hdrs_needed = (top == 0) ? 1 : 0;
1228
1229 /*
1230 * try to maintain a local cache of mbuf clusters needed to complete this write
1231 * the list is further limited to the number that are currently needed to fill the socket
1232 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1233 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
1234 * if we fail early (or miscalculate the number needed) make sure to release any clusters
1235 * we haven't yet consumed.
1236 */
1237 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1238 num_needed = bytes_to_copy / NBPG;
1239
1240 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1241 num_needed++;
1242
1243 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1244 /* Fall back to cluster size if allocation failed */
1245 }
1246
1247 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1248 num_needed = bytes_to_copy / MCLBYTES;
1249
1250 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1251 num_needed++;
1252
1253 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1254 /* Fall back to a single mbuf if allocation failed */
1255 }
1256
1257 if (freelist == NULL) {
1258 if (top == 0)
1259 MGETHDR(freelist, M_WAIT, MT_DATA);
1260 else
1261 MGET(freelist, M_WAIT, MT_DATA);
1262
1263 if (freelist == NULL) {
1264 error = ENOBUFS;
1265 socket_lock(so, 0);
1266 goto release;
1267 }
1268 /*
1269 * For datagram protocols, leave room
1270 * for protocol headers in first mbuf.
1271 */
1272 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1273 MH_ALIGN(freelist, bytes_to_copy);
1274 }
1275 m = freelist;
1276 freelist = m->m_next;
1277 m->m_next = NULL;
1278
1279 if ((m->m_flags & M_EXT))
1280 mlen = m->m_ext.ext_size;
1281 else if ((m->m_flags & M_PKTHDR))
1282 mlen = MHLEN - m_leadingspace(m);
1283 else
1284 mlen = MLEN;
1285 len = min(mlen, bytes_to_copy);
1286
1287 chainlength += len;
1288
1289 space -= len;
1290
1291 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1292
1293 // LP64todo - fix this!
1294 resid = uio_resid(uio);
1295
1296 m->m_len = len;
1297 *mp = m;
1298 top->m_pkthdr.len += len;
1299 if (error)
1300 break;
1301 mp = &m->m_next;
1302 if (resid <= 0) {
1303 if (flags & MSG_EOR)
1304 top->m_flags |= M_EOR;
1305 break;
1306 }
1307 bytes_to_copy = min(resid, space);
1308
1309 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1310
1311 socket_lock(so, 0);
1312
1313 if (error)
1314 goto release;
1315 }
1316
1317 if (flags & (MSG_HOLD|MSG_SEND))
1318 {
1319 /* Enqueue for later, go away if HOLD */
1320 register struct mbuf *mb1;
1321 if (so->so_temp && (flags & MSG_FLUSH))
1322 {
1323 m_freem(so->so_temp);
1324 so->so_temp = NULL;
1325 }
1326 if (so->so_temp)
1327 so->so_tail->m_next = top;
1328 else
1329 so->so_temp = top;
1330 mb1 = top;
1331 while (mb1->m_next)
1332 mb1 = mb1->m_next;
1333 so->so_tail = mb1;
1334 if (flags & MSG_HOLD)
1335 {
1336 top = NULL;
1337 goto release;
1338 }
1339 top = so->so_temp;
1340 }
1341 if (dontroute)
1342 so->so_options |= SO_DONTROUTE;
1343 /* Compute flags here, for pru_send and NKEs */
1344 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1345 /*
1346 * If the user set MSG_EOF, the protocol
1347 * understands this flag and nothing left to
1348 * send then use PRU_SEND_EOF instead of PRU_SEND.
1349 */
1350 ((flags & MSG_EOF) &&
1351 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1352 (resid <= 0)) ?
1353 PRUS_EOF :
1354 /* If there is more to send set PRUS_MORETOCOME */
1355 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1356
1357 /*
1358 * Socket filter processing
1359 */
1360 {
1361 struct socket_filter_entry *filter;
1362 int filtered;
1363
1364 filtered = 0;
1365 error = 0;
1366 for (filter = so->so_filt; filter && (error == 0);
1367 filter = filter->sfe_next_onsocket) {
1368 if (filter->sfe_filter->sf_filter.sf_data_out) {
1369 int so_flags = 0;
1370 if (filtered == 0) {
1371 filtered = 1;
1372 so->so_send_filt_thread = current_thread();
1373 sflt_use(so);
1374 socket_unlock(so, 0);
1375 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1376 }
1377 error = filter->sfe_filter->sf_filter.sf_data_out(
1378 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1379 }
1380 }
1381
1382 if (filtered) {
1383 /*
1384 * At this point, we've run at least one filter.
1385 * The socket is unlocked as is the socket buffer.
1386 */
1387 socket_lock(so, 0);
1388 sflt_unuse(so);
1389 so->so_send_filt_thread = 0;
1390 if (error) {
1391 if (error == EJUSTRETURN) {
1392 error = 0;
1393 clen = 0;
1394 control = 0;
1395 top = 0;
1396 }
1397
1398 goto release;
1399 }
1400 }
1401 }
1402 /*
1403 * End Socket filter processing
1404 */
1405
1406 if (error == EJUSTRETURN) {
1407 /* A socket filter handled this data */
1408 error = 0;
1409 }
1410 else {
1411 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1412 sendflags, top, addr, control, p);
1413 }
1414 #ifdef __APPLE__
1415 if (flags & MSG_SEND)
1416 so->so_temp = NULL;
1417 #endif
1418 if (dontroute)
1419 so->so_options &= ~SO_DONTROUTE;
1420 clen = 0;
1421 control = 0;
1422 top = 0;
1423 mp = &top;
1424 if (error)
1425 goto release;
1426 } while (resid && space > 0);
1427 } while (resid);
1428
1429 release:
1430 if (sblocked)
1431 sbunlock(&so->so_snd, 0); /* will unlock socket */
1432 else
1433 socket_unlock(so, 1);
1434 out:
1435 if (top)
1436 m_freem(top);
1437 if (control)
1438 m_freem(control);
1439 if (freelist)
1440 m_freem_list(freelist);
1441
1442 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1443 so,
1444 resid,
1445 so->so_snd.sb_cc,
1446 space,
1447 error);
1448
1449 return (error);
1450 }
1451
1452 /*
1453 * Implement receive operations on a socket.
1454 * We depend on the way that records are added to the sockbuf
1455 * by sbappend*. In particular, each record (mbufs linked through m_next)
1456 * must begin with an address if the protocol so specifies,
1457 * followed by an optional mbuf or mbufs containing ancillary data,
1458 * and then zero or more mbufs of data.
1459 * In order to avoid blocking network interrupts for the entire time here,
1460 * we splx() while doing the actual copy to user space.
1461 * Although the sockbuf is locked, new data may still be appended,
1462 * and thus we must maintain consistency of the sockbuf during that time.
1463 *
1464 * The caller may receive the data as a single mbuf chain by supplying
1465 * an mbuf **mp0 for use in returning the chain. The uio is then used
1466 * only for the count in uio_resid.
1467 */
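/*
 * A sketch of the sockbuf record layout soreceive() relies on: records are
 * chained through m_nextpkt, and mbufs within a record through m_next.  The
 * leading MT_SONAME mbuf appears only for PR_ADDR protocols, and the
 * MT_CONTROL mbufs are optional.
 *
 *	[MT_SONAME] -> [MT_CONTROL] ... -> [data] -> [data] -> NULL
 *	     |
 *	 m_nextpkt
 *	     |
 *	 next record ...
 */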
1468 int
1469 soreceive(so, psa, uio, mp0, controlp, flagsp)
1470 register struct socket *so;
1471 struct sockaddr **psa;
1472 struct uio *uio;
1473 struct mbuf **mp0;
1474 struct mbuf **controlp;
1475 int *flagsp;
1476 {
1477 register struct mbuf *m, **mp, *ml = NULL;
1478 register int flags, len, error, offset;
1479 struct protosw *pr = so->so_proto;
1480 struct mbuf *nextrecord;
1481 int moff, type = 0;
1482 // LP64todo - fix this!
1483 int orig_resid = uio_resid(uio);
1484 volatile struct mbuf *free_list;
1485 volatile int delayed_copy_len;
1486 int can_delay;
1487 int need_event;
1488 struct proc *p = current_proc();
1489
1490
1491 // LP64todo - fix this!
1492 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1493 so,
1494 uio_resid(uio),
1495 so->so_rcv.sb_cc,
1496 so->so_rcv.sb_lowat,
1497 so->so_rcv.sb_hiwat);
1498
1499 socket_lock(so, 1);
1500
1501 #ifdef MORE_LOCKING_DEBUG
1502 if (so->so_usecount == 1)
1503 panic("soreceive: so=%x no other reference on socket\n", so);
1504 #endif
1505 mp = mp0;
1506 if (psa)
1507 *psa = 0;
1508 if (controlp)
1509 *controlp = 0;
1510 if (flagsp)
1511 flags = *flagsp &~ MSG_EOR;
1512 else
1513 flags = 0;
1514 /*
1515 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1516 * regardless of the flags argument. Here is the case where
1517 * out-of-band data is not inline.
1518 */
1519 if ((flags & MSG_OOB) ||
1520 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1521 (so->so_options & SO_OOBINLINE) == 0 &&
1522 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1523 m = m_get(M_WAIT, MT_DATA);
1524 if (m == NULL) {
1525 socket_unlock(so, 1);
1526 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1527 return (ENOBUFS);
1528 }
1529 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1530 if (error)
1531 goto bad;
1532 socket_unlock(so, 0);
1533 do {
1534 // LP64todo - fix this!
1535 error = uiomove(mtod(m, caddr_t),
1536 (int) min(uio_resid(uio), m->m_len), uio);
1537 m = m_free(m);
1538 } while (uio_resid(uio) && error == 0 && m);
1539 socket_lock(so, 0);
1540 bad:
1541 if (m)
1542 m_freem(m);
1543 #ifdef __APPLE__
1544 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1545 if (error == EWOULDBLOCK || error == EINVAL) {
1546 /*
1547 * Let's try to get normal data:
1548 * EWOULDBLOCK: out-of-band data not received yet;
1549 * EINVAL: out-of-band data already read.
1550 */
1551 error = 0;
1552 goto nooob;
1553 } else if (error == 0 && flagsp)
1554 *flagsp |= MSG_OOB;
1555 }
1556 socket_unlock(so, 1);
1557 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1558 #endif
1559 return (error);
1560 }
1561 nooob:
1562 if (mp)
1563 *mp = (struct mbuf *)0;
1564 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1565 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1566
1567
1568 free_list = (struct mbuf *)0;
1569 delayed_copy_len = 0;
1570 restart:
1571 #ifdef MORE_LOCKING_DEBUG
1572 if (so->so_usecount <= 1)
1573 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1574 #endif
1575 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1576 if (error) {
1577 socket_unlock(so, 1);
1578 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1579 return (error);
1580 }
1581
1582 m = so->so_rcv.sb_mb;
1583 /*
1584 * If we have less data than requested, block awaiting more
1585 * (subject to any timeout) if:
1586 * 1. the current count is less than the low water mark, or
1587 * 2. MSG_WAITALL is set, and it is possible to do the entire
1588 * receive operation at once if we block (resid <= hiwat).
1589 * 3. MSG_DONTWAIT is not set
1590 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1591 * we have to do the receive in sections, and thus risk returning
1592 * a short count if a timeout or signal occurs after we start.
1593 */
1594 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1595 so->so_rcv.sb_cc < uio_resid(uio)) &&
1596 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1597 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1598 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1599
1600 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1601 if (so->so_error) {
1602 if (m)
1603 goto dontblock;
1604 error = so->so_error;
1605 if ((flags & MSG_PEEK) == 0)
1606 so->so_error = 0;
1607 goto release;
1608 }
1609 if (so->so_state & SS_CANTRCVMORE) {
1610 if (m)
1611 goto dontblock;
1612 else
1613 goto release;
1614 }
1615 for (; m; m = m->m_next)
1616 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1617 m = so->so_rcv.sb_mb;
1618 goto dontblock;
1619 }
1620 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1621 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1622 error = ENOTCONN;
1623 goto release;
1624 }
1625 if (uio_resid(uio) == 0)
1626 goto release;
1627 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1628 error = EWOULDBLOCK;
1629 goto release;
1630 }
1631 sbunlock(&so->so_rcv, 1);
1632 #ifdef EVEN_MORE_LOCKING_DEBUG
1633 if (socket_debug)
1634 printf("Waiting for socket data\n");
1635 #endif
1636
1637 error = sbwait(&so->so_rcv);
1638 #ifdef EVEN_MORE_LOCKING_DEBUG
1639 if (socket_debug)
1640 printf("SORECEIVE - sbwait returned %d\n", error);
1641 #endif
1642 if (so->so_usecount < 1)
1643 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1644 if (error) {
1645 socket_unlock(so, 1);
1646 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1647 return (error);
1648 }
1649 goto restart;
1650 }
1651 dontblock:
1652 #ifndef __APPLE__
1653 if (uio->uio_procp)
1654 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1655 #else /* __APPLE__ */
1656 /*
1657 * 2207985
1658 * This should be uio->uio_procp; however, some callers of this
1659 * function use auto variables with stack garbage, and fail to
1660 * fill out the uio structure properly.
1661 */
1662 if (p)
1663 p->p_stats->p_ru.ru_msgrcv++;
1664 #endif /* __APPLE__ */
1665 nextrecord = m->m_nextpkt;
1666 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1667 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1668 orig_resid = 0;
1669 if (psa) {
1670 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1671 mp0 == 0);
1672 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1673 error = EWOULDBLOCK;
1674 goto release;
1675 }
1676 }
1677 if (flags & MSG_PEEK) {
1678 m = m->m_next;
1679 } else {
1680 sbfree(&so->so_rcv, m);
1681 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1682 panic("soreceive: about to create invalid socketbuf");
1683 MFREE(m, so->so_rcv.sb_mb);
1684 m = so->so_rcv.sb_mb;
1685 }
1686 }
1687 while (m && m->m_type == MT_CONTROL && error == 0) {
1688 if (flags & MSG_PEEK) {
1689 if (controlp)
1690 *controlp = m_copy(m, 0, m->m_len);
1691 m = m->m_next;
1692 } else {
1693 sbfree(&so->so_rcv, m);
1694 if (controlp) {
1695 if (pr->pr_domain->dom_externalize &&
1696 mtod(m, struct cmsghdr *)->cmsg_type ==
1697 SCM_RIGHTS) {
1698 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1699 error = (*pr->pr_domain->dom_externalize)(m);
1700 socket_lock(so, 0);
1701 }
1702 *controlp = m;
1703 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1704 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1705 so->so_rcv.sb_mb = m->m_next;
1706 m->m_next = 0;
1707 m = so->so_rcv.sb_mb;
1708 } else {
1709 MFREE(m, so->so_rcv.sb_mb);
1710 m = so->so_rcv.sb_mb;
1711 }
1712 }
1713 if (controlp) {
1714 orig_resid = 0;
1715 controlp = &(*controlp)->m_next;
1716 }
1717 }
1718 if (m) {
1719 if ((flags & MSG_PEEK) == 0)
1720 m->m_nextpkt = nextrecord;
1721 type = m->m_type;
1722 if (type == MT_OOBDATA)
1723 flags |= MSG_OOB;
1724 }
1725 moff = 0;
1726 offset = 0;
1727
1728 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1729 can_delay = 1;
1730 else
1731 can_delay = 0;
1732
1733 need_event = 0;
1734
1735 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1736 if (m->m_type == MT_OOBDATA) {
1737 if (type != MT_OOBDATA)
1738 break;
1739 } else if (type == MT_OOBDATA)
1740 break;
1741 #ifndef __APPLE__
1742 /*
1743 * This assertion needs rework. The trouble is Appletalk uses many
1744 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1745 * For now just remove the assertion... CSM 9/98
1746 */
1747 else
1748 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1749 ("receive 3"));
1750 #else
1751 /*
1752 * Make sure to always set the MSG_OOB flag when getting
1753 * out of band data inline.
1754 */
1755 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1756 (so->so_options & SO_OOBINLINE) != 0 &&
1757 (so->so_state & SS_RCVATMARK) != 0) {
1758 flags |= MSG_OOB;
1759 }
1760 #endif
1761 so->so_state &= ~SS_RCVATMARK;
1762 // LP64todo - fix this!
1763 len = uio_resid(uio) - delayed_copy_len;
1764 if (so->so_oobmark && len > so->so_oobmark - offset)
1765 len = so->so_oobmark - offset;
1766 if (len > m->m_len - moff)
1767 len = m->m_len - moff;
1768 /*
1769 * If mp is set, just pass back the mbufs.
1770 * Otherwise copy them out via the uio, then free.
1771 * Sockbuf must be consistent here (points to current mbuf,
1772 * it points to next record) when we drop priority;
1773 * we must note any additions to the sockbuf when we
1774 * block interrupts again.
1775 */
1776 if (mp == 0) {
1777 if (can_delay && len == m->m_len) {
1778 /*
1779 * only delay the copy if we're consuming the
1780 * mbuf and we're NOT in MSG_PEEK mode
1781 * and we have enough data to make it worthwhile
1782 * to drop and retake the funnel... can_delay
1783 * reflects the state of the latter 2 constraints;
1784 * moff should always be zero in these cases
1785 */
1786 delayed_copy_len += len;
1787 } else {
1788
1789 if (delayed_copy_len) {
1790 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1791
1792 if (error) {
1793 goto release;
1794 }
1795 if (m != so->so_rcv.sb_mb) {
1796 /*
1797 * can only get here if MSG_PEEK is not set
1798 * therefore, m should point at the head of the rcv queue...
1799 * if it doesn't, it means something drastically changed
1800 * while we were out from behind the funnel in sodelayed_copy...
1801 * perhaps a RST on the stream... in any event, the stream has
1802 * been interrupted... it's probably best just to return
1803 * whatever data we've moved and let the caller sort it out...
1804 */
1805 break;
1806 }
1807 }
1808 socket_unlock(so, 0);
1809 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1810 socket_lock(so, 0);
1811
1812 if (error)
1813 goto release;
1814 }
1815 } else
1816 uio_setresid(uio, (uio_resid(uio) - len));
1817
1818 if (len == m->m_len - moff) {
1819 if (m->m_flags & M_EOR)
1820 flags |= MSG_EOR;
1821 if (flags & MSG_PEEK) {
1822 m = m->m_next;
1823 moff = 0;
1824 } else {
1825 nextrecord = m->m_nextpkt;
1826 sbfree(&so->so_rcv, m);
1827 m->m_nextpkt = NULL;
1828
1829 if (mp) {
1830 *mp = m;
1831 mp = &m->m_next;
1832 so->so_rcv.sb_mb = m = m->m_next;
1833 *mp = (struct mbuf *)0;
1834 } else {
1835 if (free_list == NULL)
1836 free_list = m;
1837 else
1838 ml->m_next = m;
1839 ml = m;
1840 so->so_rcv.sb_mb = m = m->m_next;
1841 ml->m_next = 0;
1842 }
1843 if (m)
1844 m->m_nextpkt = nextrecord;
1845 }
1846 } else {
1847 if (flags & MSG_PEEK)
1848 moff += len;
1849 else {
1850 if (mp)
1851 *mp = m_copym(m, 0, len, M_WAIT);
1852 m->m_data += len;
1853 m->m_len -= len;
1854 so->so_rcv.sb_cc -= len;
1855 }
1856 }
1857 if (so->so_oobmark) {
1858 if ((flags & MSG_PEEK) == 0) {
1859 so->so_oobmark -= len;
1860 if (so->so_oobmark == 0) {
1861 so->so_state |= SS_RCVATMARK;
1862 /*
1863 * delay posting the actual event until after
1864 * any delayed copy processing has finished
1865 */
1866 need_event = 1;
1867 break;
1868 }
1869 } else {
1870 offset += len;
1871 if (offset == so->so_oobmark)
1872 break;
1873 }
1874 }
1875 if (flags & MSG_EOR)
1876 break;
1877 /*
1878 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1879 * we must not quit until "uio->uio_resid == 0" or an error
1880 * termination. If a signal/timeout occurs, return
1881 * with a short count but without error.
1882 * Keep sockbuf locked against other readers.
1883 */
1884 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1885 !sosendallatonce(so) && !nextrecord) {
1886 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1887 goto release;
1888
1889 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1890 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1891 if (sbwait(&so->so_rcv)) {
1892 error = 0;
1893 goto release;
1894 }
1895 /*
1896 * have to wait until after we get back from the sbwait to do the copy because
1897 * we will drop the funnel if we have enough data that has been delayed... by dropping
1898 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1899 * and to change the state of this socket... we're issuing the sbwait because
1900 * the socket is empty and we're expecting the netisr thread to wake us up when more
1901 * packets arrive... if we allow that processing to happen and then sbwait, we
1902 * could stall forever with packets sitting in the socket if no further packets
1903 * arrive from the remote side.
1904 *
1905 * we want to copy before we've collected all the data to satisfy this request to
1906 * allow the copy to overlap the incoming packet processing on an MP system
1907 */
1908 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1909
1910 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1911
1912 if (error)
1913 goto release;
1914 }
1915 m = so->so_rcv.sb_mb;
1916 if (m) {
1917 nextrecord = m->m_nextpkt;
1918 }
1919 }
1920 }
1921 #ifdef MORE_LOCKING_DEBUG
1922 if (so->so_usecount <= 1)
1923 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1924 #endif
1925
1926 if (m && pr->pr_flags & PR_ATOMIC) {
1927 #ifdef __APPLE__
1928 if (so->so_options & SO_DONTTRUNC)
1929 flags |= MSG_RCVMORE;
1930 else {
1931 #endif
1932 flags |= MSG_TRUNC;
1933 if ((flags & MSG_PEEK) == 0)
1934 (void) sbdroprecord(&so->so_rcv);
1935 #ifdef __APPLE__
1936 }
1937 #endif
1938 }
1939 if ((flags & MSG_PEEK) == 0) {
1940 if (m == 0)
1941 so->so_rcv.sb_mb = nextrecord;
1942 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1943 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1944 }
1945 #ifdef __APPLE__
1946 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1947 flags |= MSG_HAVEMORE;
1948
1949 if (delayed_copy_len) {
1950 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1951
1952 if (error)
1953 goto release;
1954 }
1955 if (free_list) {
1956 m_freem_list((struct mbuf *)free_list);
1957 free_list = (struct mbuf *)0;
1958 }
1959 if (need_event)
1960 postevent(so, 0, EV_OOB);
1961 #endif
1962 if (orig_resid == uio_resid(uio) && orig_resid &&
1963 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1964 sbunlock(&so->so_rcv, 1);
1965 goto restart;
1966 }
1967
1968 if (flagsp)
1969 *flagsp |= flags;
1970 release:
1971 #ifdef MORE_LOCKING_DEBUG
1972 if (so->so_usecount <= 1)
1973 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1974 #endif
1975 if (delayed_copy_len) {
1976 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1977 }
1978 if (free_list) {
1979 m_freem_list((struct mbuf *)free_list);
1980 }
1981 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1982
1983 // LP64todo - fix this!
1984 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1985 so,
1986 uio_resid(uio),
1987 so->so_rcv.sb_cc,
1988 0,
1989 error);
1990
1991 return (error);
1992 }
1993
1994
1995 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1996 {
1997 int error = 0;
1998 struct mbuf *m;
1999
2000 m = *free_list;
2001
2002 socket_unlock(so, 0);
2003
2004 while (m && error == 0) {
2005
2006 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2007
2008 m = m->m_next;
2009 }
2010 m_freem_list(*free_list);
2011
2012 *free_list = (struct mbuf *)NULL;
2013 *resid = 0;
2014
2015 socket_lock(so, 0);
2016
2017 return (error);
2018 }
2019
2020
2021 int
2022 soshutdown(so, how)
2023 register struct socket *so;
2024 int how;
2025 {
2026 register struct protosw *pr = so->so_proto;
2027 int ret;
2028
2029 socket_lock(so, 1);
2030
2031 sflt_notify(so, sock_evt_shutdown, &how);
2032
2033 if (how != SHUT_WR) {
2034 sorflush(so);
2035 postevent(so, 0, EV_RCLOSED);
2036 }
2037 if (how != SHUT_RD) {
2038 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2039 postevent(so, 0, EV_WCLOSED);
2040 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2041 socket_unlock(so, 1);
2042 return(ret);
2043 }
2044
2045 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2046 socket_unlock(so, 1);
2047 return (0);
2048 }
2049
2050 void
2051 sorflush(so)
2052 register struct socket *so;
2053 {
2054 register struct sockbuf *sb = &so->so_rcv;
2055 register struct protosw *pr = so->so_proto;
2056 struct sockbuf asb;
2057
2058 #ifdef MORE_LOCKING_DEBUG
2059 lck_mtx_t * mutex_held;
2060
2061 if (so->so_proto->pr_getlock != NULL)
2062 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2063 else
2064 mutex_held = so->so_proto->pr_domain->dom_mtx;
2065 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2066 #endif
2067
2068 sflt_notify(so, sock_evt_flush_read, NULL);
2069
2070 sb->sb_flags |= SB_NOINTR;
2071 (void) sblock(sb, M_WAIT);
2072 socantrcvmore(so);
2073 sbunlock(sb, 1);
2074 #ifdef __APPLE__
2075 selthreadclear(&sb->sb_sel);
2076 #endif
2077 asb = *sb;
2078 bzero((caddr_t)sb, sizeof (*sb));
2079 sb->sb_so = so; /* reestablish link to socket */
2080 if (asb.sb_flags & SB_KNOTE) {
2081 sb->sb_sel.si_note = asb.sb_sel.si_note;
2082 sb->sb_flags = SB_KNOTE;
2083 }
2084 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2085 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2086 sbrelease(&asb);
2087 }
2088
2089 /*
2090 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2091 * an additional variant to handle the case where the option value needs
2092 * to be some kind of integer, but not a specific size.
2093 * In addition to their use here, these functions are also called by the
2094 * protocol-level pr_ctloutput() routines.
2095 */
2096 int
2097 sooptcopyin(sopt, buf, len, minlen)
2098 struct sockopt *sopt;
2099 void *buf;
2100 size_t len;
2101 size_t minlen;
2102 {
2103 size_t valsize;
2104
2105 /*
2106 * If the user gives us more than we wanted, we ignore it,
2107 * but if we don't get the minimum length the caller
2108 * wants, we return EINVAL. On success, sopt->sopt_valsize
2109 * is set to however much we actually retrieved.
2110 */
2111 if ((valsize = sopt->sopt_valsize) < minlen)
2112 return EINVAL;
2113 if (valsize > len)
2114 sopt->sopt_valsize = valsize = len;
2115
2116 if (sopt->sopt_p != 0)
2117 return (copyin(sopt->sopt_val, buf, valsize));
2118
2119 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2120 return 0;
2121 }
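/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the usual way a protocol-level pr_ctloutput() SET handler
 * pulls in a fixed-size integer option with sooptcopyin().  The handler
 * name and the place the value would be stored are hypothetical, so the
 * sketch is kept under #if 0.
 */
#if 0
static int
example_setopt_int(struct socket *so, struct sockopt *sopt)
{
	int error, optval;

	/* insist on at least sizeof(int); any extra user bytes are ignored */
	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error)
		return (error);
	/* ... apply optval to the protocol control block hung off so ... */
	return (0);
}
#endif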
2122
2123 int
2124 sosetopt(so, sopt)
2125 struct socket *so;
2126 struct sockopt *sopt;
2127 {
2128 int error, optval;
2129 struct linger l;
2130 struct timeval tv;
2131 short val;
2132
2133 socket_lock(so, 1);
2134
2135 if (sopt->sopt_dir != SOPT_SET) {
2136 sopt->sopt_dir = SOPT_SET;
2137 }
2138
2139 {
2140 struct socket_filter_entry *filter;
2141 int filtered = 0;
2142 error = 0;
2143 for (filter = so->so_filt; filter && (error == 0);
2144 filter = filter->sfe_next_onsocket) {
2145 if (filter->sfe_filter->sf_filter.sf_setoption) {
2146 if (filtered == 0) {
2147 filtered = 1;
2148 sflt_use(so);
2149 socket_unlock(so, 0);
2150 }
2151 error = filter->sfe_filter->sf_filter.sf_setoption(
2152 filter->sfe_cookie, so, sopt);
2153 }
2154 }
2155
2156 if (filtered != 0) {
2157 socket_lock(so, 0);
2158 sflt_unuse(so);
2159
2160 if (error) {
2161 if (error == EJUSTRETURN)
2162 error = 0;
2163 goto bad;
2164 }
2165 }
2166 }
2167
2168 error = 0;
2169 if (sopt->sopt_level != SOL_SOCKET) {
2170 if (so->so_proto && so->so_proto->pr_ctloutput) {
2171 error = (*so->so_proto->pr_ctloutput)
2172 (so, sopt);
2173 socket_unlock(so, 1);
2174 return (error);
2175 }
2176 error = ENOPROTOOPT;
2177 } else {
2178 switch (sopt->sopt_name) {
2179 case SO_LINGER:
2180 case SO_LINGER_SEC:
2181 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2182 if (error)
2183 goto bad;
2184
2185 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2186 if (l.l_onoff)
2187 so->so_options |= SO_LINGER;
2188 else
2189 so->so_options &= ~SO_LINGER;
2190 break;
2191
2192 case SO_DEBUG:
2193 case SO_KEEPALIVE:
2194 case SO_DONTROUTE:
2195 case SO_USELOOPBACK:
2196 case SO_BROADCAST:
2197 case SO_REUSEADDR:
2198 case SO_REUSEPORT:
2199 case SO_OOBINLINE:
2200 case SO_TIMESTAMP:
2201 #ifdef __APPLE__
2202 case SO_DONTTRUNC:
2203 case SO_WANTMORE:
2204 case SO_WANTOOBFLAG:
2205 #endif
2206 error = sooptcopyin(sopt, &optval, sizeof optval,
2207 sizeof optval);
2208 if (error)
2209 goto bad;
2210 if (optval)
2211 so->so_options |= sopt->sopt_name;
2212 else
2213 so->so_options &= ~sopt->sopt_name;
2214 break;
2215
2216 case SO_SNDBUF:
2217 case SO_RCVBUF:
2218 case SO_SNDLOWAT:
2219 case SO_RCVLOWAT:
2220 error = sooptcopyin(sopt, &optval, sizeof optval,
2221 sizeof optval);
2222 if (error)
2223 goto bad;
2224
2225 /*
2226 * Values < 1 make no sense for any of these
2227 * options, so disallow them.
2228 */
2229 if (optval < 1) {
2230 error = EINVAL;
2231 goto bad;
2232 }
2233
2234 switch (sopt->sopt_name) {
2235 case SO_SNDBUF:
2236 case SO_RCVBUF:
2237 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2238 &so->so_snd : &so->so_rcv,
2239 (u_long) optval) == 0) {
2240 error = ENOBUFS;
2241 goto bad;
2242 }
2243 break;
2244
2245 /*
2246 * Make sure the low-water is never greater than
2247 * the high-water.
2248 */
2249 case SO_SNDLOWAT:
2250 so->so_snd.sb_lowat =
2251 (optval > so->so_snd.sb_hiwat) ?
2252 so->so_snd.sb_hiwat : optval;
2253 break;
2254 case SO_RCVLOWAT:
2255 so->so_rcv.sb_lowat =
2256 (optval > so->so_rcv.sb_hiwat) ?
2257 so->so_rcv.sb_hiwat : optval;
2258 break;
2259 }
2260 break;
2261
2262 case SO_SNDTIMEO:
2263 case SO_RCVTIMEO:
2264 error = sooptcopyin(sopt, &tv, sizeof tv,
2265 sizeof tv);
2266 if (error)
2267 goto bad;
2268
2269 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2270 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2271 error = EDOM;
2272 goto bad;
2273 }
2274
2275 switch (sopt->sopt_name) {
2276 case SO_SNDTIMEO:
2277 so->so_snd.sb_timeo = tv;
2278 break;
2279 case SO_RCVTIMEO:
2280 so->so_rcv.sb_timeo = tv;
2281 break;
2282 }
2283 break;
2284
2285 case SO_NKE:
2286 {
2287 struct so_nke nke;
2288
2289 error = sooptcopyin(sopt, &nke,
2290 sizeof nke, sizeof nke);
2291 if (error)
2292 goto bad;
2293
2294 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2295 break;
2296 }
2297
2298 case SO_NOSIGPIPE:
2299 error = sooptcopyin(sopt, &optval, sizeof optval,
2300 sizeof optval);
2301 if (error)
2302 goto bad;
2303 if (optval)
2304 so->so_flags |= SOF_NOSIGPIPE;
2305 else
2306 so->so_flags &= ~SOF_NOSIGPIPE;
2307
2308 break;
2309
2310 case SO_NOADDRERR:
2311 error = sooptcopyin(sopt, &optval, sizeof optval,
2312 sizeof optval);
2313 if (error)
2314 goto bad;
2315 if (optval)
2316 so->so_flags |= SOF_NOADDRAVAIL;
2317 else
2318 so->so_flags &= ~SOF_NOADDRAVAIL;
2319
2320 break;
2321
2322 default:
2323 error = ENOPROTOOPT;
2324 break;
2325 }
2326 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2327 (void) ((*so->so_proto->pr_ctloutput)
2328 (so, sopt));
2329 }
2330 }
2331 bad:
2332 socket_unlock(so, 1);
2333 return (error);
2334 }
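/*
 * Illustrative sketch (editor's addition): how the two linger options
 * handled above differ from user space.  SO_LINGER stores l_linger as
 * given, while SO_LINGER_SEC scales it by hz, so with hz of, say, 100 a
 * two-second linger ends up as so_linger == 200 ticks.  SO_SNDTIMEO and
 * SO_RCVTIMEO take a struct timeval and are rejected with EDOM unless
 * tv_sec >= 0 and 0 <= tv_usec < 1000000.  This is user-space code, so
 * it is kept under #if 0.
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>

static void
example_set_options(int s)
{
	struct linger l = { .l_onoff = 1, .l_linger = 2 };	/* 2 seconds */
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };	/* 5 second timeout */

	(void) setsockopt(s, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof l);
	(void) setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
}
#endif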
2335
2336 /* Helper routine for getsockopt */
2337 int
2338 sooptcopyout(sopt, buf, len)
2339 struct sockopt *sopt;
2340 void *buf;
2341 size_t len;
2342 {
2343 int error;
2344 size_t valsize;
2345
2346 error = 0;
2347
2348 /*
2349 * Documented get behavior is that we always return a value,
2350 * possibly truncated to fit in the user's buffer.
2351 * Traditional behavior is that we always tell the user
2352 * precisely how much we copied, rather than something useful
2353 * like the total amount we had available for her.
2354 * Note that this interface is not idempotent; the entire answer must
2355 * be generated ahead of time.
2356 */
2357 valsize = min(len, sopt->sopt_valsize);
2358 sopt->sopt_valsize = valsize;
2359 if (sopt->sopt_val != USER_ADDR_NULL) {
2360 if (sopt->sopt_p != 0)
2361 error = copyout(buf, sopt->sopt_val, valsize);
2362 else
2363 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2364 }
2365 return error;
2366 }
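/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the matching GET-side pattern.  sooptcopyout() copies at most
 * sopt->sopt_valsize bytes, so a caller with a short buffer receives a
 * truncated value and sopt_valsize is rewritten to the amount actually
 * copied.  The handler name is hypothetical; the sketch is kept under
 * #if 0.
 */
#if 0
static int
example_getopt_int(struct sockopt *sopt, int current_value)
{
	int optval = current_value;

	/* copies min(sizeof optval, sopt->sopt_valsize) bytes back out */
	return (sooptcopyout(sopt, &optval, sizeof optval));
}
#endif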
2367
2368 int
2369 sogetopt(so, sopt)
2370 struct socket *so;
2371 struct sockopt *sopt;
2372 {
2373 int error, optval;
2374 struct linger l;
2375 struct timeval tv;
2376
2377 if (sopt->sopt_dir != SOPT_GET) {
2378 sopt->sopt_dir = SOPT_GET;
2379 }
2380
2381 socket_lock(so, 1);
2382
2383 {
2384 struct socket_filter_entry *filter;
2385 int filtered = 0;
2386 error = 0;
2387 for (filter = so->so_filt; filter && (error == 0);
2388 filter = filter->sfe_next_onsocket) {
2389 if (filter->sfe_filter->sf_filter.sf_getoption) {
2390 if (filtered == 0) {
2391 filtered = 1;
2392 sflt_use(so);
2393 socket_unlock(so, 0);
2394 }
2395 error = filter->sfe_filter->sf_filter.sf_getoption(
2396 filter->sfe_cookie, so, sopt);
2397 }
2398 }
2399 if (filtered != 0) {
2400 socket_lock(so, 0);
2401 sflt_unuse(so);
2402
2403 if (error) {
2404 if (error == EJUSTRETURN)
2405 error = 0;
2406 socket_unlock(so, 1);
2407 return error;
2408 }
2409 }
2410 }
2411
2412 error = 0;
2413 if (sopt->sopt_level != SOL_SOCKET) {
2414 if (so->so_proto && so->so_proto->pr_ctloutput) {
2415 error = (*so->so_proto->pr_ctloutput)
2416 (so, sopt);
2417 socket_unlock(so, 1);
2418 return (error);
2419 } else {
2420 socket_unlock(so, 1);
2421 return (ENOPROTOOPT);
2422 }
2423 } else {
2424 switch (sopt->sopt_name) {
2425 case SO_LINGER:
2426 case SO_LINGER_SEC:
2427 l.l_onoff = so->so_options & SO_LINGER;
2428 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2429 so->so_linger / hz;
2430 error = sooptcopyout(sopt, &l, sizeof l);
2431 break;
2432
2433 case SO_USELOOPBACK:
2434 case SO_DONTROUTE:
2435 case SO_DEBUG:
2436 case SO_KEEPALIVE:
2437 case SO_REUSEADDR:
2438 case SO_REUSEPORT:
2439 case SO_BROADCAST:
2440 case SO_OOBINLINE:
2441 case SO_TIMESTAMP:
2442 #ifdef __APPLE__
2443 case SO_DONTTRUNC:
2444 case SO_WANTMORE:
2445 case SO_WANTOOBFLAG:
2446 #endif
2447 optval = so->so_options & sopt->sopt_name;
2448 integer:
2449 error = sooptcopyout(sopt, &optval, sizeof optval);
2450 break;
2451
2452 case SO_TYPE:
2453 optval = so->so_type;
2454 goto integer;
2455
2456 #ifdef __APPLE__
2457 case SO_NREAD:
2458 {
2459 int pkt_total;
2460 struct mbuf *m1;
2461
2462 pkt_total = 0;
2463 m1 = so->so_rcv.sb_mb;
2464 if (so->so_proto->pr_flags & PR_ATOMIC)
2465 {
2466 while (m1) {
2467 if (m1->m_type == MT_DATA)
2468 pkt_total += m1->m_len;
2469 m1 = m1->m_next;
2470 }
2471 optval = pkt_total;
2472 } else
2473 optval = so->so_rcv.sb_cc;
2474 goto integer;
2475 }
2476 case SO_NWRITE:
2477 optval = so->so_snd.sb_cc;
2478 goto integer;
2479 #endif
2480 case SO_ERROR:
2481 optval = so->so_error;
2482 so->so_error = 0;
2483 goto integer;
2484
2485 case SO_SNDBUF:
2486 optval = so->so_snd.sb_hiwat;
2487 goto integer;
2488
2489 case SO_RCVBUF:
2490 optval = so->so_rcv.sb_hiwat;
2491 goto integer;
2492
2493 case SO_SNDLOWAT:
2494 optval = so->so_snd.sb_lowat;
2495 goto integer;
2496
2497 case SO_RCVLOWAT:
2498 optval = so->so_rcv.sb_lowat;
2499 goto integer;
2500
2501 case SO_SNDTIMEO:
2502 case SO_RCVTIMEO:
2503 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2504 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2505
2506 error = sooptcopyout(sopt, &tv, sizeof tv);
2507 break;
2508
2509 case SO_NOSIGPIPE:
2510 optval = (so->so_flags & SOF_NOSIGPIPE);
2511 goto integer;
2512
2513 case SO_NOADDRERR:
2514 optval = (so->so_flags & SOF_NOADDRAVAIL);
2515 goto integer;
2516
2517 default:
2518 error = ENOPROTOOPT;
2519 break;
2520 }
2521 socket_unlock(so, 1);
2522 return (error);
2523 }
2524 }
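/*
 * Illustrative sketch (editor's addition): querying the Darwin-specific
 * SO_NREAD option handled above from user space.  Per the code above it
 * reports the size of the first pending record on a datagram (PR_ATOMIC)
 * socket and the total buffered byte count on a stream socket.  This is
 * user-space code, kept under #if 0.
 */
#if 0
#include <sys/socket.h>
#include <stdio.h>

static void
example_query_nread(int s)
{
	int nread = 0;
	socklen_t len = sizeof nread;

	if (getsockopt(s, SOL_SOCKET, SO_NREAD, &nread, &len) == 0)
		printf("%d byte(s) ready to read\n", nread);
}
#endif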
2525
2526 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2527 int
2528 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2529 {
2530 struct mbuf *m, *m_prev;
2531 int sopt_size = sopt->sopt_valsize;
2532
2533 if (sopt_size > MAX_SOOPTGETM_SIZE)
2534 return EMSGSIZE;
2535
2536 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2537 if (m == 0)
2538 return ENOBUFS;
2539 if (sopt_size > MLEN) {
2540 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2541 if ((m->m_flags & M_EXT) == 0) {
2542 m_free(m);
2543 return ENOBUFS;
2544 }
2545 m->m_len = min(MCLBYTES, sopt_size);
2546 } else {
2547 m->m_len = min(MLEN, sopt_size);
2548 }
2549 sopt_size -= m->m_len;
2550 *mp = m;
2551 m_prev = m;
2552
2553 while (sopt_size) {
2554 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2555 if (m == 0) {
2556 m_freem(*mp);
2557 return ENOBUFS;
2558 }
2559 if (sopt_size > MLEN) {
2560 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2561 if ((m->m_flags & M_EXT) == 0) {
2562 m_freem(*mp);
2563 return ENOBUFS;
2564 }
2565 m->m_len = min(MCLBYTES, sopt_size);
2566 } else {
2567 m->m_len = min(MLEN, sopt_size);
2568 }
2569 sopt_size -= m->m_len;
2570 m_prev->m_next = m;
2571 m_prev = m;
2572 }
2573 return 0;
2574 }
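/*
 * soopt_getm() above builds an mbuf chain big enough to stage
 * sopt->sopt_valsize bytes, attaching a cluster to any mbuf that needs
 * more than MLEN bytes and refusing requests over MAX_SOOPTGETM_SIZE
 * with EMSGSIZE.  For example, with 2048-byte clusters a 3000-byte
 * option value becomes a two-mbuf chain holding 2048 and 952 bytes.
 * soopt_mcopyin() below then fills that chain from the user's buffer,
 * and soopt_mcopyout() drains a chain back out to the user.
 */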
2575
2576 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2577 int
2578 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2579 {
2580 struct mbuf *m0 = m;
2581
2582 if (sopt->sopt_val == USER_ADDR_NULL)
2583 return 0;
2584 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2585 if (sopt->sopt_p != NULL) {
2586 int error;
2587
2588 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2589 if (error != 0) {
2590 m_freem(m0);
2591 return(error);
2592 }
2593 } else
2594 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2595 sopt->sopt_valsize -= m->m_len;
2596 sopt->sopt_val += m->m_len;
2597 m = m->m_next;
2598 }
2599 if (m != NULL) /* the chain should have been allocated large enough at ip6_sooptmcopyin() */
2600 panic("soopt_mcopyin");
2601 return 0;
2602 }
2603
2604 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2605 int
2606 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2607 {
2608 struct mbuf *m0 = m;
2609 size_t valsize = 0;
2610
2611 if (sopt->sopt_val == USER_ADDR_NULL)
2612 return 0;
2613 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2614 if (sopt->sopt_p != NULL) {
2615 int error;
2616
2617 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2618 if (error != 0) {
2619 m_freem(m0);
2620 return(error);
2621 }
2622 } else
2623 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2624 sopt->sopt_valsize -= m->m_len;
2625 sopt->sopt_val += m->m_len;
2626 valsize += m->m_len;
2627 m = m->m_next;
2628 }
2629 if (m != NULL) {
2630 /* user-land should have supplied a large enough soopt buffer */
2631 m_freem(m0);
2632 return(EINVAL);
2633 }
2634 sopt->sopt_valsize = valsize;
2635 return 0;
2636 }
2637
2638 void
2639 sohasoutofband(so)
2640 register struct socket *so;
2641 {
2642 struct proc *p;
2643
2644 if (so->so_pgid < 0)
2645 gsignal(-so->so_pgid, SIGURG);
2646 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2647 psignal(p, SIGURG);
2648 selwakeup(&so->so_rcv.sb_sel);
2649 }
2650
2651 int
2652 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2653 {
2654 struct proc *p = current_proc();
2655 int revents = 0;
2656
2657 socket_lock(so, 1);
2658
2659 if (events & (POLLIN | POLLRDNORM))
2660 if (soreadable(so))
2661 revents |= events & (POLLIN | POLLRDNORM);
2662
2663 if (events & (POLLOUT | POLLWRNORM))
2664 if (sowriteable(so))
2665 revents |= events & (POLLOUT | POLLWRNORM);
2666
2667 if (events & (POLLPRI | POLLRDBAND))
2668 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2669 revents |= events & (POLLPRI | POLLRDBAND);
2670
2671 if (revents == 0) {
2672 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2673 /* Darwin sets the flag first, BSD calls selrecord first */
2674 so->so_rcv.sb_flags |= SB_SEL;
2675 selrecord(p, &so->so_rcv.sb_sel, wql);
2676 }
2677
2678 if (events & (POLLOUT | POLLWRNORM)) {
2679 /* Darwin sets the flag first, BSD calls selrecord first */
2680 so->so_snd.sb_flags |= SB_SEL;
2681 selrecord(p, &so->so_snd.sb_sel, wql);
2682 }
2683 }
2684
2685 socket_unlock(so, 1);
2686 return (revents);
2687 }
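/*
 * Illustrative sketch (editor's addition): the user-space view of
 * sopoll() above.  POLLIN/POLLRDNORM report readability,
 * POLLOUT/POLLWRNORM writability, and POLLPRI/POLLRDBAND pending
 * out-of-band data.  User-space code, kept under #if 0.
 */
#if 0
#include <poll.h>

static int
example_wait_readable(int s, int timeout_ms)
{
	struct pollfd pfd = { .fd = s, .events = POLLIN | POLLPRI };

	return (poll(&pfd, 1, timeout_ms));	/* > 0 when s is ready */
}
#endif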
2688
2689 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2690
2691 int
2692 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2693 {
2694 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2695 struct sockbuf *sb;
2696 socket_lock(so, 1);
2697
2698 switch (kn->kn_filter) {
2699 case EVFILT_READ:
2700 if (so->so_options & SO_ACCEPTCONN)
2701 kn->kn_fop = &solisten_filtops;
2702 else
2703 kn->kn_fop = &soread_filtops;
2704 sb = &so->so_rcv;
2705 break;
2706 case EVFILT_WRITE:
2707 kn->kn_fop = &sowrite_filtops;
2708 sb = &so->so_snd;
2709 break;
2710 default:
2711 socket_unlock(so, 1);
2712 return (1);
2713 }
2714
2715 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2716 sb->sb_flags |= SB_KNOTE;
2717 socket_unlock(so, 1);
2718 return (0);
2719 }
2720
2721 static void
2722 filt_sordetach(struct knote *kn)
2723 {
2724 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2725
2726 socket_lock(so, 1);
2727 if (so->so_rcv.sb_flags & SB_KNOTE)
2728 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2729 so->so_rcv.sb_flags &= ~SB_KNOTE;
2730 socket_unlock(so, 1);
2731 }
2732
2733 /*ARGSUSED*/
2734 static int
2735 filt_soread(struct knote *kn, long hint)
2736 {
2737 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2738
2739 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2740 socket_lock(so, 1);
2741
2742 if (so->so_oobmark) {
2743 if (kn->kn_flags & EV_OOBAND) {
2744 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2745 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2746 socket_unlock(so, 1);
2747 return (1);
2748 }
2749 kn->kn_data = so->so_oobmark;
2750 kn->kn_flags |= EV_OOBAND;
2751 } else {
2752 kn->kn_data = so->so_rcv.sb_cc;
2753 if (so->so_state & SS_CANTRCVMORE) {
2754 kn->kn_flags |= EV_EOF;
2755 kn->kn_fflags = so->so_error;
2756 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2757 socket_unlock(so, 1);
2758 return (1);
2759 }
2760 }
2761
2762 if (so->so_state & SS_RCVATMARK) {
2763 if (kn->kn_flags & EV_OOBAND) {
2764 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2765 socket_unlock(so, 1);
2766 return (1);
2767 }
2768 kn->kn_flags |= EV_OOBAND;
2769 } else if (kn->kn_flags & EV_OOBAND) {
2770 kn->kn_data = 0;
2771 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2772 socket_unlock(so, 1);
2773 return (0);
2774 }
2775
2776 if (so->so_error) { /* temporary udp error */
2777 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2778 socket_unlock(so, 1);
2779 return (1);
2780 }
2781
2782 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2783 socket_unlock(so, 1);
2784
2785 return( kn->kn_flags & EV_OOBAND ||
2786 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2787 kn->kn_sdata : so->so_rcv.sb_lowat));
2788 }
2789
2790 static void
2791 filt_sowdetach(struct knote *kn)
2792 {
2793 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2794 socket_lock(so, 1);
2795
2796 if (so->so_snd.sb_flags & SB_KNOTE)
2797 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2798 so->so_snd.sb_flags &= ~SB_KNOTE;
2799 socket_unlock(so, 1);
2800 }
2801
2802 /*ARGSUSED*/
2803 static int
2804 filt_sowrite(struct knote *kn, long hint)
2805 {
2806 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2807
2808 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2809 socket_lock(so, 1);
2810
2811 kn->kn_data = sbspace(&so->so_snd);
2812 if (so->so_state & SS_CANTSENDMORE) {
2813 kn->kn_flags |= EV_EOF;
2814 kn->kn_fflags = so->so_error;
2815 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2816 socket_unlock(so, 1);
2817 return (1);
2818 }
2819 if (so->so_error) { /* temporary udp error */
2820 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2821 socket_unlock(so, 1);
2822 return (1);
2823 }
2824 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2825 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2826 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2827 socket_unlock(so, 1);
2828 return (0);
2829 }
2830 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2831 socket_unlock(so, 1);
2832 if (kn->kn_sfflags & NOTE_LOWAT)
2833 return (kn->kn_data >= kn->kn_sdata);
2834 return (kn->kn_data >= so->so_snd.sb_lowat);
2835 }
2836
2837 /*ARGSUSED*/
2838 static int
2839 filt_solisten(struct knote *kn, long hint)
2840 {
2841 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2842 int havecomp;
2843
2844 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2845 socket_lock(so, 1);
2846 kn->kn_data = so->so_qlen;
2847 havecomp = !TAILQ_EMPTY(&so->so_comp);
2848 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2849 socket_unlock(so, 1);
2850 return (havecomp);
2851 }
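/*
 * Illustrative sketch (editor's addition): registering the filters above
 * from user space.  On a connected socket EVFILT_READ uses
 * soread_filtops and NOTE_LOWAT overrides the receive low-water mark;
 * on a listening socket it uses solisten_filtops and fires once the
 * completed-connection queue is non-empty.  User-space code, kept under
 * #if 0.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
example_register_read_filter(int kq, int s)
{
	struct kevent kev;

	/* wake up only once at least 128 bytes are buffered */
	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
	return (kevent(kq, &kev, 1, NULL, 0, NULL));
}
#endif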
2852
2853
2854 int
2855 socket_lock(so, refcount)
2856 struct socket *so;
2857 int refcount;
2858 {
2859 int error = 0, lr_saved;
2860
2861 lr_saved = (unsigned int) __builtin_return_address(0);
2862
2863 if (so->so_proto->pr_lock) {
2864 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2865 }
2866 else {
2867 #ifdef MORE_LOCKING_DEBUG
2868 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2869 #endif
2870 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2871 if (refcount)
2872 so->so_usecount++;
2873 so->lock_lr[so->next_lock_lr] = (void *)lr_saved;
2874 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2875 }
2876
2877 return(error);
2878
2879 }
2880
2881 int
2882 socket_unlock(so, refcount)
2883 struct socket *so;
2884 int refcount;
2885 {
2886 int error = 0, lr_saved;
2887 lck_mtx_t * mutex_held;
2888
2889 lr_saved = (unsigned int) __builtin_return_address(0);
2890
2891 if (so->so_proto == NULL)
2892 panic("socket_unlock null so_proto so=%x\n", so);
2893
2894 if (so && so->so_proto->pr_unlock)
2895 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2896 else {
2897 mutex_held = so->so_proto->pr_domain->dom_mtx;
2898 #ifdef MORE_LOCKING_DEBUG
2899 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2900 #endif
2901 so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
2902 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2903
2904 if (refcount) {
2905 if (so->so_usecount <= 0)
2906 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2907 so->so_usecount--;
2908 if (so->so_usecount == 0) {
2909 sofreelastref(so, 1);
2910 }
2911 }
2912 lck_mtx_unlock(mutex_held);
2913 }
2914
2915 return(error);
2916 }
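/*
 * socket_lock()/socket_unlock() above either defer to a per-protocol
 * pr_lock/pr_unlock or take the domain mutex directly.  In the latter
 * case each call records its caller's return address in the lock_lr /
 * unlock_lr ring buffers (SO_LCKDBG_MAX entries) for lock debugging,
 * and when "refcount" is non-zero the lock path bumps so_usecount while
 * the unlock path drops it, calling sofreelastref() once the last
 * reference goes away.
 */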
2917 //### Called with socket locked, will unlock socket
2918 void
2919 sofree(so)
2920 struct socket *so;
2921 {
2922
2923 lck_mtx_t * mutex_held;
2924 if (so->so_proto->pr_getlock != NULL)
2925 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2926 else
2927 mutex_held = so->so_proto->pr_domain->dom_mtx;
2928 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2929
2930 sofreelastref(so, 0);
2931 }
2932
2933 void
2934 soreference(so)
2935 struct socket *so;
2936 {
2937 socket_lock(so, 1); /* locks & take one reference on socket */
2938 socket_unlock(so, 0); /* unlock only */
2939 }
2940
2941 void
2942 sodereference(so)
2943 struct socket *so;
2944 {
2945 socket_lock(so, 0);
2946 socket_unlock(so, 1);
2947 }