apple/xnu xnu-792.13.8: bsd/kern/uipc_socket.c
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
31 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
65 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
66 */
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc_internal.h>
72 #include <sys/kauth.h>
73 #include <sys/file_internal.h>
74 #include <sys/fcntl.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/domain.h>
78 #include <sys/kernel.h>
79 #include <sys/event.h>
80 #include <sys/poll.h>
81 #include <sys/protosw.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/resourcevar.h>
85 #include <sys/signalvar.h>
86 #include <sys/sysctl.h>
87 #include <sys/uio.h>
88 #include <sys/ev.h>
89 #include <sys/kdebug.h>
90 #include <net/route.h>
91 #include <netinet/in.h>
92 #include <netinet/in_pcb.h>
93 #include <kern/zalloc.h>
94 #include <kern/locks.h>
95 #include <machine/limits.h>
96
97 int so_cache_hw = 0;
98 int so_cache_timeouts = 0;
99 int so_cache_max_freed = 0;
100 int cached_sock_count = 0;
101 struct socket *socket_cache_head = 0;
102 struct socket *socket_cache_tail = 0;
103 u_long so_cache_time = 0;
104 int so_cache_init_done = 0;
105 struct zone *so_cache_zone;
106 extern int get_inpcb_str_size();
107 extern int get_tcp_str_size();
108
109 static lck_grp_t *so_cache_mtx_grp;
110 static lck_attr_t *so_cache_mtx_attr;
111 static lck_grp_attr_t *so_cache_mtx_grp_attr;
112 lck_mtx_t *so_cache_mtx;
113
114 #include <machine/limits.h>
115
116 static void filt_sordetach(struct knote *kn);
117 static int filt_soread(struct knote *kn, long hint);
118 static void filt_sowdetach(struct knote *kn);
119 static int filt_sowrite(struct knote *kn, long hint);
120 static int filt_solisten(struct knote *kn, long hint);
121
122 static struct filterops solisten_filtops =
123 { 1, NULL, filt_sordetach, filt_solisten };
124 static struct filterops soread_filtops =
125 { 1, NULL, filt_sordetach, filt_soread };
126 static struct filterops sowrite_filtops =
127 { 1, NULL, filt_sowdetach, filt_sowrite };
128
129 #define EVEN_MORE_LOCKING_DEBUG 0
130 int socket_debug = 0;
131 int socket_zone = M_SOCKET;
132 so_gen_t so_gencnt; /* generation count for sockets */
133
134 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
135 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
136
137 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
138 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
139 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
140 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
141 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
142 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
143 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
144
145 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
146
147
148 SYSCTL_DECL(_kern_ipc);
149
150 static int somaxconn = SOMAXCONN;
151 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
152 0, "");
153
154 /* Should we get a maximum also ??? */
155 static int sosendmaxchain = 65536;
156 static int sosendminchain = 16384;
157 static int sorecvmincopy = 16384;
158 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
159 0, "");
160 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
161 0, "");
162
163 void so_cache_timer();
164
165 /*
166 * Socket operation routines.
167 * These routines are called by the routines in
168 * sys_socket.c or from a system process, and
169 * implement the semantics of socket operations by
170 * switching out to the protocol specific routines.
171 */
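/*
 * Roughly, a kernel consumer walks through these in order: socreate()
 * allocates and attaches the socket, sobind()/solisten() prepare it for
 * inbound connections (soaccept() pulls them off the completed queue),
 * soconnect() starts an outbound connection, sosend()/soreceive() move
 * data, soshutdown() closes one or both directions, and soclose() drops
 * the last file reference and frees the socket.
 */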
172
173 #ifdef __APPLE__
174
175 vm_size_t so_cache_zone_element_size;
176
177 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
178
179
180 void socketinit()
181 {
182 vm_size_t str_size;
183
184 if (so_cache_init_done) {
185 printf("socketinit: already called...\n");
186 return;
187 }
188
189 /*
190 * allocate lock group attribute and group for socket cache mutex
191 */
192 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
193
194 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
195
196 /*
197 * allocate the lock attribute for socket cache mutex
198 */
199 so_cache_mtx_attr = lck_attr_alloc_init();
200
201 so_cache_init_done = 1;
202
203 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
204
205 if (so_cache_mtx == NULL)
206 return; /* we're hosed... */
207
208 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
209 get_inpcb_str_size() + 4 +
210 get_tcp_str_size());
211 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
212 #if TEMPDEBUG
213 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
214 #endif
215 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
216
217 so_cache_zone_element_size = str_size;
218
219 sflt_init();
220
221 }
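/*
 * cached_sock_alloc()/cached_sock_free() below recycle fully laid out
 * TCP sockets.  Each element of so_cache_zone (sized in socketinit()
 * above) holds a struct socket followed by an inpcb and a tcpcb, each
 * aligned to a longword boundary, so a PF_INET/SOCK_STREAM socket and
 * its control blocks come from, and return to, a single allocation.
 */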
222
223 void cached_sock_alloc(so, waitok)
224 struct socket **so;
225 int waitok;
226
227 {
228 caddr_t temp;
229 register u_long offset;
230
231
232 lck_mtx_lock(so_cache_mtx);
233
234 if (cached_sock_count) {
235 cached_sock_count--;
236 *so = socket_cache_head;
237 if (*so == 0)
238 panic("cached_sock_alloc: cached sock is null");
239
240 socket_cache_head = socket_cache_head->cache_next;
241 if (socket_cache_head)
242 socket_cache_head->cache_prev = 0;
243 else
244 socket_cache_tail = 0;
245
246 lck_mtx_unlock(so_cache_mtx);
247
248 temp = (*so)->so_saved_pcb;
249 bzero((caddr_t)*so, sizeof(struct socket));
250 #if TEMPDEBUG
251 	kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
252 cached_sock_count);
253 #endif
254 (*so)->so_saved_pcb = temp;
255 (*so)->cached_in_sock_layer = 1;
256
257 }
258 else {
259 #if TEMPDEBUG
260 kprintf("Allocating cached sock %x from memory\n", *so);
261 #endif
262
263 lck_mtx_unlock(so_cache_mtx);
264
265 if (waitok)
266 *so = (struct socket *) zalloc(so_cache_zone);
267 else
268 *so = (struct socket *) zalloc_noblock(so_cache_zone);
269
270 if (*so == 0)
271 return;
272
273 bzero((caddr_t)*so, sizeof(struct socket));
274
275 /*
276 * Define offsets for extra structures into our single block of
277 * memory. Align extra structures on longword boundaries.
278 */
279
280
281 offset = (u_long) *so;
282 offset += sizeof(struct socket);
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287 (*so)->so_saved_pcb = (caddr_t) offset;
288 offset += get_inpcb_str_size();
289 if (offset & 0x3) {
290 offset += 4;
291 offset &= 0xfffffffc;
292 }
293
294 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
295 #if TEMPDEBUG
296 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
297 (*so)->so_saved_pcb,
298 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
299 #endif
300 }
301
302 (*so)->cached_in_sock_layer = 1;
303 }
304
305
306 void cached_sock_free(so)
307 struct socket *so;
308 {
309
310 lck_mtx_lock(so_cache_mtx);
311
312 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
313 --cached_sock_count;
314 lck_mtx_unlock(so_cache_mtx);
315 #if TEMPDEBUG
316 kprintf("Freeing overflowed cached socket %x\n", so);
317 #endif
318 zfree(so_cache_zone, so);
319 }
320 else {
321 #if TEMPDEBUG
322 kprintf("Freeing socket %x into cache\n", so);
323 #endif
324 if (so_cache_hw < cached_sock_count)
325 so_cache_hw = cached_sock_count;
326
327 so->cache_next = socket_cache_head;
328 so->cache_prev = 0;
329 if (socket_cache_head)
330 socket_cache_head->cache_prev = so;
331 else
332 socket_cache_tail = so;
333
334 so->cache_timestamp = so_cache_time;
335 socket_cache_head = so;
336 lck_mtx_unlock(so_cache_mtx);
337 }
338
339 #if TEMPDEBUG
340 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
341 #endif
342
343
344 }
345
346
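/*
 * so_cache_timer() reschedules itself every SO_CACHE_FLUSH_INTERVAL
 * ticks and trims the cache from the tail: cached sockets older than
 * SO_CACHE_TIME_LIMIT go back to the zone, at most
 * SO_CACHE_MAX_FREE_BATCH of them per pass.
 */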
347 void so_cache_timer()
348 {
349 register struct socket *p;
350 register int n_freed = 0;
351
352
353 lck_mtx_lock(so_cache_mtx);
354
355 ++so_cache_time;
356
357 while ( (p = socket_cache_tail) )
358 {
359 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
360 break;
361
362 so_cache_timeouts++;
363
364 if ( (socket_cache_tail = p->cache_prev) )
365 p->cache_prev->cache_next = 0;
366 if (--cached_sock_count == 0)
367 socket_cache_head = 0;
368
369
370 zfree(so_cache_zone, p);
371
372 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
373 {
374 so_cache_max_freed++;
375 break;
376 }
377 }
378 lck_mtx_unlock(so_cache_mtx);
379
380 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
381
382
383 }
384 #endif /* __APPLE__ */
385
386 /*
387 * Get a socket structure from our zone, and initialize it.
388 * We don't implement `waitok' yet (see comments in uipc_domain.c).
389 * Note that it would probably be better to allocate socket
390 * and PCB at the same time, but I'm not convinced that all
391 * the protocols can be easily modified to do this.
392 */
393 struct socket *
394 soalloc(waitok, dom, type)
395 int waitok;
396 int dom;
397 int type;
398 {
399 struct socket *so;
400
401 if ((dom == PF_INET) && (type == SOCK_STREAM))
402 cached_sock_alloc(&so, waitok);
403 else
404 {
405 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
406 if (so)
407 bzero(so, sizeof *so);
408 }
409 /* XXX race condition for reentrant kernel */
410 //###LD Atomic add for so_gencnt
411 if (so) {
412 so->so_gencnt = ++so_gencnt;
413 so->so_zone = socket_zone;
414 }
415
416 return so;
417 }
418
419 int
420 socreate(dom, aso, type, proto)
421 int dom;
422 struct socket **aso;
423 register int type;
424 int proto;
425 {
426 struct proc *p = current_proc();
427 register struct protosw *prp;
428 register struct socket *so;
429 register int error = 0;
430 #if TCPDEBUG
431 extern int tcpconsdebug;
432 #endif
433 if (proto)
434 prp = pffindproto(dom, proto, type);
435 else
436 prp = pffindtype(dom, type);
437
438 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
439 return (EPROTONOSUPPORT);
440 #ifndef __APPLE__
441
442 if (p->p_prison && jail_socket_unixiproute_only &&
443 prp->pr_domain->dom_family != PF_LOCAL &&
444 prp->pr_domain->dom_family != PF_INET &&
445 prp->pr_domain->dom_family != PF_ROUTE) {
446 return (EPROTONOSUPPORT);
447 }
448
449 #endif
450 if (prp->pr_type != type)
451 return (EPROTOTYPE);
452 so = soalloc(p != 0, dom, type);
453 if (so == 0)
454 return (ENOBUFS);
455
456 TAILQ_INIT(&so->so_incomp);
457 TAILQ_INIT(&so->so_comp);
458 so->so_type = type;
459
460 #ifdef __APPLE__
461 if (p != 0) {
462 so->so_uid = kauth_cred_getuid(kauth_cred_get());
463 if (!suser(kauth_cred_get(),NULL))
464 so->so_state = SS_PRIV;
465 }
466 #else
467 so->so_cred = kauth_cred_get_with_ref();
468 #endif
469 so->so_proto = prp;
470 #ifdef __APPLE__
471 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
472 so->so_rcv.sb_so = so->so_snd.sb_so = so;
473 #endif
474 so->next_lock_lr = 0;
475 so->next_unlock_lr = 0;
476
477
478 	//### Attachment will create the per-pcb lock if necessary and increase the refcount
479 so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */
480
481 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
482 if (error) {
483 /*
484 * Warning:
485 * If so_pcb is not zero, the socket will be leaked,
486 		 * so the protocol attachment handler must be coded carefully
487 */
488 so->so_state |= SS_NOFDREF;
489 so->so_usecount--;
490 sofreelastref(so, 1); /* will deallocate the socket */
491 return (error);
492 }
493 #ifdef __APPLE__
494 prp->pr_domain->dom_refs++;
495 TAILQ_INIT(&so->so_evlist);
496
497 /* Attach socket filters for this protocol */
498 sflt_initsock(so);
499 #if TCPDEBUG
500 if (tcpconsdebug == 2)
501 so->so_options |= SO_DEBUG;
502 #endif
503 #endif
504
505 *aso = so;
506 return (0);
507 }
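/*
 * A minimal sketch of an in-kernel caller (illustrative only, not part
 * of the original source); `sin' is assumed to be a caller-filled
 * struct sockaddr_in:
 *
 *	struct socket *so = NULL;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error == 0)
 *		error = soconnect(so, (struct sockaddr *)&sin);
 */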
508
509 int
510 sobind(so, nam)
511 struct socket *so;
512 struct sockaddr *nam;
513
514 {
515 struct proc *p = current_proc();
516 int error = 0;
517 struct socket_filter_entry *filter;
518 int filtered = 0;
519
520 socket_lock(so, 1);
521
522 /* Socket filter */
523 error = 0;
524 for (filter = so->so_filt; filter && (error == 0);
525 filter = filter->sfe_next_onsocket) {
526 if (filter->sfe_filter->sf_filter.sf_bind) {
527 if (filtered == 0) {
528 filtered = 1;
529 sflt_use(so);
530 socket_unlock(so, 0);
531 }
532 error = filter->sfe_filter->sf_filter.sf_bind(
533 filter->sfe_cookie, so, nam);
534 }
535 }
536 if (filtered != 0) {
537 socket_lock(so, 0);
538 sflt_unuse(so);
539 }
540 /* End socket filter */
541
542 if (error == 0)
543 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
544
545 socket_unlock(so, 1);
546
547 if (error == EJUSTRETURN)
548 error = 0;
549
550 return (error);
551 }
552
553 void
554 sodealloc(so)
555 struct socket *so;
556 {
557 so->so_gencnt = ++so_gencnt;
558
559 #ifndef __APPLE__
560 if (so->so_rcv.sb_hiwat)
561 (void)chgsbsize(so->so_cred->cr_uidinfo,
562 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
563 if (so->so_snd.sb_hiwat)
564 (void)chgsbsize(so->so_cred->cr_uidinfo,
565 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
566 #ifdef INET
567 if (so->so_accf != NULL) {
568 if (so->so_accf->so_accept_filter != NULL &&
569 so->so_accf->so_accept_filter->accf_destroy != NULL) {
570 so->so_accf->so_accept_filter->accf_destroy(so);
571 }
572 if (so->so_accf->so_accept_filter_str != NULL)
573 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
574 FREE(so->so_accf, M_ACCF);
575 }
576 #endif /* INET */
577 kauth_cred_rele(so->so_cred);
578 zfreei(so->so_zone, so);
579 #else
580 if (so->cached_in_sock_layer == 1)
581 cached_sock_free(so);
582 else {
583 if (so->cached_in_sock_layer == -1)
584 panic("sodealloc: double dealloc: so=%x\n", so);
585 so->cached_in_sock_layer = -1;
586 FREE_ZONE(so, sizeof(*so), so->so_zone);
587 }
588 #endif /* __APPLE__ */
589 }
590
591 int
592 solisten(so, backlog)
593 register struct socket *so;
594 int backlog;
595
596 {
597 struct proc *p = current_proc();
598 int error;
599
600 socket_lock(so, 1);
601
602 {
603 struct socket_filter_entry *filter;
604 int filtered = 0;
605 error = 0;
606 for (filter = so->so_filt; filter && (error == 0);
607 filter = filter->sfe_next_onsocket) {
608 if (filter->sfe_filter->sf_filter.sf_listen) {
609 if (filtered == 0) {
610 filtered = 1;
611 sflt_use(so);
612 socket_unlock(so, 0);
613 }
614 error = filter->sfe_filter->sf_filter.sf_listen(
615 filter->sfe_cookie, so);
616 }
617 }
618 if (filtered != 0) {
619 socket_lock(so, 0);
620 sflt_unuse(so);
621 }
622 }
623
624 if (error == 0) {
625 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
626 }
627
628 if (error) {
629 socket_unlock(so, 1);
630 if (error == EJUSTRETURN)
631 error = 0;
632 return (error);
633 }
634
635 if (TAILQ_EMPTY(&so->so_comp))
636 so->so_options |= SO_ACCEPTCONN;
637 if (backlog < 0 || backlog > somaxconn)
638 backlog = somaxconn;
639 so->so_qlimit = backlog;
640
641 socket_unlock(so, 1);
642 return (0);
643 }
644
645 void
646 sofreelastref(so, dealloc)
647 register struct socket *so;
648 int dealloc;
649 {
650 int error;
651 struct socket *head = so->so_head;
652
653 /*### Assume socket is locked */
654
655 /* Remove any filters - may be called more than once */
656 sflt_termsock(so);
657
658 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
659 #ifdef __APPLE__
660 selthreadclear(&so->so_snd.sb_sel);
661 selthreadclear(&so->so_rcv.sb_sel);
662 so->so_rcv.sb_flags &= ~SB_UPCALL;
663 so->so_snd.sb_flags &= ~SB_UPCALL;
664 #endif
665 return;
666 }
667 if (head != NULL) {
668 socket_lock(head, 1);
669 if (so->so_state & SS_INCOMP) {
670 TAILQ_REMOVE(&head->so_incomp, so, so_list);
671 head->so_incqlen--;
672 } else if (so->so_state & SS_COMP) {
673 /*
674 * We must not decommission a socket that's
675 * on the accept(2) queue. If we do, then
676 * accept(2) may hang after select(2) indicated
677 * that the listening socket was ready.
678 */
679 #ifdef __APPLE__
680 selthreadclear(&so->so_snd.sb_sel);
681 selthreadclear(&so->so_rcv.sb_sel);
682 so->so_rcv.sb_flags &= ~SB_UPCALL;
683 so->so_snd.sb_flags &= ~SB_UPCALL;
684 #endif
685 socket_unlock(head, 1);
686 return;
687 } else {
688 panic("sofree: not queued");
689 }
690 head->so_qlen--;
691 so->so_state &= ~SS_INCOMP;
692 so->so_head = NULL;
693 socket_unlock(head, 1);
694 }
695 #ifdef __APPLE__
696 selthreadclear(&so->so_snd.sb_sel);
697 sbrelease(&so->so_snd);
698 #endif
699 sorflush(so);
700
701 /* 3932268: disable upcall */
702 so->so_rcv.sb_flags &= ~SB_UPCALL;
703 so->so_snd.sb_flags &= ~SB_UPCALL;
704
705 if (dealloc)
706 sodealloc(so);
707 }
708
709 /*
710 * Close a socket on last file table reference removal.
711 * Initiate disconnect if connected.
712 * Free socket when disconnect complete.
713 */
714 int
715 soclose_locked(so)
716 register struct socket *so;
717 {
718 int error = 0;
719 lck_mtx_t * mutex_held;
720 struct timespec ts;
721
722 if (so->so_usecount == 0) {
723 panic("soclose: so=%x refcount=0\n", so);
724 }
725
726 sflt_notify(so, sock_evt_closing, NULL);
727
728 if ((so->so_options & SO_ACCEPTCONN)) {
729 struct socket *sp;
730
731 		/* We do not want new connections to be added to the connection queues */
732 so->so_options &= ~SO_ACCEPTCONN;
733
734 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
735 			/* A bit tricky here: if the protocol uses a
736 			 * global lock we keep holding it, but with
737 			 * per-socket locks we want the incoming socket,
738 			 * not the listening head, locked while it is
739 			 * aborted...
740 if (so->so_proto->pr_getlock != NULL) {
741 socket_unlock(so, 0);
742 socket_lock(sp, 1);
743 }
744 (void) soabort(sp);
745 if (so->so_proto->pr_getlock != NULL) {
746 socket_unlock(sp, 1);
747 socket_lock(so, 0);
748 }
749 }
750
751 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
752 /* Dequeue from so_comp since sofree() won't do it */
753 TAILQ_REMOVE(&so->so_comp, sp, so_list);
754 so->so_qlen--;
755
756 if (so->so_proto->pr_getlock != NULL) {
757 socket_unlock(so, 0);
758 socket_lock(sp, 1);
759 }
760
761 sp->so_state &= ~SS_COMP;
762 sp->so_head = NULL;
763
764 (void) soabort(sp);
765 if (so->so_proto->pr_getlock != NULL) {
766 socket_unlock(sp, 1);
767 socket_lock(so, 0);
768 }
769 }
770 }
771 if (so->so_pcb == 0) {
772 /* 3915887: mark the socket as ready for dealloc */
773 so->so_flags |= SOF_PCBCLEARING;
774 goto discard;
775 }
776 if (so->so_state & SS_ISCONNECTED) {
777 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
778 error = sodisconnectlocked(so);
779 if (error)
780 goto drop;
781 }
782 if (so->so_options & SO_LINGER) {
783 if ((so->so_state & SS_ISDISCONNECTING) &&
784 (so->so_state & SS_NBIO))
785 goto drop;
786 if (so->so_proto->pr_getlock != NULL)
787 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
788 else
789 mutex_held = so->so_proto->pr_domain->dom_mtx;
790 while (so->so_state & SS_ISCONNECTED) {
791 ts.tv_sec = (so->so_linger/100);
792 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
793 error = msleep((caddr_t)&so->so_timeo, mutex_held,
794 PSOCK | PCATCH, "soclos", &ts);
795 if (error) {
796 					/* It's OK when the linger timer fires; don't report an error */
797 if (error == EWOULDBLOCK)
798 error = 0;
799 break;
800 }
801 }
802 }
803 }
804 drop:
805 if (so->so_usecount == 0)
806 panic("soclose: usecount is zero so=%x\n", so);
807 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
808 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
809 if (error == 0)
810 error = error2;
811 }
812 if (so->so_usecount <= 0)
813 panic("soclose: usecount is zero so=%x\n", so);
814 discard:
815 if (so->so_pcb && so->so_state & SS_NOFDREF)
816 panic("soclose: NOFDREF");
817 so->so_state |= SS_NOFDREF;
818 #ifdef __APPLE__
819 so->so_proto->pr_domain->dom_refs--;
820 evsofree(so);
821 #endif
822 so->so_usecount--;
823 sofree(so);
824 return (error);
825 }
826
827 int
828 soclose(so)
829 register struct socket *so;
830 {
831 int error = 0;
832 socket_lock(so, 1);
833 if (so->so_retaincnt == 0)
834 error = soclose_locked(so);
835 else { /* if the FD is going away, but socket is retained in kernel remove its reference */
836 so->so_usecount--;
837 if (so->so_usecount < 2)
838 			panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
839 }
840 socket_unlock(so, 1);
841 return (error);
842 }
843
844
845 /*
846 * Must be called at splnet...
847 */
848 //#### Should already be locked
849 int
850 soabort(so)
851 struct socket *so;
852 {
853 int error;
854
855 #ifdef MORE_LOCKING_DEBUG
856 lck_mtx_t * mutex_held;
857
858 if (so->so_proto->pr_getlock != NULL)
859 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
860 else
861 mutex_held = so->so_proto->pr_domain->dom_mtx;
862 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
863 #endif
864
865 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
866 if (error) {
867 sofree(so);
868 return error;
869 }
870 return (0);
871 }
872
873 int
874 soacceptlock(so, nam, dolock)
875 register struct socket *so;
876 struct sockaddr **nam;
877 int dolock;
878 {
879 int error;
880
881 if (dolock) socket_lock(so, 1);
882
883 if ((so->so_state & SS_NOFDREF) == 0)
884 panic("soaccept: !NOFDREF");
885 so->so_state &= ~SS_NOFDREF;
886 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
887
888 if (dolock) socket_unlock(so, 1);
889 return (error);
890 }
891 int
892 soaccept(so, nam)
893 register struct socket *so;
894 struct sockaddr **nam;
895 {
896 return (soacceptlock(so, nam, 1));
897 }
898
899 int
900 soconnectlock(so, nam, dolock)
901 register struct socket *so;
902 struct sockaddr *nam;
903 int dolock;
904
905 {
906 int s;
907 int error;
908 struct proc *p = current_proc();
909
910 if (dolock) socket_lock(so, 1);
911
912 if (so->so_options & SO_ACCEPTCONN) {
913 if (dolock) socket_unlock(so, 1);
914 return (EOPNOTSUPP);
915 }
916 /*
917 * If protocol is connection-based, can only connect once.
918 * Otherwise, if connected, try to disconnect first.
919 * This allows user to disconnect by connecting to, e.g.,
920 * a null address.
921 */
922 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
923 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
924 (error = sodisconnectlocked(so))))
925 error = EISCONN;
926 else {
927 /*
928 * Run connect filter before calling protocol:
929 * - non-blocking connect returns before completion;
930 */
931 {
932 struct socket_filter_entry *filter;
933 int filtered = 0;
934 error = 0;
935 for (filter = so->so_filt; filter && (error == 0);
936 filter = filter->sfe_next_onsocket) {
937 if (filter->sfe_filter->sf_filter.sf_connect_out) {
938 if (filtered == 0) {
939 filtered = 1;
940 sflt_use(so);
941 socket_unlock(so, 0);
942 }
943 error = filter->sfe_filter->sf_filter.sf_connect_out(
944 filter->sfe_cookie, so, nam);
945 }
946 }
947 if (filtered != 0) {
948 socket_lock(so, 0);
949 sflt_unuse(so);
950 }
951 }
952 if (error) {
953 if (error == EJUSTRETURN)
954 error = 0;
955 if (dolock) socket_unlock(so, 1);
956 return error;
957 }
958
959 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
960 }
961 if (dolock) socket_unlock(so, 1);
962 return (error);
963 }
964
965 int
966 soconnect(so, nam)
967 register struct socket *so;
968 struct sockaddr *nam;
969 {
970 return (soconnectlock(so, nam, 1));
971 }
972
973 int
974 soconnect2(so1, so2)
975 register struct socket *so1;
976 struct socket *so2;
977 {
978 int error;
979
980 socket_lock(so1, 1);
981 if (so2->so_proto->pr_lock)
982 socket_lock(so2, 1);
983
984 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
985
986 socket_unlock(so1, 1);
987 if (so2->so_proto->pr_lock)
988 socket_unlock(so2, 1);
989 return (error);
990 }
991
992
993 int
994 sodisconnectlocked(so)
995 register struct socket *so;
996 {
997 int error;
998
999 if ((so->so_state & SS_ISCONNECTED) == 0) {
1000 error = ENOTCONN;
1001 goto bad;
1002 }
1003 if (so->so_state & SS_ISDISCONNECTING) {
1004 error = EALREADY;
1005 goto bad;
1006 }
1007
1008 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1009
1010 if (error == 0) {
1011 sflt_notify(so, sock_evt_disconnected, NULL);
1012 }
1013
1014 bad:
1015 return (error);
1016 }
1017 //### Locking version
1018 int
1019 sodisconnect(so)
1020 register struct socket *so;
1021 {
1022 int error;
1023
1024 socket_lock(so, 1);
1025 error = sodisconnectlocked(so);
1026 socket_unlock(so, 1);
1027 return(error);
1028 }
1029
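/*
 * SBLOCKWAIT() maps the caller's MSG_DONTWAIT flag to the wait argument
 * handed to sblock(), so non-blocking send/receive paths do not sleep
 * waiting for the socket buffer lock.
 */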
1030 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1031
1032 /*
1033 * sosendcheck will lock the socket buffer if it isn't locked and
1034 * verify that there is space for the data being inserted.
1035 */
1036
1037 static int
1038 sosendcheck(
1039 struct socket *so,
1040 struct sockaddr *addr,
1041 long resid,
1042 long clen,
1043 long atomic,
1044 int flags,
1045 int *sblocked)
1046 {
1047 int error = 0;
1048 long space;
1049 int assumelock = 0;
1050
1051 restart:
1052 if (*sblocked == 0) {
1053 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1054 so->so_send_filt_thread != 0 &&
1055 so->so_send_filt_thread == current_thread()) {
1056 /*
1057 * We're being called recursively from a filter,
1058 * allow this to continue. Radar 4150520.
1059 * Don't set sblocked because we don't want
1060 * to perform an unlock later.
1061 */
1062 assumelock = 1;
1063 }
1064 else {
1065 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1066 if (error) {
1067 return error;
1068 }
1069 *sblocked = 1;
1070 }
1071 }
1072
1073 if (so->so_state & SS_CANTSENDMORE)
1074 return EPIPE;
1075
1076 if (so->so_error) {
1077 error = so->so_error;
1078 so->so_error = 0;
1079 return error;
1080 }
1081
1082 if ((so->so_state & SS_ISCONNECTED) == 0) {
1083 /*
1084 		 * `sendto' and `sendmsg' are allowed on a connection-
1085 * based socket if it supports implied connect.
1086 * Return ENOTCONN if not connected and no address is
1087 * supplied.
1088 */
1089 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1090 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1091 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1092 !(resid == 0 && clen != 0))
1093 return ENOTCONN;
1094 } else if (addr == 0 && !(flags&MSG_HOLD))
1095 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1096 }
1097 space = sbspace(&so->so_snd);
1098 if (flags & MSG_OOB)
1099 space += 1024;
1100 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1101 clen > so->so_snd.sb_hiwat)
1102 return EMSGSIZE;
1103 if (space < resid + clen &&
1104 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1105 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1106 return EWOULDBLOCK;
1107 }
1108 sbunlock(&so->so_snd, 1);
1109 error = sbwait(&so->so_snd);
1110 if (error) {
1111 return error;
1112 }
1113 goto restart;
1114 }
1115
1116 return 0;
1117 }
1118
1119 /*
1120 * Send on a socket.
1121 * If send must go all at once and message is larger than
1122 * send buffering, then hard error.
1123 * Lock against other senders.
1124 * If must go all at once and not enough room now, then
1125 * inform user that this would block and do nothing.
1126 * Otherwise, if nonblocking, send as much as possible.
1127 * The data to be sent is described by "uio" if nonzero,
1128 * otherwise by the mbuf chain "top" (which must be null
1129 * if uio is not). Data provided in mbuf chain must be small
1130 * enough to send all at once.
1131 *
1132 * Returns nonzero on error, timeout or signal; callers
1133 * must check for short counts if EINTR/ERESTART are returned.
1134 * Data and control buffers are freed on return.
1135 * Experiment:
1136 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1137 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1138 * point at the mbuf chain being constructed and go from there.
1139 */
1140 int
1141 sosend(so, addr, uio, top, control, flags)
1142 register struct socket *so;
1143 struct sockaddr *addr;
1144 struct uio *uio;
1145 struct mbuf *top;
1146 struct mbuf *control;
1147 int flags;
1148
1149 {
1150 struct mbuf **mp;
1151 register struct mbuf *m, *freelist = NULL;
1152 register long space, len, resid;
1153 int clen = 0, error, dontroute, mlen, sendflags;
1154 int atomic = sosendallatonce(so) || top;
1155 int sblocked = 0;
1156 struct proc *p = current_proc();
1157
1158 if (uio)
1159 // LP64todo - fix this!
1160 resid = uio_resid(uio);
1161 else
1162 resid = top->m_pkthdr.len;
1163
1164 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1165 so,
1166 resid,
1167 so->so_snd.sb_cc,
1168 so->so_snd.sb_lowat,
1169 so->so_snd.sb_hiwat);
1170
1171 socket_lock(so, 1);
1172
1173 /*
1174 * In theory resid should be unsigned.
1175 * However, space must be signed, as it might be less than 0
1176 * if we over-committed, and we must use a signed comparison
1177 * of space and resid. On the other hand, a negative resid
1178 * causes us to loop sending 0-length segments to the protocol.
1179 *
1180 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1181 * type sockets since that's an error.
1182 */
1183 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1184 error = EINVAL;
1185 socket_unlock(so, 1);
1186 goto out;
1187 }
1188
1189 dontroute =
1190 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1191 (so->so_proto->pr_flags & PR_ATOMIC);
1192 if (p)
1193 p->p_stats->p_ru.ru_msgsnd++;
1194 if (control)
1195 clen = control->m_len;
1196
1197 do {
1198 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1199 if (error) {
1200 goto release;
1201 }
1202 mp = &top;
1203 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1204
1205 do {
1206
1207 if (uio == NULL) {
1208 /*
1209 * Data is prepackaged in "top".
1210 */
1211 resid = 0;
1212 if (flags & MSG_EOR)
1213 top->m_flags |= M_EOR;
1214 } else {
1215 int chainlength;
1216 int bytes_to_copy;
1217
1218 bytes_to_copy = min(resid, space);
1219
1220 if (sosendminchain > 0) {
1221 chainlength = 0;
1222 } else
1223 chainlength = sosendmaxchain;
1224
1225 socket_unlock(so, 0);
1226
1227 do {
1228 int num_needed;
1229 int hdrs_needed = (top == 0) ? 1 : 0;
1230
1231 /*
1232 * try to maintain a local cache of mbuf clusters needed to complete this write
1233 * the list is further limited to the number that are currently needed to fill the socket
1234 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1235 			     * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
1236 			     * if we fail early (or miscalculate the number needed) make sure to release any clusters
1237 * we haven't yet consumed.
1238 */
1239 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1240 num_needed = bytes_to_copy / NBPG;
1241
1242 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1243 num_needed++;
1244
1245 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1246 /* Fall back to cluster size if allocation failed */
1247 }
1248
1249 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1250 num_needed = bytes_to_copy / MCLBYTES;
1251
1252 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1253 num_needed++;
1254
1255 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1256 /* Fall back to a single mbuf if allocation failed */
1257 }
1258
1259 if (freelist == NULL) {
1260 if (top == 0)
1261 MGETHDR(freelist, M_WAIT, MT_DATA);
1262 else
1263 MGET(freelist, M_WAIT, MT_DATA);
1264
1265 if (freelist == NULL) {
1266 error = ENOBUFS;
1267 socket_lock(so, 0);
1268 goto release;
1269 }
1270 /*
1271 * For datagram protocols, leave room
1272 * for protocol headers in first mbuf.
1273 */
1274 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1275 MH_ALIGN(freelist, bytes_to_copy);
1276 }
1277 m = freelist;
1278 freelist = m->m_next;
1279 m->m_next = NULL;
1280
1281 if ((m->m_flags & M_EXT))
1282 mlen = m->m_ext.ext_size;
1283 else if ((m->m_flags & M_PKTHDR))
1284 mlen = MHLEN - m_leadingspace(m);
1285 else
1286 mlen = MLEN;
1287 len = min(mlen, bytes_to_copy);
1288
1289 chainlength += len;
1290
1291 space -= len;
1292
1293 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1294
1295 // LP64todo - fix this!
1296 resid = uio_resid(uio);
1297
1298 m->m_len = len;
1299 *mp = m;
1300 top->m_pkthdr.len += len;
1301 if (error)
1302 break;
1303 mp = &m->m_next;
1304 if (resid <= 0) {
1305 if (flags & MSG_EOR)
1306 top->m_flags |= M_EOR;
1307 break;
1308 }
1309 bytes_to_copy = min(resid, space);
1310
1311 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1312
1313 socket_lock(so, 0);
1314
1315 if (error)
1316 goto release;
1317 }
1318
1319 if (flags & (MSG_HOLD|MSG_SEND))
1320 {
1321 /* Enqueue for later, go away if HOLD */
1322 register struct mbuf *mb1;
1323 if (so->so_temp && (flags & MSG_FLUSH))
1324 {
1325 m_freem(so->so_temp);
1326 so->so_temp = NULL;
1327 }
1328 if (so->so_temp)
1329 so->so_tail->m_next = top;
1330 else
1331 so->so_temp = top;
1332 mb1 = top;
1333 while (mb1->m_next)
1334 mb1 = mb1->m_next;
1335 so->so_tail = mb1;
1336 if (flags & MSG_HOLD)
1337 {
1338 top = NULL;
1339 goto release;
1340 }
1341 top = so->so_temp;
1342 }
1343 if (dontroute)
1344 so->so_options |= SO_DONTROUTE;
1345 /* Compute flags here, for pru_send and NKEs */
1346 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1347 /*
1348 * If the user set MSG_EOF, the protocol
1349 * understands this flag and nothing left to
1350 * send then use PRU_SEND_EOF instead of PRU_SEND.
1351 */
1352 ((flags & MSG_EOF) &&
1353 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1354 (resid <= 0)) ?
1355 PRUS_EOF :
1356 /* If there is more to send set PRUS_MORETOCOME */
1357 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1358
1359 /*
1360 * Socket filter processing
1361 */
1362 {
1363 struct socket_filter_entry *filter;
1364 int filtered;
1365
1366 filtered = 0;
1367 error = 0;
1368 for (filter = so->so_filt; filter && (error == 0);
1369 filter = filter->sfe_next_onsocket) {
1370 if (filter->sfe_filter->sf_filter.sf_data_out) {
1371 int so_flags = 0;
1372 if (filtered == 0) {
1373 filtered = 1;
1374 so->so_send_filt_thread = current_thread();
1375 sflt_use(so);
1376 socket_unlock(so, 0);
1377 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1378 }
1379 error = filter->sfe_filter->sf_filter.sf_data_out(
1380 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1381 }
1382 }
1383
1384 if (filtered) {
1385 /*
1386 * At this point, we've run at least one filter.
1387 * The socket is unlocked as is the socket buffer.
1388 */
1389 socket_lock(so, 0);
1390 sflt_unuse(so);
1391 so->so_send_filt_thread = 0;
1392 if (error) {
1393 if (error == EJUSTRETURN) {
1394 error = 0;
1395 clen = 0;
1396 control = 0;
1397 top = 0;
1398 }
1399
1400 goto release;
1401 }
1402 }
1403 }
1404 /*
1405 * End Socket filter processing
1406 */
1407
1408 if (error == EJUSTRETURN) {
1409 /* A socket filter handled this data */
1410 error = 0;
1411 }
1412 else {
1413 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1414 sendflags, top, addr, control, p);
1415 }
1416 #ifdef __APPLE__
1417 if (flags & MSG_SEND)
1418 so->so_temp = NULL;
1419 #endif
1420 if (dontroute)
1421 so->so_options &= ~SO_DONTROUTE;
1422 clen = 0;
1423 control = 0;
1424 top = 0;
1425 mp = &top;
1426 if (error)
1427 goto release;
1428 } while (resid && space > 0);
1429 } while (resid);
1430
1431 release:
1432 if (sblocked)
1433 sbunlock(&so->so_snd, 0); /* will unlock socket */
1434 else
1435 socket_unlock(so, 1);
1436 out:
1437 if (top)
1438 m_freem(top);
1439 if (control)
1440 m_freem(control);
1441 if (freelist)
1442 m_freem_list(freelist);
1443
1444 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1445 so,
1446 resid,
1447 so->so_snd.sb_cc,
1448 space,
1449 error);
1450
1451 return (error);
1452 }
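/*
 * A sketch of typical callers (illustrative only): the plain write path
 * on a connected socket passes only the uio,
 *
 *	error = sosend(so, NULL, uio, NULL, NULL, 0);
 *
 * while a sendto()-style datagram caller also supplies the destination
 * in `addr' and any ancillary data in `control'.
 */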
1453
1454 /*
1455 * Implement receive operations on a socket.
1456 * We depend on the way that records are added to the sockbuf
1457 * by sbappend*. In particular, each record (mbufs linked through m_next)
1458 * must begin with an address if the protocol so specifies,
1459 * followed by an optional mbuf or mbufs containing ancillary data,
1460 * and then zero or more mbufs of data.
1461 * In order to avoid blocking network interrupts for the entire time here,
1462 * we splx() while doing the actual copy to user space.
1463 * Although the sockbuf is locked, new data may still be appended,
1464 * and thus we must maintain consistency of the sockbuf during that time.
1465 *
1466 * The caller may receive the data as a single mbuf chain by supplying
1467 * an mbuf **mp0 for use in returning the chain. The uio is then used
1468 * only for the count in uio_resid.
1469 */
1470 int
1471 soreceive(so, psa, uio, mp0, controlp, flagsp)
1472 register struct socket *so;
1473 struct sockaddr **psa;
1474 struct uio *uio;
1475 struct mbuf **mp0;
1476 struct mbuf **controlp;
1477 int *flagsp;
1478 {
1479 register struct mbuf *m, **mp, *ml = NULL;
1480 register int flags, len, error, offset;
1481 struct protosw *pr = so->so_proto;
1482 struct mbuf *nextrecord;
1483 int moff, type = 0;
1484 // LP64todo - fix this!
1485 int orig_resid = uio_resid(uio);
1486 volatile struct mbuf *free_list;
1487 volatile int delayed_copy_len;
1488 int can_delay;
1489 int need_event;
1490 struct proc *p = current_proc();
1491
1492
1493 // LP64todo - fix this!
1494 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1495 so,
1496 uio_resid(uio),
1497 so->so_rcv.sb_cc,
1498 so->so_rcv.sb_lowat,
1499 so->so_rcv.sb_hiwat);
1500
1501 socket_lock(so, 1);
1502
1503 #ifdef MORE_LOCKING_DEBUG
1504 if (so->so_usecount == 1)
1505 panic("soreceive: so=%x no other reference on socket\n", so);
1506 #endif
1507 mp = mp0;
1508 if (psa)
1509 *psa = 0;
1510 if (controlp)
1511 *controlp = 0;
1512 if (flagsp)
1513 flags = *flagsp &~ MSG_EOR;
1514 else
1515 flags = 0;
1516 /*
1517 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1518 	 * regardless of the flags argument. Here is the case where
1519 * out-of-band data is not inline.
1520 */
1521 if ((flags & MSG_OOB) ||
1522 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1523 (so->so_options & SO_OOBINLINE) == 0 &&
1524 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1525 m = m_get(M_WAIT, MT_DATA);
1526 if (m == NULL) {
1527 socket_unlock(so, 1);
1528 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1529 return (ENOBUFS);
1530 }
1531 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1532 if (error)
1533 goto bad;
1534 socket_unlock(so, 0);
1535 do {
1536 // LP64todo - fix this!
1537 error = uiomove(mtod(m, caddr_t),
1538 (int) min(uio_resid(uio), m->m_len), uio);
1539 m = m_free(m);
1540 } while (uio_resid(uio) && error == 0 && m);
1541 socket_lock(so, 0);
1542 bad:
1543 if (m)
1544 m_freem(m);
1545 #ifdef __APPLE__
1546 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1547 if (error == EWOULDBLOCK || error == EINVAL) {
1548 /*
1549 * Let's try to get normal data:
1550 			 * EWOULDBLOCK: out-of-band data not received yet;
1551 * EINVAL: out-of-band data already read.
1552 */
1553 error = 0;
1554 goto nooob;
1555 } else if (error == 0 && flagsp)
1556 *flagsp |= MSG_OOB;
1557 }
1558 socket_unlock(so, 1);
1559 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1560 #endif
1561 return (error);
1562 }
1563 nooob:
1564 if (mp)
1565 *mp = (struct mbuf *)0;
1566 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1567 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1568
1569
1570 free_list = (struct mbuf *)0;
1571 delayed_copy_len = 0;
1572 restart:
1573 #ifdef MORE_LOCKING_DEBUG
1574 if (so->so_usecount <= 1)
1575 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1576 #endif
1577 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1578 if (error) {
1579 socket_unlock(so, 1);
1580 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1581 return (error);
1582 }
1583
1584 m = so->so_rcv.sb_mb;
1585 /*
1586 * If we have less data than requested, block awaiting more
1587 * (subject to any timeout) if:
1588 * 1. the current count is less than the low water mark, or
1589 * 2. MSG_WAITALL is set, and it is possible to do the entire
1590 * receive operation at once if we block (resid <= hiwat).
1591 * 3. MSG_DONTWAIT is not set
1592 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1593 * we have to do the receive in sections, and thus risk returning
1594 * a short count if a timeout or signal occurs after we start.
1595 */
1596 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1597 so->so_rcv.sb_cc < uio_resid(uio)) &&
1598 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1599 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1600 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1601
1602 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1603 if (so->so_error) {
1604 if (m)
1605 goto dontblock;
1606 error = so->so_error;
1607 if ((flags & MSG_PEEK) == 0)
1608 so->so_error = 0;
1609 goto release;
1610 }
1611 if (so->so_state & SS_CANTRCVMORE) {
1612 if (m)
1613 goto dontblock;
1614 else
1615 goto release;
1616 }
1617 for (; m; m = m->m_next)
1618 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1619 m = so->so_rcv.sb_mb;
1620 goto dontblock;
1621 }
1622 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1623 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1624 error = ENOTCONN;
1625 goto release;
1626 }
1627 if (uio_resid(uio) == 0)
1628 goto release;
1629 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1630 error = EWOULDBLOCK;
1631 goto release;
1632 }
1633 sbunlock(&so->so_rcv, 1);
1634 #ifdef EVEN_MORE_LOCKING_DEBUG
1635 if (socket_debug)
1636 printf("Waiting for socket data\n");
1637 #endif
1638
1639 error = sbwait(&so->so_rcv);
1640 #ifdef EVEN_MORE_LOCKING_DEBUG
1641 if (socket_debug)
1642 printf("SORECEIVE - sbwait returned %d\n", error);
1643 #endif
1644 if (so->so_usecount < 1)
1645 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1646 if (error) {
1647 socket_unlock(so, 1);
1648 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1649 return (error);
1650 }
1651 goto restart;
1652 }
1653 dontblock:
1654 #ifndef __APPLE__
1655 if (uio->uio_procp)
1656 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1657 #else /* __APPLE__ */
1658 /*
1659 * 2207985
1660 	 * This should be uio->uio_procp; however, some callers of this
1661 * function use auto variables with stack garbage, and fail to
1662 * fill out the uio structure properly.
1663 */
1664 if (p)
1665 p->p_stats->p_ru.ru_msgrcv++;
1666 #endif /* __APPLE__ */
1667 nextrecord = m->m_nextpkt;
1668 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1669 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1670 orig_resid = 0;
1671 if (psa) {
1672 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1673 mp0 == 0);
1674 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1675 error = EWOULDBLOCK;
1676 goto release;
1677 }
1678 }
1679 if (flags & MSG_PEEK) {
1680 m = m->m_next;
1681 } else {
1682 sbfree(&so->so_rcv, m);
1683 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1684 panic("soreceive: about to create invalid socketbuf");
1685 MFREE(m, so->so_rcv.sb_mb);
1686 m = so->so_rcv.sb_mb;
1687 }
1688 }
1689 while (m && m->m_type == MT_CONTROL && error == 0) {
1690 if (flags & MSG_PEEK) {
1691 if (controlp)
1692 *controlp = m_copy(m, 0, m->m_len);
1693 m = m->m_next;
1694 } else {
1695 sbfree(&so->so_rcv, m);
1696 if (controlp) {
1697 if (pr->pr_domain->dom_externalize &&
1698 mtod(m, struct cmsghdr *)->cmsg_type ==
1699 SCM_RIGHTS) {
1700 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1701 error = (*pr->pr_domain->dom_externalize)(m);
1702 socket_lock(so, 0);
1703 }
1704 *controlp = m;
1705 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1706 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1707 so->so_rcv.sb_mb = m->m_next;
1708 m->m_next = 0;
1709 m = so->so_rcv.sb_mb;
1710 } else {
1711 MFREE(m, so->so_rcv.sb_mb);
1712 m = so->so_rcv.sb_mb;
1713 }
1714 }
1715 if (controlp) {
1716 orig_resid = 0;
1717 controlp = &(*controlp)->m_next;
1718 }
1719 }
1720 if (m) {
1721 if ((flags & MSG_PEEK) == 0)
1722 m->m_nextpkt = nextrecord;
1723 type = m->m_type;
1724 if (type == MT_OOBDATA)
1725 flags |= MSG_OOB;
1726 }
1727 moff = 0;
1728 offset = 0;
1729
1730 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1731 can_delay = 1;
1732 else
1733 can_delay = 0;
1734
1735 need_event = 0;
1736
1737 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1738 if (m->m_type == MT_OOBDATA) {
1739 if (type != MT_OOBDATA)
1740 break;
1741 } else if (type == MT_OOBDATA)
1742 break;
1743 #ifndef __APPLE__
1744 /*
1745 		 * This assertion needs rework. The trouble is that AppleTalk uses many
1746 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1747 * For now just remove the assertion... CSM 9/98
1748 */
1749 else
1750 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1751 ("receive 3"));
1752 #else
1753 /*
1754 		 * Make sure to always set MSG_OOB when getting
1755 * out of band data inline.
1756 */
1757 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1758 (so->so_options & SO_OOBINLINE) != 0 &&
1759 (so->so_state & SS_RCVATMARK) != 0) {
1760 flags |= MSG_OOB;
1761 }
1762 #endif
1763 so->so_state &= ~SS_RCVATMARK;
1764 // LP64todo - fix this!
1765 len = uio_resid(uio) - delayed_copy_len;
1766 if (so->so_oobmark && len > so->so_oobmark - offset)
1767 len = so->so_oobmark - offset;
1768 if (len > m->m_len - moff)
1769 len = m->m_len - moff;
1770 /*
1771 * If mp is set, just pass back the mbufs.
1772 * Otherwise copy them out via the uio, then free.
1773 		 * The sockbuf must be consistent here (sb_mb points to
1774 		 * the current record, nextrecord to the next one) when we
1775 		 * drop the socket lock to copy out; we must pick up any
1776 		 * additions to the sockbuf when we reacquire it.
1777 */
1778 if (mp == 0) {
1779 if (can_delay && len == m->m_len) {
1780 /*
1781 * only delay the copy if we're consuming the
1782 * mbuf and we're NOT in MSG_PEEK mode
1783 			 * and we have enough data to make it worthwhile
1784 * to drop and retake the funnel... can_delay
1785 * reflects the state of the 2 latter constraints
1786 * moff should always be zero in these cases
1787 */
1788 delayed_copy_len += len;
1789 } else {
1790
1791 if (delayed_copy_len) {
1792 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1793
1794 if (error) {
1795 goto release;
1796 }
1797 if (m != so->so_rcv.sb_mb) {
1798 /*
1799 * can only get here if MSG_PEEK is not set
1800 * therefore, m should point at the head of the rcv queue...
1801 * if it doesn't, it means something drastically changed
1802 * while we were out from behind the funnel in sodelayed_copy...
1803 * perhaps a RST on the stream... in any event, the stream has
1804 * been interrupted... it's probably best just to return
1805 * whatever data we've moved and let the caller sort it out...
1806 */
1807 break;
1808 }
1809 }
1810 socket_unlock(so, 0);
1811 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1812 socket_lock(so, 0);
1813
1814 if (error)
1815 goto release;
1816 }
1817 } else
1818 uio_setresid(uio, (uio_resid(uio) - len));
1819
1820 if (len == m->m_len - moff) {
1821 if (m->m_flags & M_EOR)
1822 flags |= MSG_EOR;
1823 if (flags & MSG_PEEK) {
1824 m = m->m_next;
1825 moff = 0;
1826 } else {
1827 nextrecord = m->m_nextpkt;
1828 sbfree(&so->so_rcv, m);
1829 m->m_nextpkt = NULL;
1830
1831 if (mp) {
1832 *mp = m;
1833 mp = &m->m_next;
1834 so->so_rcv.sb_mb = m = m->m_next;
1835 *mp = (struct mbuf *)0;
1836 } else {
1837 if (free_list == NULL)
1838 free_list = m;
1839 else
1840 ml->m_next = m;
1841 ml = m;
1842 so->so_rcv.sb_mb = m = m->m_next;
1843 ml->m_next = 0;
1844 }
1845 if (m)
1846 m->m_nextpkt = nextrecord;
1847 }
1848 } else {
1849 if (flags & MSG_PEEK)
1850 moff += len;
1851 else {
1852 if (mp)
1853 *mp = m_copym(m, 0, len, M_WAIT);
1854 m->m_data += len;
1855 m->m_len -= len;
1856 so->so_rcv.sb_cc -= len;
1857 }
1858 }
1859 if (so->so_oobmark) {
1860 if ((flags & MSG_PEEK) == 0) {
1861 so->so_oobmark -= len;
1862 if (so->so_oobmark == 0) {
1863 so->so_state |= SS_RCVATMARK;
1864 /*
1865 * delay posting the actual event until after
1866 * any delayed copy processing has finished
1867 */
1868 need_event = 1;
1869 break;
1870 }
1871 } else {
1872 offset += len;
1873 if (offset == so->so_oobmark)
1874 break;
1875 }
1876 }
1877 if (flags & MSG_EOR)
1878 break;
1879 /*
1880 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1881 * we must not quit until "uio->uio_resid == 0" or an error
1882 * termination. If a signal/timeout occurs, return
1883 * with a short count but without error.
1884 * Keep sockbuf locked against other readers.
1885 */
1886 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1887 !sosendallatonce(so) && !nextrecord) {
1888 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1889 goto release;
1890
1891 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1892 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1893 if (sbwait(&so->so_rcv)) {
1894 error = 0;
1895 goto release;
1896 }
1897 /*
1898 * have to wait until after we get back from the sbwait to do the copy because
1899 * we will drop the funnel if we have enough data that has been delayed... by dropping
1900 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1901 * and to change the state of this socket... we're issuing the sbwait because
1902 * the socket is empty and we're expecting the netisr thread to wake us up when more
1903 * packets arrive... if we allow that processing to happen and then sbwait, we
1904 * could stall forever with packets sitting in the socket if no further packets
1905 * arrive from the remote side.
1906 *
1907 * we want to copy before we've collected all the data to satisfy this request to
1908 * allow the copy to overlap the incoming packet processing on an MP system
1909 */
1910 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1911
1912 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1913
1914 if (error)
1915 goto release;
1916 }
1917 m = so->so_rcv.sb_mb;
1918 if (m) {
1919 nextrecord = m->m_nextpkt;
1920 }
1921 }
1922 }
1923 #ifdef MORE_LOCKING_DEBUG
1924 if (so->so_usecount <= 1)
1925 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1926 #endif
1927
1928 if (m && pr->pr_flags & PR_ATOMIC) {
1929 #ifdef __APPLE__
1930 if (so->so_options & SO_DONTTRUNC)
1931 flags |= MSG_RCVMORE;
1932 else {
1933 #endif
1934 flags |= MSG_TRUNC;
1935 if ((flags & MSG_PEEK) == 0)
1936 (void) sbdroprecord(&so->so_rcv);
1937 #ifdef __APPLE__
1938 }
1939 #endif
1940 }
1941 if ((flags & MSG_PEEK) == 0) {
1942 if (m == 0)
1943 so->so_rcv.sb_mb = nextrecord;
1944 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1945 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1946 }
1947 #ifdef __APPLE__
1948 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1949 flags |= MSG_HAVEMORE;
1950
1951 if (delayed_copy_len) {
1952 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1953
1954 if (error)
1955 goto release;
1956 }
1957 if (free_list) {
1958 m_freem_list((struct mbuf *)free_list);
1959 free_list = (struct mbuf *)0;
1960 }
1961 if (need_event)
1962 postevent(so, 0, EV_OOB);
1963 #endif
1964 if (orig_resid == uio_resid(uio) && orig_resid &&
1965 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1966 sbunlock(&so->so_rcv, 1);
1967 goto restart;
1968 }
1969
1970 if (flagsp)
1971 *flagsp |= flags;
1972 release:
1973 #ifdef MORE_LOCKING_DEBUG
1974 if (so->so_usecount <= 1)
1975 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1976 #endif
1977 if (delayed_copy_len) {
1978 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1979 }
1980 if (free_list) {
1981 m_freem_list((struct mbuf *)free_list);
1982 }
1983 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1984
1985 // LP64todo - fix this!
1986 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1987 so,
1988 uio_resid(uio),
1989 so->so_rcv.sb_cc,
1990 0,
1991 error);
1992
1993 return (error);
1994 }
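/*
 * A sketch of a typical caller (illustrative only): a recvmsg()-style
 * path that wants the sender's address and any control data would call
 *
 *	error = soreceive(so, &psa, uio, NULL, &control, &flags);
 *
 * passing NULL for mp0 so the data is copied out through the uio rather
 * than returned as an mbuf chain.
 */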
1995
1996
1997 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1998 {
1999 int error = 0;
2000 struct mbuf *m;
2001
2002 m = *free_list;
2003
2004 socket_unlock(so, 0);
2005
2006 while (m && error == 0) {
2007
2008 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2009
2010 m = m->m_next;
2011 }
2012 m_freem_list(*free_list);
2013
2014 *free_list = (struct mbuf *)NULL;
2015 *resid = 0;
2016
2017 socket_lock(so, 0);
2018
2019 return (error);
2020 }
2021
2022
2023 int
2024 soshutdown(so, how)
2025 register struct socket *so;
2026 int how;
2027 {
2028 register struct protosw *pr = so->so_proto;
2029 int ret;
2030
2031 socket_lock(so, 1);
2032
2033 sflt_notify(so, sock_evt_shutdown, &how);
2034
2035 if (how != SHUT_WR) {
2036 sorflush(so);
2037 postevent(so, 0, EV_RCLOSED);
2038 }
2039 if (how != SHUT_RD) {
2040 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2041 postevent(so, 0, EV_WCLOSED);
2042 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2043 socket_unlock(so, 1);
2044 return(ret);
2045 }
2046
2047 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2048 socket_unlock(so, 1);
2049 return (0);
2050 }
2051
2052 void
2053 sorflush(so)
2054 register struct socket *so;
2055 {
2056 register struct sockbuf *sb = &so->so_rcv;
2057 register struct protosw *pr = so->so_proto;
2058 struct sockbuf asb;
2059
2060 #ifdef MORE_LOCKING_DEBUG
2061 lck_mtx_t * mutex_held;
2062
2063 if (so->so_proto->pr_getlock != NULL)
2064 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2065 else
2066 mutex_held = so->so_proto->pr_domain->dom_mtx;
2067 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2068 #endif
2069
2070 sflt_notify(so, sock_evt_flush_read, NULL);
2071
2072 sb->sb_flags |= SB_NOINTR;
2073 (void) sblock(sb, M_WAIT);
2074 socantrcvmore(so);
2075 sbunlock(sb, 1);
2076 #ifdef __APPLE__
2077 selthreadclear(&sb->sb_sel);
2078 #endif
2079 asb = *sb;
2080 bzero((caddr_t)sb, sizeof (*sb));
2081 sb->sb_so = so; /* reestablish link to socket */
2082 if (asb.sb_flags & SB_KNOTE) {
2083 sb->sb_sel.si_note = asb.sb_sel.si_note;
2084 sb->sb_flags = SB_KNOTE;
2085 }
2086 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2087 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2088 sbrelease(&asb);
2089 }
2090
2091 /*
2092 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2093 * an additional variant to handle the case where the option value needs
2094 * to be some kind of integer, but not a specific size.
2095 * In addition to their use here, these functions are also called by the
2096 * protocol-level pr_ctloutput() routines.
2097 */
2098 int
2099 sooptcopyin(sopt, buf, len, minlen)
2100 struct sockopt *sopt;
2101 void *buf;
2102 size_t len;
2103 size_t minlen;
2104 {
2105 size_t valsize;
2106
2107 /*
2108 * If the user gives us more than we wanted, we ignore it,
2109 * but if we don't get the minimum length the caller
2110 * wants, we return EINVAL. On success, sopt->sopt_valsize
2111 * is set to however much we actually retrieved.
2112 */
2113 if ((valsize = sopt->sopt_valsize) < minlen)
2114 return EINVAL;
2115 if (valsize > len)
2116 sopt->sopt_valsize = valsize = len;
2117
2118 if (sopt->sopt_p != 0)
2119 return (copyin(sopt->sopt_val, buf, valsize));
2120
2121 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2122 return 0;
2123 }
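/*
 * Illustrative sketch, not part of the original file: how a protocol's
 * pr_ctloutput() handler would typically use sooptcopyin() to fetch an
 * integer-sized option on the SOPT_SET path, mirroring the pattern used
 * by sosetopt() below.  The handler and its option are hypothetical;
 * wrapped in #if 0 so it is never compiled.
 */
#if 0
static int
example_ctloutput_set(struct socket *so, struct sockopt *sopt)
{
	int optval, error;

	/* Reject buffers smaller than an int; ignore anything larger. */
	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error)
		return (error);
	/* ... apply optval to protocol state hanging off so->so_pcb ... */
	return (0);
}
#endif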
2124
2125 int
2126 sosetopt(so, sopt)
2127 struct socket *so;
2128 struct sockopt *sopt;
2129 {
2130 int error, optval;
2131 struct linger l;
2132 struct timeval tv;
2133 short val;
2134
2135 socket_lock(so, 1);
2136
2137 if (sopt->sopt_dir != SOPT_SET) {
2138 sopt->sopt_dir = SOPT_SET;
2139 }
2140
2141 {
2142 struct socket_filter_entry *filter;
2143 int filtered = 0;
2144 error = 0;
2145 for (filter = so->so_filt; filter && (error == 0);
2146 filter = filter->sfe_next_onsocket) {
2147 if (filter->sfe_filter->sf_filter.sf_setoption) {
2148 if (filtered == 0) {
2149 filtered = 1;
2150 sflt_use(so);
2151 socket_unlock(so, 0);
2152 }
2153 error = filter->sfe_filter->sf_filter.sf_setoption(
2154 filter->sfe_cookie, so, sopt);
2155 }
2156 }
2157
2158 if (filtered != 0) {
2159 socket_lock(so, 0);
2160 sflt_unuse(so);
2161
2162 if (error) {
2163 if (error == EJUSTRETURN)
2164 error = 0;
2165 goto bad;
2166 }
2167 }
2168 }
2169
2170 error = 0;
2171 if (sopt->sopt_level != SOL_SOCKET) {
2172 if (so->so_proto && so->so_proto->pr_ctloutput) {
2173 error = (*so->so_proto->pr_ctloutput)
2174 (so, sopt);
2175 socket_unlock(so, 1);
2176 return (error);
2177 }
2178 error = ENOPROTOOPT;
2179 } else {
2180 switch (sopt->sopt_name) {
2181 case SO_LINGER:
2182 case SO_LINGER_SEC:
2183 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2184 if (error)
2185 goto bad;
2186
2187 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2188 if (l.l_onoff)
2189 so->so_options |= SO_LINGER;
2190 else
2191 so->so_options &= ~SO_LINGER;
2192 break;
2193
2194 case SO_DEBUG:
2195 case SO_KEEPALIVE:
2196 case SO_DONTROUTE:
2197 case SO_USELOOPBACK:
2198 case SO_BROADCAST:
2199 case SO_REUSEADDR:
2200 case SO_REUSEPORT:
2201 case SO_OOBINLINE:
2202 case SO_TIMESTAMP:
2203 #ifdef __APPLE__
2204 case SO_DONTTRUNC:
2205 case SO_WANTMORE:
2206 case SO_WANTOOBFLAG:
2207 #endif
2208 error = sooptcopyin(sopt, &optval, sizeof optval,
2209 sizeof optval);
2210 if (error)
2211 goto bad;
2212 if (optval)
2213 so->so_options |= sopt->sopt_name;
2214 else
2215 so->so_options &= ~sopt->sopt_name;
2216 break;
2217
2218 case SO_SNDBUF:
2219 case SO_RCVBUF:
2220 case SO_SNDLOWAT:
2221 case SO_RCVLOWAT:
2222 error = sooptcopyin(sopt, &optval, sizeof optval,
2223 sizeof optval);
2224 if (error)
2225 goto bad;
2226
2227 /*
2228 * Values < 1 make no sense for any of these
2229 * options, so disallow them.
2230 */
2231 if (optval < 1) {
2232 error = EINVAL;
2233 goto bad;
2234 }
2235
2236 switch (sopt->sopt_name) {
2237 case SO_SNDBUF:
2238 case SO_RCVBUF:
2239 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2240 &so->so_snd : &so->so_rcv,
2241 (u_long) optval) == 0) {
2242 error = ENOBUFS;
2243 goto bad;
2244 }
2245 break;
2246
2247 /*
2248 * Make sure the low-water is never greater than
2249 * the high-water.
2250 */
2251 case SO_SNDLOWAT:
2252 so->so_snd.sb_lowat =
2253 (optval > so->so_snd.sb_hiwat) ?
2254 so->so_snd.sb_hiwat : optval;
2255 break;
2256 case SO_RCVLOWAT:
2257 so->so_rcv.sb_lowat =
2258 (optval > so->so_rcv.sb_hiwat) ?
2259 so->so_rcv.sb_hiwat : optval;
2260 break;
2261 }
2262 break;
2263
2264 case SO_SNDTIMEO:
2265 case SO_RCVTIMEO:
2266 error = sooptcopyin(sopt, &tv, sizeof tv,
2267 sizeof tv);
2268 if (error)
2269 goto bad;
2270
2271 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2272 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2273 error = EDOM;
2274 goto bad;
2275 }
2276
2277 switch (sopt->sopt_name) {
2278 case SO_SNDTIMEO:
2279 so->so_snd.sb_timeo = tv;
2280 break;
2281 case SO_RCVTIMEO:
2282 so->so_rcv.sb_timeo = tv;
2283 break;
2284 }
2285 break;
2286
2287 case SO_NKE:
2288 {
2289 struct so_nke nke;
2290
2291 error = sooptcopyin(sopt, &nke,
2292 sizeof nke, sizeof nke);
2293 if (error)
2294 goto bad;
2295
2296 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2297 break;
2298 }
2299
2300 case SO_NOSIGPIPE:
2301 error = sooptcopyin(sopt, &optval, sizeof optval,
2302 sizeof optval);
2303 if (error)
2304 goto bad;
2305 if (optval)
2306 so->so_flags |= SOF_NOSIGPIPE;
2307 else
2308 so->so_flags &= ~SOF_NOSIGPIPE;
2309
2310 break;
2311
2312 case SO_NOADDRERR:
2313 error = sooptcopyin(sopt, &optval, sizeof optval,
2314 sizeof optval);
2315 if (error)
2316 goto bad;
2317 if (optval)
2318 so->so_flags |= SOF_NOADDRAVAIL;
2319 else
2320 so->so_flags &= ~SOF_NOADDRAVAIL;
2321
2322 break;
2323
2324 default:
2325 error = ENOPROTOOPT;
2326 break;
2327 }
2328 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2329 (void) ((*so->so_proto->pr_ctloutput)
2330 (so, sopt));
2331 }
2332 }
2333 bad:
2334 socket_unlock(so, 1);
2335 return (error);
2336 }
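/*
 * Illustrative user-level sketch, not part of the original file: the
 * setsockopt(2) calls that exercise the SO_LINGER_SEC and SO_RCVTIMEO
 * cases handled above.  Note that SO_LINGER_SEC converts the caller's
 * seconds to ticks (l.l_linger * hz) while SO_LINGER stores the value as
 * given, and that SO_RCVTIMEO rejects tv_usec outside [0, 1000000).
 * Wrapped in #if 0 so it is never compiled here.
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>

static int
example_setsockopt_usage(int fd)
{
	struct linger l = { 1, 5 };		/* l_onoff = 1, l_linger = 5 seconds */
	struct timeval tv = { 2, 500000 };	/* 2.5 second receive timeout        */

	if (setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof l) < 0)
		return (-1);
	return (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv));
}
#endif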
2337
2338 /* Helper routine for getsockopt */
2339 int
2340 sooptcopyout(sopt, buf, len)
2341 struct sockopt *sopt;
2342 void *buf;
2343 size_t len;
2344 {
2345 int error;
2346 size_t valsize;
2347
2348 error = 0;
2349
2350 /*
2351 * Documented get behavior is that we always return a value,
2352 * possibly truncated to fit in the user's buffer.
2353 * Traditional behavior is that we always tell the user
2354 * precisely how much we copied, rather than something useful
2355 * like the total amount we had available for her.
2356 * Note that this interface is not idempotent; the entire answer must
2357 * be generated ahead of time.
2358 */
2359 valsize = min(len, sopt->sopt_valsize);
2360 sopt->sopt_valsize = valsize;
2361 if (sopt->sopt_val != USER_ADDR_NULL) {
2362 if (sopt->sopt_p != 0)
2363 error = copyout(buf, sopt->sopt_val, valsize);
2364 else
2365 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2366 }
2367 return error;
2368 }
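/*
 * Illustrative sketch, not part of the original file: the matching
 * SOPT_GET side of the hypothetical handler shown after sooptcopyin()
 * above.  sooptcopyout() truncates the value to fit the caller's buffer
 * and records how much was actually copied in sopt->sopt_valsize.
 * Wrapped in #if 0 so it is never compiled.
 */
#if 0
static int
example_ctloutput_get(struct socket *so, struct sockopt *sopt)
{
	int optval = 0;

	/* ... load optval from protocol state hanging off so->so_pcb ... */
	return (sooptcopyout(sopt, &optval, sizeof optval));
}
#endif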
2369
2370 int
2371 sogetopt(so, sopt)
2372 struct socket *so;
2373 struct sockopt *sopt;
2374 {
2375 int error, optval;
2376 struct linger l;
2377 struct timeval tv;
2378
2379 if (sopt->sopt_dir != SOPT_GET) {
2380 sopt->sopt_dir = SOPT_GET;
2381 }
2382
2383 socket_lock(so, 1);
2384
2385 {
2386 struct socket_filter_entry *filter;
2387 int filtered = 0;
2388 error = 0;
2389 for (filter = so->so_filt; filter && (error == 0);
2390 filter = filter->sfe_next_onsocket) {
2391 if (filter->sfe_filter->sf_filter.sf_getoption) {
2392 if (filtered == 0) {
2393 filtered = 1;
2394 sflt_use(so);
2395 socket_unlock(so, 0);
2396 }
2397 error = filter->sfe_filter->sf_filter.sf_getoption(
2398 filter->sfe_cookie, so, sopt);
2399 }
2400 }
2401 if (filtered != 0) {
2402 socket_lock(so, 0);
2403 sflt_unuse(so);
2404
2405 if (error) {
2406 if (error == EJUSTRETURN)
2407 error = 0;
2408 socket_unlock(so, 1);
2409 return error;
2410 }
2411 }
2412 }
2413
2414 error = 0;
2415 if (sopt->sopt_level != SOL_SOCKET) {
2416 if (so->so_proto && so->so_proto->pr_ctloutput) {
2417 error = (*so->so_proto->pr_ctloutput)
2418 (so, sopt);
2419 socket_unlock(so, 1);
2420 return (error);
2421 } else {
2422 socket_unlock(so, 1);
2423 return (ENOPROTOOPT);
2424 }
2425 } else {
2426 switch (sopt->sopt_name) {
2427 case SO_LINGER:
2428 case SO_LINGER_SEC:
2429 l.l_onoff = so->so_options & SO_LINGER;
2430 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2431 so->so_linger / hz;
2432 error = sooptcopyout(sopt, &l, sizeof l);
2433 break;
2434
2435 case SO_USELOOPBACK:
2436 case SO_DONTROUTE:
2437 case SO_DEBUG:
2438 case SO_KEEPALIVE:
2439 case SO_REUSEADDR:
2440 case SO_REUSEPORT:
2441 case SO_BROADCAST:
2442 case SO_OOBINLINE:
2443 case SO_TIMESTAMP:
2444 #ifdef __APPLE__
2445 case SO_DONTTRUNC:
2446 case SO_WANTMORE:
2447 case SO_WANTOOBFLAG:
2448 #endif
2449 optval = so->so_options & sopt->sopt_name;
2450 integer:
2451 error = sooptcopyout(sopt, &optval, sizeof optval);
2452 break;
2453
2454 case SO_TYPE:
2455 optval = so->so_type;
2456 goto integer;
2457
2458 #ifdef __APPLE__
2459 case SO_NREAD:
2460 {
2461 int pkt_total;
2462 struct mbuf *m1;
2463
2464 pkt_total = 0;
2465 m1 = so->so_rcv.sb_mb;
2466 if (so->so_proto->pr_flags & PR_ATOMIC)
2467 {
2468 while (m1) {
2469 if (m1->m_type == MT_DATA)
2470 pkt_total += m1->m_len;
2471 m1 = m1->m_next;
2472 }
2473 optval = pkt_total;
2474 } else
2475 optval = so->so_rcv.sb_cc;
2476 goto integer;
2477 }
2478 case SO_NWRITE:
2479 optval = so->so_snd.sb_cc;
2480 goto integer;
2481 #endif
2482 case SO_ERROR:
2483 optval = so->so_error;
2484 so->so_error = 0;
2485 goto integer;
2486
2487 case SO_SNDBUF:
2488 optval = so->so_snd.sb_hiwat;
2489 goto integer;
2490
2491 case SO_RCVBUF:
2492 optval = so->so_rcv.sb_hiwat;
2493 goto integer;
2494
2495 case SO_SNDLOWAT:
2496 optval = so->so_snd.sb_lowat;
2497 goto integer;
2498
2499 case SO_RCVLOWAT:
2500 optval = so->so_rcv.sb_lowat;
2501 goto integer;
2502
2503 case SO_SNDTIMEO:
2504 case SO_RCVTIMEO:
2505 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2506 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2507
2508 error = sooptcopyout(sopt, &tv, sizeof tv);
2509 break;
2510
2511 case SO_NOSIGPIPE:
2512 optval = (so->so_flags & SOF_NOSIGPIPE);
2513 goto integer;
2514
2515 case SO_NOADDRERR:
2516 optval = (so->so_flags & SOF_NOADDRAVAIL);
2517 goto integer;
2518
2519 default:
2520 error = ENOPROTOOPT;
2521 break;
2522 }
2523 socket_unlock(so, 1);
2524 return (error);
2525 }
2526 }
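/*
 * Illustrative user-level sketch, not part of the original file: using
 * the Darwin-specific SO_NREAD option handled above to ask how many
 * bytes are waiting in the receive buffer (for PR_ATOMIC protocols the
 * code above counts only the data mbufs of the first record).  Wrapped
 * in #if 0 so it is never compiled here.
 */
#if 0
#include <sys/socket.h>

static int
example_so_nread(int fd)
{
	int nread = 0;
	socklen_t len = sizeof nread;

	if (getsockopt(fd, SOL_SOCKET, SO_NREAD, &nread, &len) < 0)
		return (-1);
	return (nread);
}
#endif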
2527
2528 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2529 int
2530 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2531 {
2532 struct mbuf *m, *m_prev;
2533 int sopt_size = sopt->sopt_valsize;
2534
2535 if (sopt_size > MAX_SOOPTGETM_SIZE)
2536 return EMSGSIZE;
2537
2538 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2539 if (m == 0)
2540 return ENOBUFS;
2541 if (sopt_size > MLEN) {
2542 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2543 if ((m->m_flags & M_EXT) == 0) {
2544 m_free(m);
2545 return ENOBUFS;
2546 }
2547 m->m_len = min(MCLBYTES, sopt_size);
2548 } else {
2549 m->m_len = min(MLEN, sopt_size);
2550 }
2551 sopt_size -= m->m_len;
2552 *mp = m;
2553 m_prev = m;
2554
2555 while (sopt_size) {
2556 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2557 if (m == 0) {
2558 m_freem(*mp);
2559 return ENOBUFS;
2560 }
2561 if (sopt_size > MLEN) {
2562 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2563 if ((m->m_flags & M_EXT) == 0) {
2564 m_freem(*mp);
2565 return ENOBUFS;
2566 }
2567 m->m_len = min(MCLBYTES, sopt_size);
2568 } else {
2569 m->m_len = min(MLEN, sopt_size);
2570 }
2571 sopt_size -= m->m_len;
2572 m_prev->m_next = m;
2573 m_prev = m;
2574 }
2575 return 0;
2576 }
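/*
 * Illustrative sketch, not part of the original file: how the mbuf-based
 * helpers here and below are typically combined on the set path by a
 * caller (for example an ip6-style ctloutput path) that still wants the
 * option value in an mbuf chain: allocate a chain sized from
 * sopt_valsize, then copy the user data into it before handing it to the
 * option handler.  The get direction instead fills a chain and returns
 * it to user space with soopt_mcopyout().  Wrapped in #if 0 so it is
 * never compiled.
 */
#if 0
static int
example_soopt_set_via_mbufs(struct sockopt *sopt)
{
	struct mbuf *m = NULL;
	int error;

	if ((error = soopt_getm(sopt, &m)) != 0)	/* size chain from sopt_valsize   */
		return (error);
	if ((error = soopt_mcopyin(sopt, m)) != 0)	/* user -> mbufs; frees m on error */
		return (error);
	/* ... hand m to a hypothetical option handler, which consumes it ... */
	return (0);
}
#endif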
2577
2578 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2579 int
2580 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2581 {
2582 struct mbuf *m0 = m;
2583
2584 if (sopt->sopt_val == USER_ADDR_NULL)
2585 return 0;
2586 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2587 if (sopt->sopt_p != NULL) {
2588 int error;
2589
2590 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2591 if (error != 0) {
2592 m_freem(m0);
2593 return(error);
2594 }
2595 } else
2596 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2597 sopt->sopt_valsize -= m->m_len;
2598 sopt->sopt_val += m->m_len;
2599 m = m->m_next;
2600 }
2601 if (m != NULL) /* the chain should have been allocated large enough by ip6_sooptmcopyin() */
2602 panic("soopt_mcopyin");
2603 return 0;
2604 }
2605
2606 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2607 int
2608 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2609 {
2610 struct mbuf *m0 = m;
2611 size_t valsize = 0;
2612
2613 if (sopt->sopt_val == USER_ADDR_NULL)
2614 return 0;
2615 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2616 if (sopt->sopt_p != NULL) {
2617 int error;
2618
2619 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2620 if (error != 0) {
2621 m_freem(m0);
2622 return(error);
2623 }
2624 } else
2625 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2626 sopt->sopt_valsize -= m->m_len;
2627 sopt->sopt_val += m->m_len;
2628 valsize += m->m_len;
2629 m = m->m_next;
2630 }
2631 if (m != NULL) {
2632 /* user-land should have supplied a large enough soopt buffer */
2633 m_freem(m0);
2634 return(EINVAL);
2635 }
2636 sopt->sopt_valsize = valsize;
2637 return 0;
2638 }
2639
2640 void
2641 sohasoutofband(so)
2642 register struct socket *so;
2643 {
2644 struct proc *p;
2645
2646 if (so->so_pgid < 0)
2647 gsignal(-so->so_pgid, SIGURG);
2648 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2649 psignal(p, SIGURG);
2650 selwakeup(&so->so_rcv.sb_sel);
2651 }
2652
2653 int
2654 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2655 {
2656 struct proc *p = current_proc();
2657 int revents = 0;
2658
2659 socket_lock(so, 1);
2660
2661 if (events & (POLLIN | POLLRDNORM))
2662 if (soreadable(so))
2663 revents |= events & (POLLIN | POLLRDNORM);
2664
2665 if (events & (POLLOUT | POLLWRNORM))
2666 if (sowriteable(so))
2667 revents |= events & (POLLOUT | POLLWRNORM);
2668
2669 if (events & (POLLPRI | POLLRDBAND))
2670 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2671 revents |= events & (POLLPRI | POLLRDBAND);
2672
2673 if (revents == 0) {
2674 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2675 /* Darwin sets the flag first, BSD calls selrecord first */
2676 so->so_rcv.sb_flags |= SB_SEL;
2677 selrecord(p, &so->so_rcv.sb_sel, wql);
2678 }
2679
2680 if (events & (POLLOUT | POLLWRNORM)) {
2681 /* Darwin sets the flag first, BSD calls selrecord first */
2682 so->so_snd.sb_flags |= SB_SEL;
2683 selrecord(p, &so->so_snd.sb_sel, wql);
2684 }
2685 }
2686
2687 socket_unlock(so, 1);
2688 return (revents);
2689 }
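/*
 * Illustrative user-level sketch, not part of the original file: a
 * poll(2) call that exercises the event classes tested above, waiting
 * for normal data (POLLIN), writability (POLLOUT) or out-of-band data
 * (POLLPRI) on one socket.  Wrapped in #if 0 so it is never compiled
 * here.
 */
#if 0
#include <poll.h>

static int
example_poll_usage(int fd)
{
	struct pollfd pfd;

	pfd.fd = fd;
	pfd.events = POLLIN | POLLOUT | POLLPRI;
	pfd.revents = 0;

	return (poll(&pfd, 1, 1000));	/* wait at most one second */
}
#endif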
2690
2691 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2692
2693 int
2694 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2695 {
2696 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2697 struct sockbuf *sb;
2698 socket_lock(so, 1);
2699
2700 switch (kn->kn_filter) {
2701 case EVFILT_READ:
2702 if (so->so_options & SO_ACCEPTCONN)
2703 kn->kn_fop = &solisten_filtops;
2704 else
2705 kn->kn_fop = &soread_filtops;
2706 sb = &so->so_rcv;
2707 break;
2708 case EVFILT_WRITE:
2709 kn->kn_fop = &sowrite_filtops;
2710 sb = &so->so_snd;
2711 break;
2712 default:
2713 socket_unlock(so, 1);
2714 return (1);
2715 }
2716
2717 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2718 sb->sb_flags |= SB_KNOTE;
2719 socket_unlock(so, 1);
2720 return (0);
2721 }
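/*
 * Illustrative user-level sketch, not part of the original file: the
 * kevent(2) registration that reaches soo_kqfilter() above.  EVFILT_READ
 * on a listening socket selects solisten_filtops (kn_data is then set to
 * so_qlen); on any other socket it selects soread_filtops.  Wrapped in
 * #if 0 so it is never compiled here.
 */
#if 0
#include <sys/event.h>
#include <sys/time.h>

static int
example_kqueue_usage(int kq, int listen_fd)
{
	struct kevent kev;

	EV_SET(&kev, listen_fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	return (kevent(kq, &kev, 1, NULL, 0, NULL));	/* register only */
}
#endif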
2722
2723 static void
2724 filt_sordetach(struct knote *kn)
2725 {
2726 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2727
2728 socket_lock(so, 1);
2729 if (so->so_rcv.sb_flags & SB_KNOTE)
2730 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2731 so->so_rcv.sb_flags &= ~SB_KNOTE;
2732 socket_unlock(so, 1);
2733 }
2734
2735 /*ARGSUSED*/
2736 static int
2737 filt_soread(struct knote *kn, long hint)
2738 {
2739 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2740
2741 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2742 socket_lock(so, 1);
2743
2744 if (so->so_oobmark) {
2745 if (kn->kn_flags & EV_OOBAND) {
2746 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2747 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2748 socket_unlock(so, 1);
2749 return (1);
2750 }
2751 kn->kn_data = so->so_oobmark;
2752 kn->kn_flags |= EV_OOBAND;
2753 } else {
2754 kn->kn_data = so->so_rcv.sb_cc;
2755 if (so->so_state & SS_CANTRCVMORE) {
2756 kn->kn_flags |= EV_EOF;
2757 kn->kn_fflags = so->so_error;
2758 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2759 socket_unlock(so, 1);
2760 return (1);
2761 }
2762 }
2763
2764 if (so->so_state & SS_RCVATMARK) {
2765 if (kn->kn_flags & EV_OOBAND) {
2766 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2767 socket_unlock(so, 1);
2768 return (1);
2769 }
2770 kn->kn_flags |= EV_OOBAND;
2771 } else if (kn->kn_flags & EV_OOBAND) {
2772 kn->kn_data = 0;
2773 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2774 socket_unlock(so, 1);
2775 return (0);
2776 }
2777
2778 if (so->so_error) { /* temporary udp error */
2779 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2780 socket_unlock(so, 1);
2781 return (1);
2782 }
2783
2784 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2785 socket_unlock(so, 1);
2786
2787 return( kn->kn_flags & EV_OOBAND ||
2788 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2789 kn->kn_sdata : so->so_rcv.sb_lowat));
2790 }
2791
2792 static void
2793 filt_sowdetach(struct knote *kn)
2794 {
2795 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2796 socket_lock(so, 1);
2797
2798 if (so->so_snd.sb_flags & SB_KNOTE)
2799 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2800 so->so_snd.sb_flags &= ~SB_KNOTE;
2801 socket_unlock(so, 1);
2802 }
2803
2804 /*ARGSUSED*/
2805 static int
2806 filt_sowrite(struct knote *kn, long hint)
2807 {
2808 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2809
2810 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2811 socket_lock(so, 1);
2812
2813 kn->kn_data = sbspace(&so->so_snd);
2814 if (so->so_state & SS_CANTSENDMORE) {
2815 kn->kn_flags |= EV_EOF;
2816 kn->kn_fflags = so->so_error;
2817 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2818 socket_unlock(so, 1);
2819 return (1);
2820 }
2821 if (so->so_error) { /* temporary udp error */
2822 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2823 socket_unlock(so, 1);
2824 return (1);
2825 }
2826 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2827 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2828 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2829 socket_unlock(so, 1);
2830 return (0);
2831 }
2832 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2833 socket_unlock(so, 1);
2834 if (kn->kn_sfflags & NOTE_LOWAT)
2835 return (kn->kn_data >= kn->kn_sdata);
2836 return (kn->kn_data >= so->so_snd.sb_lowat);
2837 }
2838
2839 /*ARGSUSED*/
2840 static int
2841 filt_solisten(struct knote *kn, long hint)
2842 {
2843 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2844 int not_empty;
2845
2846 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2847 socket_lock(so, 1);
2848 kn->kn_data = so->so_qlen;
2849 not_empty = ! TAILQ_EMPTY(&so->so_comp);
2850 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2851 socket_unlock(so, 1);
2852 return (not_empty);
2853 }
2854
2855
2856 int
2857 socket_lock(so, refcount)
2858 struct socket *so;
2859 int refcount;
2860 {
2861 int error = 0, lr_saved;
2862
2863 lr_saved = (unsigned int) __builtin_return_address(0);
2864
2865 if (so->so_proto->pr_lock) {
2866 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2867 }
2868 else {
2869 #ifdef MORE_LOCKING_DEBUG
2870 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2871 #endif
2872 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2873 if (refcount)
2874 so->so_usecount++;
2875 so->lock_lr[so->next_lock_lr] = (void *)lr_saved;
2876 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2877 }
2878
2879 return(error);
2880
2881 }
2882
2883 int
2884 socket_unlock(so, refcount)
2885 struct socket *so;
2886 int refcount;
2887 {
2888 int error = 0, lr_saved;
2889 lck_mtx_t * mutex_held;
2890
2891 lr_saved = (unsigned int) __builtin_return_address(0);
2892
2893 if (so->so_proto == NULL)
2894 panic("socket_unlock null so_proto so=%x\n", so);
2895
2896 if (so->so_proto->pr_unlock)
2897 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2898 else {
2899 mutex_held = so->so_proto->pr_domain->dom_mtx;
2900 #ifdef MORE_LOCKING_DEBUG
2901 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2902 #endif
2903 so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
2904 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2905
2906 if (refcount) {
2907 if (so->so_usecount <= 0)
2908 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2909 so->so_usecount--;
2910 if (so->so_usecount == 0) {
2911 sofreelastref(so, 1);
2912 }
2913 }
2914 lck_mtx_unlock(mutex_held);
2915 }
2916
2917 return(error);
2918 }
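/*
 * Illustrative sketch, not part of the original file: the usual pairing
 * of socket_lock()/socket_unlock() with a non-zero refcount argument, as
 * used by the socket-layer entry points above.  Passing 1 to
 * socket_lock() takes a use-count reference along with the mutex;
 * passing 1 to socket_unlock() drops it again and frees the socket via
 * sofreelastref() if that was the last reference.  Wrapped in #if 0 so
 * it is never compiled.
 */
#if 0
static void
example_locked_operation(struct socket *so)
{
	socket_lock(so, 1);	/* acquire mutex and take a reference */

	/* ... operate on socket state while it cannot go away ... */

	socket_unlock(so, 1);	/* drop the reference and release the mutex */
}
#endif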
2919 /* Called with socket locked, will unlock socket */
2920 void
2921 sofree(so)
2922 struct socket *so;
2923 {
2924
2925 lck_mtx_t * mutex_held;
2926 if (so->so_proto->pr_getlock != NULL)
2927 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2928 else
2929 mutex_held = so->so_proto->pr_domain->dom_mtx;
2930 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2931
2932 sofreelastref(so, 0);
2933 }
2934
2935 void
2936 soreference(so)
2937 struct socket *so;
2938 {
2939 socket_lock(so, 1); /* lock socket and take one reference */
2940 socket_unlock(so, 0); /* unlock only */
2941 }
2942
2943 void
2944 sodereference(so)
2945 struct socket *so;
2946 {
2947 socket_lock(so, 0);
2948 socket_unlock(so, 1);
2949 }