1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106 #include <machine/limits.h>
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
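/*
 * Illustrative sketch of how a typical in-kernel caller might drive these
 * routines; each call eventually dispatches through the protocol's
 * pr_usrreqs switch (pru_attach, pru_bind, pru_listen, ...).  Error
 * handling is elided and the sockaddr `sin' is assumed to be set up.
 *
 *	struct socket *so;
 *	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	error = sobind(so, (struct sockaddr *)&sin);
 *	error = solisten(so, somaxconn);
 *	error = soclose(so);
 */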
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
186
187 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
188
189 /*
190 * allocate the lock attribute for socket cache mutex
191 */
192 so_cache_mtx_attr = lck_attr_alloc_init();
193 lck_attr_setdefault(so_cache_mtx_attr);
194
195 so_cache_init_done = 1;
196
197 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
198
199 if (so_cache_mtx == NULL)
200 return; /* we're hosed... */
201
202 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
203 get_inpcb_str_size() + 4 +
204 get_tcp_str_size());
205 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
206 #if TEMPDEBUG
207 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
208 #endif
209 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
210
211 so_cache_zone_element_size = str_size;
212
213 sflt_init();
214
215 }
216
217 void cached_sock_alloc(so, waitok)
218 struct socket **so;
219 int waitok;
220
221 {
222 caddr_t temp;
223 register u_long offset;
224
225
226 lck_mtx_lock(so_cache_mtx);
227
228 if (cached_sock_count) {
229 cached_sock_count--;
230 *so = socket_cache_head;
231 if (*so == 0)
232 panic("cached_sock_alloc: cached sock is null");
233
234 socket_cache_head = socket_cache_head->cache_next;
235 if (socket_cache_head)
236 socket_cache_head->cache_prev = 0;
237 else
238 socket_cache_tail = 0;
239
240 lck_mtx_unlock(so_cache_mtx);
241
242 temp = (*so)->so_saved_pcb;
243 bzero((caddr_t)*so, sizeof(struct socket));
244 #if TEMPDEBUG
245 kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
246 cached_sock_count);
247 #endif
248 (*so)->so_saved_pcb = temp;
249 (*so)->cached_in_sock_layer = 1;
250
251 }
252 else {
253 #if TEMPDEBUG
254 kprintf("Allocating cached sock %x from memory\n", *so);
255 #endif
256
257 lck_mtx_unlock(so_cache_mtx);
258
259 if (waitok)
260 *so = (struct socket *) zalloc(so_cache_zone);
261 else
262 *so = (struct socket *) zalloc_noblock(so_cache_zone);
263
264 if (*so == 0)
265 return;
266
267 bzero((caddr_t)*so, sizeof(struct socket));
268
269 /*
270 * Define offsets for extra structures into our single block of
271 * memory. Align extra structures on longword boundaries.
272 */
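		/*
		 * Rough picture of how the single zone element is carved up
		 * below (sizes are whatever get_inpcb_str_size() and
		 * get_tcp_str_size() report, each rounded up to 4 bytes):
		 *
		 *	+------------------+  <- *so
		 *	| struct socket    |
		 *	+------------------+  <- so_saved_pcb
		 *	| inpcb storage    |
		 *	+------------------+  <- inp_saved_ppcb
		 *	| tcpcb storage    |
		 *	+------------------+
		 */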
273
274
275 offset = (u_long) *so;
276 offset += sizeof(struct socket);
277 if (offset & 0x3) {
278 offset += 4;
279 offset &= 0xfffffffc;
280 }
281 (*so)->so_saved_pcb = (caddr_t) offset;
282 offset += get_inpcb_str_size();
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287
288 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
289 #if TEMPDEBUG
290 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
291 (*so)->so_saved_pcb,
292 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
293 #endif
294 }
295
296 (*so)->cached_in_sock_layer = 1;
297 }
298
299
300 void cached_sock_free(so)
301 struct socket *so;
302 {
303
304 lck_mtx_lock(so_cache_mtx);
305
306 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
307 --cached_sock_count;
308 lck_mtx_unlock(so_cache_mtx);
309 #if TEMPDEBUG
310 kprintf("Freeing overflowed cached socket %x\n", so);
311 #endif
312 zfree(so_cache_zone, so);
313 }
314 else {
315 #if TEMPDEBUG
316 kprintf("Freeing socket %x into cache\n", so);
317 #endif
318 if (so_cache_hw < cached_sock_count)
319 so_cache_hw = cached_sock_count;
320
321 so->cache_next = socket_cache_head;
322 so->cache_prev = 0;
323 if (socket_cache_head)
324 socket_cache_head->cache_prev = so;
325 else
326 socket_cache_tail = so;
327
328 so->cache_timestamp = so_cache_time;
329 socket_cache_head = so;
330 lck_mtx_unlock(so_cache_mtx);
331 }
332
333 #if TEMPDEBUG
334 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
335 #endif
336
337
338 }
339
340
341 void so_cache_timer()
342 {
343 register struct socket *p;
344 register int n_freed = 0;
345
346
347 lck_mtx_lock(so_cache_mtx);
348
349 ++so_cache_time;
350
351 while ( (p = socket_cache_tail) )
352 {
353 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
354 break;
355
356 so_cache_timeouts++;
357
358 if ( (socket_cache_tail = p->cache_prev) )
359 p->cache_prev->cache_next = 0;
360 if (--cached_sock_count == 0)
361 socket_cache_head = 0;
362
363
364 zfree(so_cache_zone, p);
365
366 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
367 {
368 so_cache_max_freed++;
369 break;
370 }
371 }
372 lck_mtx_unlock(so_cache_mtx);
373
374 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
375
376
377 }
378 #endif /* __APPLE__ */
379
380 /*
381 * Get a socket structure from our zone, and initialize it.
382 * We don't implement `waitok' yet (see comments in uipc_domain.c).
383 * Note that it would probably be better to allocate socket
384 * and PCB at the same time, but I'm not convinced that all
385 * the protocols can be easily modified to do this.
386 */
387 struct socket *
388 soalloc(waitok, dom, type)
389 int waitok;
390 int dom;
391 int type;
392 {
393 struct socket *so;
394
395 if ((dom == PF_INET) && (type == SOCK_STREAM))
396 cached_sock_alloc(&so, waitok);
397 else
398 {
399 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
400 if (so)
401 bzero(so, sizeof *so);
402 }
403 /* XXX race condition for reentrant kernel */
404 //###LD Atomic add for so_gencnt
405 if (so) {
406 so->so_gencnt = ++so_gencnt;
407 so->so_zone = socket_zone;
408 }
409
410 return so;
411 }
412
413 int
414 socreate(dom, aso, type, proto)
415 int dom;
416 struct socket **aso;
417 register int type;
418 int proto;
419 {
420 struct proc *p = current_proc();
421 register struct protosw *prp;
422 register struct socket *so;
423 register int error = 0;
424 #if TCPDEBUG
425 extern int tcpconsdebug;
426 #endif
427 if (proto)
428 prp = pffindproto(dom, proto, type);
429 else
430 prp = pffindtype(dom, type);
431
432 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
433 return (EPROTONOSUPPORT);
434 #ifndef __APPLE__
435
436 if (p->p_prison && jail_socket_unixiproute_only &&
437 prp->pr_domain->dom_family != PF_LOCAL &&
438 prp->pr_domain->dom_family != PF_INET &&
439 prp->pr_domain->dom_family != PF_ROUTE) {
440 return (EPROTONOSUPPORT);
441 }
442
443 #endif
444 if (prp->pr_type != type)
445 return (EPROTOTYPE);
446 so = soalloc(p != 0, dom, type);
447 if (so == 0)
448 return (ENOBUFS);
449
450 TAILQ_INIT(&so->so_incomp);
451 TAILQ_INIT(&so->so_comp);
452 so->so_type = type;
453
454 #ifdef __APPLE__
455 if (p != 0) {
456 so->so_uid = kauth_cred_getuid(kauth_cred_get());
457 if (!suser(kauth_cred_get(),NULL))
458 so->so_state = SS_PRIV;
459 }
460 #else
461 so->so_cred = kauth_cred_get_with_ref();
462 #endif
463 so->so_proto = prp;
464 #ifdef __APPLE__
465 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
466 so->so_rcv.sb_so = so->so_snd.sb_so = so;
467 #endif
468
469 //### Attachment will create the per pcb lock if necessary and increase refcount
470
471 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
472 if (error) {
473 /*
474 * Warning:
475 * If so_pcb is not zero, the socket will be leaked,
476 * so the protocol attachment handler must be coded carefully
477 */
478 so->so_state |= SS_NOFDREF;
479 sofreelastref(so, 1);
480 return (error);
481 }
482 so->so_usecount++;
483 #ifdef __APPLE__
484 prp->pr_domain->dom_refs++;
485 TAILQ_INIT(&so->so_evlist);
486
487 /* Attach socket filters for this protocol */
488 sflt_initsock(so);
489 #if TCPDEBUG
490 if (tcpconsdebug == 2)
491 so->so_options |= SO_DEBUG;
492 #endif
493 #endif
494
495 *aso = so;
496 return (0);
497 }
498
499 int
500 sobind(so, nam)
501 struct socket *so;
502 struct sockaddr *nam;
503
504 {
505 struct proc *p = current_proc();
506 int error = 0;
507 struct socket_filter_entry *filter;
508 int filtered = 0;
509
510 socket_lock(so, 1);
511
512 /* Socket filter */
513 error = 0;
514 for (filter = so->so_filt; filter && (error == 0);
515 filter = filter->sfe_next_onsocket) {
516 if (filter->sfe_filter->sf_filter.sf_bind) {
517 if (filtered == 0) {
518 filtered = 1;
519 sflt_use(so);
520 socket_unlock(so, 0);
521 }
522 error = filter->sfe_filter->sf_filter.sf_bind(
523 filter->sfe_cookie, so, nam);
524 }
525 }
526 if (filtered != 0) {
527 socket_lock(so, 0);
528 sflt_unuse(so);
529 }
530 /* End socket filter */
531
532 if (error == 0)
533 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
534
535 socket_unlock(so, 1);
536
537 if (error == EJUSTRETURN)
538 error = 0;
539
540 return (error);
541 }
542
543 void
544 sodealloc(so)
545 struct socket *so;
546 {
547 so->so_gencnt = ++so_gencnt;
548
549 #ifndef __APPLE__
550 if (so->so_rcv.sb_hiwat)
551 (void)chgsbsize(so->so_cred->cr_uidinfo,
552 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
553 if (so->so_snd.sb_hiwat)
554 (void)chgsbsize(so->so_cred->cr_uidinfo,
555 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
556 #ifdef INET
557 if (so->so_accf != NULL) {
558 if (so->so_accf->so_accept_filter != NULL &&
559 so->so_accf->so_accept_filter->accf_destroy != NULL) {
560 so->so_accf->so_accept_filter->accf_destroy(so);
561 }
562 if (so->so_accf->so_accept_filter_str != NULL)
563 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
564 FREE(so->so_accf, M_ACCF);
565 }
566 #endif /* INET */
567 kauth_cred_rele(so->so_cred);
568 zfreei(so->so_zone, so);
569 #else
570 if (so->cached_in_sock_layer == 1)
571 cached_sock_free(so);
572 else {
573 if (so->cached_in_sock_layer == -1)
574 panic("sodealloc: double dealloc: so=%x\n", so);
575 so->cached_in_sock_layer = -1;
576 FREE_ZONE(so, sizeof(*so), so->so_zone);
577 }
578 #endif /* __APPLE__ */
579 }
580
581 int
582 solisten(so, backlog)
583 register struct socket *so;
584 int backlog;
585
586 {
587 struct proc *p = current_proc();
588 int error;
589
590 socket_lock(so, 1);
591
592 {
593 struct socket_filter_entry *filter;
594 int filtered = 0;
595 error = 0;
596 for (filter = so->so_filt; filter && (error == 0);
597 filter = filter->sfe_next_onsocket) {
598 if (filter->sfe_filter->sf_filter.sf_listen) {
599 if (filtered == 0) {
600 filtered = 1;
601 sflt_use(so);
602 socket_unlock(so, 0);
603 }
604 error = filter->sfe_filter->sf_filter.sf_listen(
605 filter->sfe_cookie, so);
606 }
607 }
608 if (filtered != 0) {
609 socket_lock(so, 0);
610 sflt_unuse(so);
611 }
612 }
613
614 if (error == 0) {
615 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
616 }
617
618 if (error) {
619 socket_unlock(so, 1);
620 if (error == EJUSTRETURN)
621 error = 0;
622 return (error);
623 }
624
625 if (TAILQ_EMPTY(&so->so_comp))
626 so->so_options |= SO_ACCEPTCONN;
627 if (backlog < 0 || backlog > somaxconn)
628 backlog = somaxconn;
629 so->so_qlimit = backlog;
630
631 socket_unlock(so, 1);
632 return (0);
633 }
634
635 void
636 sofreelastref(so, dealloc)
637 register struct socket *so;
638 int dealloc;
639 {
640 int error;
641 struct socket *head = so->so_head;
642
643 /*### Assume socket is locked */
644
645 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
646 #ifdef __APPLE__
647 selthreadclear(&so->so_snd.sb_sel);
648 selthreadclear(&so->so_rcv.sb_sel);
649 so->so_rcv.sb_flags &= ~SB_UPCALL;
650 so->so_snd.sb_flags &= ~SB_UPCALL;
651 #endif
652 return;
653 }
654 if (head != NULL) {
655 socket_lock(head, 1);
656 if (so->so_state & SS_INCOMP) {
657 TAILQ_REMOVE(&head->so_incomp, so, so_list);
658 head->so_incqlen--;
659 } else if (so->so_state & SS_COMP) {
660 /*
661 * We must not decommission a socket that's
662 * on the accept(2) queue. If we do, then
663 * accept(2) may hang after select(2) indicated
664 * that the listening socket was ready.
665 */
666 #ifdef __APPLE__
667 selthreadclear(&so->so_snd.sb_sel);
668 selthreadclear(&so->so_rcv.sb_sel);
669 so->so_rcv.sb_flags &= ~SB_UPCALL;
670 so->so_snd.sb_flags &= ~SB_UPCALL;
671 #endif
672 socket_unlock(head, 1);
673 return;
674 } else {
675 panic("sofree: not queued");
676 }
677 head->so_qlen--;
678 so->so_state &= ~SS_INCOMP;
679 so->so_head = NULL;
680 socket_unlock(head, 1);
681 }
682 #ifdef __APPLE__
683 selthreadclear(&so->so_snd.sb_sel);
684 sbrelease(&so->so_snd);
685 #endif
686 sorflush(so);
687
688 /* 3932268: disable upcall */
689 so->so_rcv.sb_flags &= ~SB_UPCALL;
690 so->so_snd.sb_flags &= ~SB_UPCALL;
691
692 if (dealloc)
693 sodealloc(so);
694 }
695
696 /*
697 * Close a socket on last file table reference removal.
698 * Initiate disconnect if connected.
699 * Free socket when disconnect complete.
700 */
701 int
702 soclose_locked(so)
703 register struct socket *so;
704 {
705 int error = 0;
706 lck_mtx_t * mutex_held;
707 struct timespec ts;
708
709 if (so->so_usecount == 0) {
710 panic("soclose: so=%x refcount=0\n", so);
711 }
712
713 sflt_notify(so, sock_evt_closing, NULL);
714
715 if ((so->so_options & SO_ACCEPTCONN)) {
716 struct socket *sp;
717
718 /* We do not want new connections to be added to the connection queues */
719 so->so_options &= ~SO_ACCEPTCONN;
720
721 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
722 /* A bit tricky here.  If the protocol uses a single
723 * global lock we simply keep it held across soabort();
724 * with per-socket locks we lock the incoming socket and
725 * drop the head's lock around the call, then swap them back.
726 */
727 if (so->so_proto->pr_getlock != NULL)
728 socket_lock(sp, 1);
729 if (so->so_proto->pr_getlock != NULL)
730 socket_unlock(so, 0);
731 (void) soabort(sp);
732 if (so->so_proto->pr_getlock != NULL)
733 socket_lock(so, 0);
734 if (so->so_proto->pr_getlock != NULL)
735 socket_unlock(sp, 1);
736 }
737
738 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
739 if (so->so_proto->pr_getlock != NULL)
740 socket_lock(sp, 1);
741
742 /* Dequeue from so_comp since sofree() won't do it */
743 TAILQ_REMOVE(&so->so_comp, sp, so_list);
744 so->so_qlen--;
745 sp->so_state &= ~SS_COMP;
746 sp->so_head = NULL;
747
748 if (so->so_proto->pr_getlock != NULL)
749 socket_unlock(so, 0);
750 (void) soabort(sp);
751 if (so->so_proto->pr_getlock != NULL)
752 socket_lock(so, 0);
753 if (so->so_proto->pr_getlock != NULL)
754 socket_unlock(sp, 1);
755 }
756 }
757 if (so->so_pcb == 0) {
758 /* 3915887: mark the socket as ready for dealloc */
759 so->so_flags |= SOF_PCBCLEARING;
760 goto discard;
761 }
762 if (so->so_state & SS_ISCONNECTED) {
763 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
764 error = sodisconnectlocked(so);
765 if (error)
766 goto drop;
767 }
768 if (so->so_options & SO_LINGER) {
769 if ((so->so_state & SS_ISDISCONNECTING) &&
770 (so->so_state & SS_NBIO))
771 goto drop;
772 if (so->so_proto->pr_getlock != NULL)
773 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
774 else
775 mutex_held = so->so_proto->pr_domain->dom_mtx;
776 while (so->so_state & SS_ISCONNECTED) {
777 ts.tv_sec = (so->so_linger/100);
778 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
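				/*
				 * The arithmetic above treats so_linger as clock ticks and
				 * assumes hz == 100: e.g. so_linger == 250 gives
				 * ts.tv_sec == 2 and ts.tv_nsec == 50 * 10ms == 500000000.
				 */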
779 error = msleep((caddr_t)&so->so_timeo, mutex_held,
780 PSOCK | PCATCH, "soclos", &ts);
781 if (error) {
782 /* It's OK when the timer fires, don't report an error */
783 if (error == EWOULDBLOCK)
784 error = 0;
785 break;
786 }
787 }
788 }
789 }
790 drop:
791 if (so->so_usecount == 0)
792 panic("soclose: usecount is zero so=%x\n", so);
793 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
794 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
795 if (error == 0)
796 error = error2;
797 }
798 if (so->so_usecount <= 0)
799 panic("soclose: usecount is zero so=%x\n", so);
800 discard:
801 if (so->so_pcb && so->so_state & SS_NOFDREF)
802 panic("soclose: NOFDREF");
803 so->so_state |= SS_NOFDREF;
804 #ifdef __APPLE__
805 so->so_proto->pr_domain->dom_refs--;
806 evsofree(so);
807 #endif
808 so->so_usecount--;
809 sofree(so);
810 return (error);
811 }
812
813 int
814 soclose(so)
815 register struct socket *so;
816 {
817 int error = 0;
818 socket_lock(so, 1);
819 if (so->so_retaincnt == 0)
820 error = soclose_locked(so);
821 else { /* if the FD is going away but the socket is retained in the kernel, remove its reference */
822 so->so_usecount--;
823 if (so->so_usecount < 2)
824 panic("soclose: retaincnt non null and so=%x usecount=%d\n", so, so->so_usecount);
825 }
826 socket_unlock(so, 1);
827 return (error);
828 }
829
830
831 /*
832 * Must be called at splnet...
833 */
834 //#### Should already be locked
835 int
836 soabort(so)
837 struct socket *so;
838 {
839 int error;
840
841 #ifdef MORE_LOCKING_DEBUG
842 lck_mtx_t * mutex_held;
843
844 if (so->so_proto->pr_getlock != NULL)
845 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
846 else
847 mutex_held = so->so_proto->pr_domain->dom_mtx;
848 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
849 #endif
850
851 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
852 if (error) {
853 sofree(so);
854 return error;
855 }
856 return (0);
857 }
858
859 int
860 soacceptlock(so, nam, dolock)
861 register struct socket *so;
862 struct sockaddr **nam;
863 int dolock;
864 {
865 int error;
866
867 if (dolock) socket_lock(so, 1);
868
869 if ((so->so_state & SS_NOFDREF) == 0)
870 panic("soaccept: !NOFDREF");
871 so->so_state &= ~SS_NOFDREF;
872 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
873
874 if (dolock) socket_unlock(so, 1);
875 return (error);
876 }
877 int
878 soaccept(so, nam)
879 register struct socket *so;
880 struct sockaddr **nam;
881 {
882 return (soacceptlock(so, nam, 1));
883 }
884
885 int
886 soconnectlock(so, nam, dolock)
887 register struct socket *so;
888 struct sockaddr *nam;
889 int dolock;
890
891 {
892 int s;
893 int error;
894 struct proc *p = current_proc();
895
896 if (dolock) socket_lock(so, 1);
897
898 if (so->so_options & SO_ACCEPTCONN) {
899 if (dolock) socket_unlock(so, 1);
900 return (EOPNOTSUPP);
901 }
902 /*
903 * If protocol is connection-based, can only connect once.
904 * Otherwise, if connected, try to disconnect first.
905 * This allows a user to disconnect by connecting to, e.g.,
906 * a null address.
907 */
908 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
909 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
910 (error = sodisconnectlocked(so))))
911 error = EISCONN;
912 else {
913 /*
914 * Run connect filter before calling protocol:
915 * - non-blocking connect returns before completion;
916 */
917 {
918 struct socket_filter_entry *filter;
919 int filtered = 0;
920 error = 0;
921 for (filter = so->so_filt; filter && (error == 0);
922 filter = filter->sfe_next_onsocket) {
923 if (filter->sfe_filter->sf_filter.sf_connect_out) {
924 if (filtered == 0) {
925 filtered = 1;
926 sflt_use(so);
927 socket_unlock(so, 0);
928 }
929 error = filter->sfe_filter->sf_filter.sf_connect_out(
930 filter->sfe_cookie, so, nam);
931 }
932 }
933 if (filtered != 0) {
934 socket_lock(so, 0);
935 sflt_unuse(so);
936 }
937 }
938 if (error) {
939 if (error == EJUSTRETURN)
940 error = 0;
941 if (dolock) socket_unlock(so, 1);
942 return error;
943 }
944
945 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
946 }
947 if (dolock) socket_unlock(so, 1);
948 return (error);
949 }
950
951 int
952 soconnect(so, nam)
953 register struct socket *so;
954 struct sockaddr *nam;
955 {
956 return (soconnectlock(so, nam, 1));
957 }
958
959 int
960 soconnect2(so1, so2)
961 register struct socket *so1;
962 struct socket *so2;
963 {
964 int error;
965 //####### Assumes so1 is already locked
966
967 socket_lock(so2, 1);
968
969 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
970
971 socket_unlock(so2, 1);
972 return (error);
973 }
974
975
976 int
977 sodisconnectlocked(so)
978 register struct socket *so;
979 {
980 int error;
981
982 if ((so->so_state & SS_ISCONNECTED) == 0) {
983 error = ENOTCONN;
984 goto bad;
985 }
986 if (so->so_state & SS_ISDISCONNECTING) {
987 error = EALREADY;
988 goto bad;
989 }
990
991 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
992
993 if (error == 0) {
994 sflt_notify(so, sock_evt_disconnected, NULL);
995 }
996
997 bad:
998 return (error);
999 }
1000 //### Locking version
1001 int
1002 sodisconnect(so)
1003 register struct socket *so;
1004 {
1005 int error;
1006
1007 socket_lock(so, 1);
1008 error = sodisconnectlocked(so);
1009 socket_unlock(so, 1);
1010 return(error);
1011 }
1012
1013 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1014
1015 /*
1016 * sosendcheck will lock the socket buffer if it isn't locked and
1017 * verify that there is space for the data being inserted.
1018 */
1019
1020 static int
1021 sosendcheck(
1022 struct socket *so,
1023 struct sockaddr *addr,
1024 long resid,
1025 long clen,
1026 long atomic,
1027 int flags,
1028 int *sblocked)
1029 {
1030 int error = 0;
1031 long space;
1032
1033 restart:
1034 if (*sblocked == 0) {
1035 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1036 if (error)
1037 return error;
1038 *sblocked = 1;
1039 }
1040
1041 if (so->so_state & SS_CANTSENDMORE)
1042 return EPIPE;
1043
1044 if (so->so_error) {
1045 error = so->so_error;
1046 so->so_error = 0;
1047 return error;
1048 }
1049
1050 if ((so->so_state & SS_ISCONNECTED) == 0) {
1051 /*
1052 * `sendto' and `sendmsg' are allowed on a connection-
1053 * based socket if it supports implied connect.
1054 * Return ENOTCONN if not connected and no address is
1055 * supplied.
1056 */
1057 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1058 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1059 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1060 !(resid == 0 && clen != 0))
1061 return ENOTCONN;
1062 } else if (addr == 0 && !(flags&MSG_HOLD))
1063 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1064 }
1065 space = sbspace(&so->so_snd);
1066 if (flags & MSG_OOB)
1067 space += 1024;
1068 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1069 clen > so->so_snd.sb_hiwat)
1070 return EMSGSIZE;
1071 if (space < resid + clen &&
1072 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1073 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO))
1074 return EWOULDBLOCK;
1075 sbunlock(&so->so_snd, 1);
1076 error = sbwait(&so->so_snd);
1077 if (error) {
1078 return error;
1079 }
1080 goto restart;
1081 }
1082
1083 return 0;
1084 }
1085
1086 /*
1087 * Send on a socket.
1088 * If send must go all at once and message is larger than
1089 * send buffering, then hard error.
1090 * Lock against other senders.
1091 * If must go all at once and not enough room now, then
1092 * inform user that this would block and do nothing.
1093 * Otherwise, if nonblocking, send as much as possible.
1094 * The data to be sent is described by "uio" if nonzero,
1095 * otherwise by the mbuf chain "top" (which must be null
1096 * if uio is not). Data provided in mbuf chain must be small
1097 * enough to send all at once.
1098 *
1099 * Returns nonzero on error, timeout or signal; callers
1100 * must check for short counts if EINTR/ERESTART are returned.
1101 * Data and control buffers are freed on return.
1102 * Experiment:
1103 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1104 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1105 * point at the mbuf chain being constructed and go from there.
1106 */
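/*
 * Sketch of the caller-side contract described above: since EINTR/ERESTART
 * may arrive after some data has already been queued, a caller supplying a
 * uio would typically compare uio_resid() before and after the call to
 * distinguish a short write from a failed one, e.g.
 *
 *	int before = uio_resid(uio);
 *	error = sosend(so, NULL, uio, NULL, NULL, 0);
 *	if ((error == EINTR || error == ERESTART) && uio_resid(uio) < before)
 *		error = 0;
 */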
1107 int
1108 sosend(so, addr, uio, top, control, flags)
1109 register struct socket *so;
1110 struct sockaddr *addr;
1111 struct uio *uio;
1112 struct mbuf *top;
1113 struct mbuf *control;
1114 int flags;
1115
1116 {
1117 struct mbuf **mp;
1118 register struct mbuf *m, *freelist = NULL;
1119 register long space, len, resid;
1120 int clen = 0, error, dontroute, mlen, sendflags;
1121 int atomic = sosendallatonce(so) || top;
1122 int sblocked = 0;
1123 struct proc *p = current_proc();
1124
1125 if (uio)
1126 // LP64todo - fix this!
1127 resid = uio_resid(uio);
1128 else
1129 resid = top->m_pkthdr.len;
1130
1131 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1132 so,
1133 resid,
1134 so->so_snd.sb_cc,
1135 so->so_snd.sb_lowat,
1136 so->so_snd.sb_hiwat);
1137
1138 socket_lock(so, 1);
1139
1140 /*
1141 * In theory resid should be unsigned.
1142 * However, space must be signed, as it might be less than 0
1143 * if we over-committed, and we must use a signed comparison
1144 * of space and resid. On the other hand, a negative resid
1145 * causes us to loop sending 0-length segments to the protocol.
1146 *
1147 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1148 * type sockets since that's an error.
1149 */
1150 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1151 error = EINVAL;
1152 socket_unlock(so, 1);
1153 goto out;
1154 }
1155
1156 dontroute =
1157 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1158 (so->so_proto->pr_flags & PR_ATOMIC);
1159 if (p)
1160 p->p_stats->p_ru.ru_msgsnd++;
1161 if (control)
1162 clen = control->m_len;
1163
1164 do {
1165 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1166 if (error) {
1167 if (sblocked)
1168 goto release;
1169 else {
1170 socket_unlock(so, 1);
1171 goto out;
1172 }
1173 }
1174 mp = &top;
1175 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1176
1177 do {
1178
1179 if (uio == NULL) {
1180 /*
1181 * Data is prepackaged in "top".
1182 */
1183 resid = 0;
1184 if (flags & MSG_EOR)
1185 top->m_flags |= M_EOR;
1186 } else {
1187 int chainlength;
1188 int bytes_to_copy;
1189
1190 bytes_to_copy = min(resid, space);
1191
1192 if (sosendminchain > 0) {
1193 chainlength = 0;
1194 } else
1195 chainlength = sosendmaxchain;
1196
1197 socket_unlock(so, 0);
1198
1199 do {
1200 int num_needed;
1201 int hdrs_needed = (top == 0) ? 1 : 0;
1202
1203 /*
1204 * try to maintain a local cache of mbuf clusters needed to complete this write
1205 * the list is further limited to the number that are currently needed to fill the socket
1206 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1207 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
1208 * if we fail early (or miscalculate the number needed) make sure to release any clusters
1209 * we haven't yet consumed.
1210 */
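				/*
				 * Example of the sizing below, assuming NBPG == 4096: for
				 * bytes_to_copy == 10000, num_needed starts at 2 and the
				 * 1808-byte remainder (>= MINCLSIZE) bumps it to 3; whatever
				 * isn't consumed stays on `freelist' for later iterations.
				 */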
1211 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1212 num_needed = bytes_to_copy / NBPG;
1213
1214 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1215 num_needed++;
1216
1217 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1218 /* Fall back to cluster size if allocation failed */
1219 }
1220
1221 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1222 num_needed = bytes_to_copy / MCLBYTES;
1223
1224 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1225 num_needed++;
1226
1227 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1228 /* Fall back to a single mbuf if allocation failed */
1229 }
1230
1231 if (freelist == NULL) {
1232 if (top == 0)
1233 MGETHDR(freelist, M_WAIT, MT_DATA);
1234 else
1235 MGET(freelist, M_WAIT, MT_DATA);
1236
1237 if (freelist == NULL) {
1238 error = ENOBUFS;
1239 socket_lock(so, 0);
1240 if (sblocked) {
1241 goto release;
1242 } else {
1243 socket_unlock(so, 1);
1244 goto out;
1245 }
1246 }
1247 /*
1248 * For datagram protocols, leave room
1249 * for protocol headers in first mbuf.
1250 */
1251 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1252 MH_ALIGN(freelist, bytes_to_copy);
1253 }
1254 m = freelist;
1255 freelist = m->m_next;
1256 m->m_next = NULL;
1257
1258 if ((m->m_flags & M_EXT))
1259 mlen = m->m_ext.ext_size;
1260 else if ((m->m_flags & M_PKTHDR))
1261 mlen = MHLEN - m_leadingspace(m);
1262 else
1263 mlen = MLEN;
1264 len = min(mlen, bytes_to_copy);
1265
1266 chainlength += len;
1267
1268 space -= len;
1269
1270 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1271
1272 // LP64todo - fix this!
1273 resid = uio_resid(uio);
1274
1275 m->m_len = len;
1276 *mp = m;
1277 top->m_pkthdr.len += len;
1278 if (error)
1279 break;
1280 mp = &m->m_next;
1281 if (resid <= 0) {
1282 if (flags & MSG_EOR)
1283 top->m_flags |= M_EOR;
1284 break;
1285 }
1286 bytes_to_copy = min(resid, space);
1287
1288 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1289
1290 socket_lock(so, 0);
1291
1292 if (error)
1293 goto release;
1294 }
1295
1296 if (flags & (MSG_HOLD|MSG_SEND))
1297 { /* Enqueue for later, go away if HOLD */
1298 register struct mbuf *mb1;
1299 if (so->so_temp && (flags & MSG_FLUSH))
1300 { m_freem(so->so_temp);
1301 so->so_temp = NULL;
1302 }
1303 if (so->so_temp)
1304 so->so_tail->m_next = top;
1305 else
1306 so->so_temp = top;
1307 mb1 = top;
1308 while (mb1->m_next)
1309 mb1 = mb1->m_next;
1310 so->so_tail = mb1;
1311 if (flags&MSG_HOLD)
1312 { top = NULL;
1313 goto release;
1314 }
1315 top = so->so_temp;
1316 }
1317 if (dontroute)
1318 so->so_options |= SO_DONTROUTE;
1319 /* Compute flags here, for pru_send and NKEs */
1320 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1321 /*
1322 * If the user set MSG_EOF, the protocol
1323 * understands this flag and nothing left to
1324 * send then use PRU_SEND_EOF instead of PRU_SEND.
1325 */
1326 ((flags & MSG_EOF) &&
1327 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1328 (resid <= 0)) ?
1329 PRUS_EOF :
1330 /* If there is more to send set PRUS_MORETOCOME */
1331 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1332
1333 /*
1334 * Socket filter processing
1335 */
1336 {
1337 struct socket_filter_entry *filter;
1338 int filtered;
1339
1340 filtered = 0;
1341 error = 0;
1342 for (filter = so->so_filt; filter && (error == 0);
1343 filter = filter->sfe_next_onsocket) {
1344 if (filter->sfe_filter->sf_filter.sf_data_out) {
1345 int so_flags = 0;
1346 if (filtered == 0) {
1347 filtered = 1;
1348 /*
1349 * We don't let sbunlock unlock the socket because
1350 * we don't want it to decrement the usecount.
1351 */
1352 sbunlock(&so->so_snd, 1);
1353 sblocked = 0;
1354 socket_unlock(so, 0);
1355 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1356 }
1357 error = filter->sfe_filter->sf_filter.sf_data_out(
1358 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1359 }
1360 }
1361
1362 if (filtered) {
1363 /*
1364 * At this point, we've run at least one filter.
1365 * The socket is unlocked as is the socket buffer.
1366 */
1367 socket_lock(so, 0);
1368 if (error == EJUSTRETURN) {
1369 error = 0;
1370 clen = 0;
1371 control = 0;
1372 top = 0;
1373 socket_unlock(so, 1);
1374 goto out;
1375 }
1376 else if (error) {
1377 socket_unlock(so, 1);
1378 goto out;
1379 }
1380
1381
1382 /* Verify our state again, this will lock the socket buffer */
1383 error = sosendcheck(so, addr, top->m_pkthdr.len,
1384 control ? control->m_pkthdr.len : 0,
1385 atomic, flags, &sblocked);
1386 if (error) {
1387 if (sblocked) {
1388 /* sbunlock at release will unlock the socket */
1389 goto release;
1390 }
1391 else {
1392 socket_unlock(so, 1);
1393 goto out;
1394 }
1395 }
1396 }
1397 }
1398 /*
1399 * End Socket filter processing
1400 */
1401
1402 if (error == EJUSTRETURN) {
1403 /* A socket filter handled this data */
1404 error = 0;
1405 }
1406 else {
1407 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1408 sendflags, top, addr, control, p);
1409 }
1410 #ifdef __APPLE__
1411 if (flags & MSG_SEND)
1412 so->so_temp = NULL;
1413 #endif
1414 if (dontroute)
1415 so->so_options &= ~SO_DONTROUTE;
1416 clen = 0;
1417 control = 0;
1418 top = 0;
1419 mp = &top;
1420 if (error)
1421 goto release;
1422 } while (resid && space > 0);
1423 } while (resid);
1424
1425 release:
1426 sbunlock(&so->so_snd, 0); /* will unlock socket */
1427 out:
1428 if (top)
1429 m_freem(top);
1430 if (control)
1431 m_freem(control);
1432 if (freelist)
1433 m_freem_list(freelist);
1434
1435 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1436 so,
1437 resid,
1438 so->so_snd.sb_cc,
1439 space,
1440 error);
1441
1442 return (error);
1443 }
1444
1445 /*
1446 * Implement receive operations on a socket.
1447 * We depend on the way that records are added to the sockbuf
1448 * by sbappend*. In particular, each record (mbufs linked through m_next)
1449 * must begin with an address if the protocol so specifies,
1450 * followed by an optional mbuf or mbufs containing ancillary data,
1451 * and then zero or more mbufs of data.
1452 * In order to avoid blocking network interrupts for the entire time here,
1453 * we splx() while doing the actual copy to user space.
1454 * Although the sockbuf is locked, new data may still be appended,
1455 * and thus we must maintain consistency of the sockbuf during that time.
1456 *
1457 * The caller may receive the data as a single mbuf chain by supplying
1458 * an mbuf **mp0 for use in returning the chain. The uio is then used
1459 * only for the count in uio_resid.
1460 */
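/*
 * Sketch of the sockbuf record layout consumed below.  Within a record the
 * mbufs are linked by m_next; records are chained by m_nextpkt on the first
 * mbuf.  The MT_SONAME and MT_CONTROL mbufs are optional, depending on the
 * protocol:
 *
 *	sb_mb -> [MT_SONAME] -> [MT_CONTROL ...] -> [MT_DATA ...]	(record 1)
 *	             |
 *	         m_nextpkt
 *	             v
 *	         [MT_SONAME] -> ...					(record 2)
 */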
1461 int
1462 soreceive(so, psa, uio, mp0, controlp, flagsp)
1463 register struct socket *so;
1464 struct sockaddr **psa;
1465 struct uio *uio;
1466 struct mbuf **mp0;
1467 struct mbuf **controlp;
1468 int *flagsp;
1469 {
1470 register struct mbuf *m, **mp, *ml = NULL;
1471 register int flags, len, error, offset;
1472 struct protosw *pr = so->so_proto;
1473 struct mbuf *nextrecord;
1474 int moff, type = 0;
1475 // LP64todo - fix this!
1476 int orig_resid = uio_resid(uio);
1477 volatile struct mbuf *free_list;
1478 volatile int delayed_copy_len;
1479 int can_delay;
1480 int need_event;
1481 struct proc *p = current_proc();
1482
1483
1484 // LP64todo - fix this!
1485 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1486 so,
1487 uio_resid(uio),
1488 so->so_rcv.sb_cc,
1489 so->so_rcv.sb_lowat,
1490 so->so_rcv.sb_hiwat);
1491
1492 socket_lock(so, 1);
1493
1494 #ifdef MORE_LOCKING_DEBUG
1495 if (so->so_usecount == 1)
1496 panic("soreceive: so=%x no other reference on socket\n", so);
1497 #endif
1498 mp = mp0;
1499 if (psa)
1500 *psa = 0;
1501 if (controlp)
1502 *controlp = 0;
1503 if (flagsp)
1504 flags = *flagsp &~ MSG_EOR;
1505 else
1506 flags = 0;
1507 /*
1508 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1509 * regardless of the flags argument. Here is the case where
1510 * out-of-band data is not inline.
1511 */
1512 if ((flags & MSG_OOB) ||
1513 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1514 (so->so_options & SO_OOBINLINE) == 0 &&
1515 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1516 m = m_get(M_WAIT, MT_DATA);
1517 if (m == NULL) {
1518 socket_unlock(so, 1);
1519 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1520 return (ENOBUFS);
1521 }
1522 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1523 if (error)
1524 goto bad;
1525 socket_unlock(so, 0);
1526 do {
1527 // LP64todo - fix this!
1528 error = uiomove(mtod(m, caddr_t),
1529 (int) min(uio_resid(uio), m->m_len), uio);
1530 m = m_free(m);
1531 } while (uio_resid(uio) && error == 0 && m);
1532 socket_lock(so, 0);
1533 bad:
1534 if (m)
1535 m_freem(m);
1536 #ifdef __APPLE__
1537 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1538 if (error == EWOULDBLOCK || error == EINVAL) {
1539 /*
1540 * Let's try to get normal data:
1541 * EWOULDBLOCK: out-of-band data not received yet;
1542 * EINVAL: out-of-band data already read.
1543 */
1544 error = 0;
1545 goto nooob;
1546 } else if (error == 0 && flagsp)
1547 *flagsp |= MSG_OOB;
1548 }
1549 socket_unlock(so, 1);
1550 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1551 #endif
1552 return (error);
1553 }
1554 nooob:
1555 if (mp)
1556 *mp = (struct mbuf *)0;
1557 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1558 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1559
1560
1561 free_list = (struct mbuf *)0;
1562 delayed_copy_len = 0;
1563 restart:
1564 #ifdef MORE_LOCKING_DEBUG
1565 if (so->so_usecount <= 1)
1566 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1567 #endif
1568 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1569 if (error) {
1570 socket_unlock(so, 1);
1571 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1572 return (error);
1573 }
1574
1575 m = so->so_rcv.sb_mb;
1576 /*
1577 * If we have less data than requested, block awaiting more
1578 * (subject to any timeout) if:
1579 * 1. the current count is less than the low water mark, or
1580 * 2. MSG_WAITALL is set, and it is possible to do the entire
1581 * receive operation at once if we block (resid <= hiwat).
1582 * 3. MSG_DONTWAIT is not set
1583 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1584 * we have to do the receive in sections, and thus risk returning
1585 * a short count if a timeout or signal occurs after we start.
1586 */
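	/*
	 * For example, with the default receive low-water mark of 1,
	 * condition 1 only holds while the buffer is empty, so a plain
	 * blocking read returns as soon as any data is queued; MSG_WAITALL
	 * keeps waiting until the full request can be satisfied.
	 */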
1587 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1588 so->so_rcv.sb_cc < uio_resid(uio)) &&
1589 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1590 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1591 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1592
1593 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1594 if (so->so_error) {
1595 if (m)
1596 goto dontblock;
1597 error = so->so_error;
1598 if ((flags & MSG_PEEK) == 0)
1599 so->so_error = 0;
1600 goto release;
1601 }
1602 if (so->so_state & SS_CANTRCVMORE) {
1603 if (m)
1604 goto dontblock;
1605 else
1606 goto release;
1607 }
1608 for (; m; m = m->m_next)
1609 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1610 m = so->so_rcv.sb_mb;
1611 goto dontblock;
1612 }
1613 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1614 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1615 error = ENOTCONN;
1616 goto release;
1617 }
1618 if (uio_resid(uio) == 0)
1619 goto release;
1620 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1621 error = EWOULDBLOCK;
1622 goto release;
1623 }
1624 sbunlock(&so->so_rcv, 1);
1625 #ifdef EVEN_MORE_LOCKING_DEBUG
1626 if (socket_debug)
1627 printf("Waiting for socket data\n");
1628 #endif
1629
1630 error = sbwait(&so->so_rcv);
1631 #ifdef EVEN_MORE_LOCKING_DEBUG
1632 if (socket_debug)
1633 printf("SORECEIVE - sbwait returned %d\n", error);
1634 #endif
1635 if (so->so_usecount < 1)
1636 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1637 if (error) {
1638 socket_unlock(so, 1);
1639 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1640 return (error);
1641 }
1642 goto restart;
1643 }
1644 dontblock:
1645 #ifndef __APPLE__
1646 if (uio->uio_procp)
1647 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1648 #else /* __APPLE__ */
1649 /*
1650 * 2207985
1651 * This should be uio->uio_procp; however, some callers of this
1652 * function use auto variables with stack garbage, and fail to
1653 * fill out the uio structure properly.
1654 */
1655 if (p)
1656 p->p_stats->p_ru.ru_msgrcv++;
1657 #endif /* __APPLE__ */
1658 nextrecord = m->m_nextpkt;
1659 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1660 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1661 orig_resid = 0;
1662 if (psa) {
1663 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1664 mp0 == 0);
1665 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1666 error = EWOULDBLOCK;
1667 goto release;
1668 }
1669 }
1670 if (flags & MSG_PEEK) {
1671 m = m->m_next;
1672 } else {
1673 sbfree(&so->so_rcv, m);
1674 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1675 panic("soreceive: about to create invalid socketbuf");
1676 MFREE(m, so->so_rcv.sb_mb);
1677 m = so->so_rcv.sb_mb;
1678 }
1679 }
1680 while (m && m->m_type == MT_CONTROL && error == 0) {
1681 if (flags & MSG_PEEK) {
1682 if (controlp)
1683 *controlp = m_copy(m, 0, m->m_len);
1684 m = m->m_next;
1685 } else {
1686 sbfree(&so->so_rcv, m);
1687 if (controlp) {
1688 if (pr->pr_domain->dom_externalize &&
1689 mtod(m, struct cmsghdr *)->cmsg_type ==
1690 SCM_RIGHTS) {
1691 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1692 error = (*pr->pr_domain->dom_externalize)(m);
1693 socket_lock(so, 0);
1694 }
1695 *controlp = m;
1696 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1697 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1698 so->so_rcv.sb_mb = m->m_next;
1699 m->m_next = 0;
1700 m = so->so_rcv.sb_mb;
1701 } else {
1702 MFREE(m, so->so_rcv.sb_mb);
1703 m = so->so_rcv.sb_mb;
1704 }
1705 }
1706 if (controlp) {
1707 orig_resid = 0;
1708 controlp = &(*controlp)->m_next;
1709 }
1710 }
1711 if (m) {
1712 if ((flags & MSG_PEEK) == 0)
1713 m->m_nextpkt = nextrecord;
1714 type = m->m_type;
1715 if (type == MT_OOBDATA)
1716 flags |= MSG_OOB;
1717 }
1718 moff = 0;
1719 offset = 0;
1720
1721 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1722 can_delay = 1;
1723 else
1724 can_delay = 0;
1725
1726 need_event = 0;
1727
1728 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1729 if (m->m_type == MT_OOBDATA) {
1730 if (type != MT_OOBDATA)
1731 break;
1732 } else if (type == MT_OOBDATA)
1733 break;
1734 #ifndef __APPLE__
1735 /*
1736 * This assertion needs rework. The trouble is that AppleTalk uses many
1737 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1738 * For now just remove the assertion... CSM 9/98
1739 */
1740 else
1741 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1742 ("receive 3"));
1743 #else
1744 /*
1745 * Make sure to always set the MSG_OOB flag when getting
1746 * out of band data inline.
1747 */
1748 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1749 (so->so_options & SO_OOBINLINE) != 0 &&
1750 (so->so_state & SS_RCVATMARK) != 0) {
1751 flags |= MSG_OOB;
1752 }
1753 #endif
1754 so->so_state &= ~SS_RCVATMARK;
1755 // LP64todo - fix this!
1756 len = uio_resid(uio) - delayed_copy_len;
1757 if (so->so_oobmark && len > so->so_oobmark - offset)
1758 len = so->so_oobmark - offset;
1759 if (len > m->m_len - moff)
1760 len = m->m_len - moff;
1761 /*
1762 * If mp is set, just pass back the mbufs.
1763 * Otherwise copy them out via the uio, then free.
1764 * Sockbuf must be consistent here (points to current mbuf,
1765 * it points to next record) when we drop priority;
1766 * we must note any additions to the sockbuf when we
1767 * block interrupts again.
1768 */
1769 if (mp == 0) {
1770 if (can_delay && len == m->m_len) {
1771 /*
1772 * only delay the copy if we're consuming the
1773 * mbuf and we're NOT in MSG_PEEK mode
1774 * and we have enough data to make it worthwhile
1775 * to drop and retake the funnel... can_delay
1776 * reflects the state of the 2 latter constraints;
1777 * moff should always be zero in these cases
1778 */
1779 delayed_copy_len += len;
1780 } else {
1781
1782 if (delayed_copy_len) {
1783 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1784
1785 if (error) {
1786 goto release;
1787 }
1788 if (m != so->so_rcv.sb_mb) {
1789 /*
1790 * can only get here if MSG_PEEK is not set
1791 * therefore, m should point at the head of the rcv queue...
1792 * if it doesn't, it means something drastically changed
1793 * while we were out from behind the funnel in sodelayed_copy...
1794 * perhaps a RST on the stream... in any event, the stream has
1795 * been interrupted... it's probably best just to return
1796 * whatever data we've moved and let the caller sort it out...
1797 */
1798 break;
1799 }
1800 }
1801 socket_unlock(so, 0);
1802 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1803 socket_lock(so, 0);
1804
1805 if (error)
1806 goto release;
1807 }
1808 } else
1809 uio_setresid(uio, (uio_resid(uio) - len));
1810
1811 if (len == m->m_len - moff) {
1812 if (m->m_flags & M_EOR)
1813 flags |= MSG_EOR;
1814 if (flags & MSG_PEEK) {
1815 m = m->m_next;
1816 moff = 0;
1817 } else {
1818 nextrecord = m->m_nextpkt;
1819 sbfree(&so->so_rcv, m);
1820 m->m_nextpkt = NULL;
1821
1822 if (mp) {
1823 *mp = m;
1824 mp = &m->m_next;
1825 so->so_rcv.sb_mb = m = m->m_next;
1826 *mp = (struct mbuf *)0;
1827 } else {
1828 if (free_list == NULL)
1829 free_list = m;
1830 else
1831 ml->m_next = m;
1832 ml = m;
1833 so->so_rcv.sb_mb = m = m->m_next;
1834 ml->m_next = 0;
1835 }
1836 if (m)
1837 m->m_nextpkt = nextrecord;
1838 }
1839 } else {
1840 if (flags & MSG_PEEK)
1841 moff += len;
1842 else {
1843 if (mp)
1844 *mp = m_copym(m, 0, len, M_WAIT);
1845 m->m_data += len;
1846 m->m_len -= len;
1847 so->so_rcv.sb_cc -= len;
1848 }
1849 }
1850 if (so->so_oobmark) {
1851 if ((flags & MSG_PEEK) == 0) {
1852 so->so_oobmark -= len;
1853 if (so->so_oobmark == 0) {
1854 so->so_state |= SS_RCVATMARK;
1855 /*
1856 * delay posting the actual event until after
1857 * any delayed copy processing has finished
1858 */
1859 need_event = 1;
1860 break;
1861 }
1862 } else {
1863 offset += len;
1864 if (offset == so->so_oobmark)
1865 break;
1866 }
1867 }
1868 if (flags & MSG_EOR)
1869 break;
1870 /*
1871 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for a non-atomic socket),
1872 * we must not quit until "uio->uio_resid == 0" or an error
1873 * termination. If a signal/timeout occurs, return
1874 * with a short count but without error.
1875 * Keep sockbuf locked against other readers.
1876 */
1877 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1878 !sosendallatonce(so) && !nextrecord) {
1879 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1880 goto release;
1881
1882 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1883 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1884 if (sbwait(&so->so_rcv)) {
1885 error = 0;
1886 goto release;
1887 }
1888 /*
1889 * have to wait until after we get back from the sbwait to do the copy because
1890 * we will drop the funnel if we have enough data that has been delayed... by dropping
1891 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1892 * and to change the state of this socket... we're issuing the sbwait because
1893 * the socket is empty and we're expecting the netisr thread to wake us up when more
1894 * packets arrive... if we allow that processing to happen and then sbwait, we
1895 * could stall forever with packets sitting in the socket if no further packets
1896 * arrive from the remote side.
1897 *
1898 * we want to copy before we've collected all the data to satisfy this request to
1899 * allow the copy to overlap the incoming packet processing on an MP system
1900 */
1901 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1902
1903 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1904
1905 if (error)
1906 goto release;
1907 }
1908 m = so->so_rcv.sb_mb;
1909 if (m) {
1910 nextrecord = m->m_nextpkt;
1911 }
1912 }
1913 }
1914 #ifdef MORE_LOCKING_DEBUG
1915 if (so->so_usecount <= 1)
1916 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1917 #endif
1918
1919 if (m && pr->pr_flags & PR_ATOMIC) {
1920 #ifdef __APPLE__
1921 if (so->so_options & SO_DONTTRUNC)
1922 flags |= MSG_RCVMORE;
1923 else {
1924 #endif
1925 flags |= MSG_TRUNC;
1926 if ((flags & MSG_PEEK) == 0)
1927 (void) sbdroprecord(&so->so_rcv);
1928 #ifdef __APPLE__
1929 }
1930 #endif
1931 }
1932 if ((flags & MSG_PEEK) == 0) {
1933 if (m == 0)
1934 so->so_rcv.sb_mb = nextrecord;
1935 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1936 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1937 }
1938 #ifdef __APPLE__
1939 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1940 flags |= MSG_HAVEMORE;
1941
1942 if (delayed_copy_len) {
1943 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1944
1945 if (error)
1946 goto release;
1947 }
1948 if (free_list) {
1949 m_freem_list((struct mbuf *)free_list);
1950 free_list = (struct mbuf *)0;
1951 }
1952 if (need_event)
1953 postevent(so, 0, EV_OOB);
1954 #endif
1955 if (orig_resid == uio_resid(uio) && orig_resid &&
1956 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1957 sbunlock(&so->so_rcv, 1);
1958 goto restart;
1959 }
1960
1961 if (flagsp)
1962 *flagsp |= flags;
1963 release:
1964 #ifdef MORE_LOCKING_DEBUG
1965 if (so->so_usecount <= 1)
1966 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1967 #endif
1968 if (delayed_copy_len) {
1969 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1970 }
1971 if (free_list) {
1972 m_freem_list((struct mbuf *)free_list);
1973 }
1974 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1975
1976 // LP64todo - fix this!
1977 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1978 so,
1979 uio_resid(uio),
1980 so->so_rcv.sb_cc,
1981 0,
1982 error);
1983
1984 return (error);
1985 }
1986
1987
1988 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1989 {
1990 int error = 0;
1991 struct mbuf *m;
1992
1993 m = *free_list;
1994
1995 socket_unlock(so, 0);
1996
1997 while (m && error == 0) {
1998
1999 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2000
2001 m = m->m_next;
2002 }
2003 m_freem_list(*free_list);
2004
2005 *free_list = (struct mbuf *)NULL;
2006 *resid = 0;
2007
2008 socket_lock(so, 0);
2009
2010 return (error);
2011 }
2012
2013
2014 int
2015 soshutdown(so, how)
2016 register struct socket *so;
2017 register int how;
2018 {
2019 register struct protosw *pr = so->so_proto;
2020 int ret;
2021
2022 socket_lock(so, 1);
2023
2024 sflt_notify(so, sock_evt_shutdown, &how);
2025
2026 if (how != SHUT_WR) {
2027 sorflush(so);
2028 postevent(so, 0, EV_RCLOSED);
2029 }
2030 if (how != SHUT_RD) {
2031 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2032 postevent(so, 0, EV_WCLOSED);
2033 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2034 socket_unlock(so, 1);
2035 return(ret);
2036 }
2037
2038 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2039 socket_unlock(so, 1);
2040 return (0);
2041 }
2042
2043 void
2044 sorflush(so)
2045 register struct socket *so;
2046 {
2047 register struct sockbuf *sb = &so->so_rcv;
2048 register struct protosw *pr = so->so_proto;
2049 struct sockbuf asb;
2050
2051 #ifdef MORE_LOCKING_DEBUG
2052 lck_mtx_t * mutex_held;
2053
2054 if (so->so_proto->pr_getlock != NULL)
2055 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2056 else
2057 mutex_held = so->so_proto->pr_domain->dom_mtx;
2058 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2059 #endif
2060
2061 sflt_notify(so, sock_evt_flush_read, NULL);
2062
2063 sb->sb_flags |= SB_NOINTR;
2064 (void) sblock(sb, M_WAIT);
2065 socantrcvmore(so);
2066 sbunlock(sb, 1);
2067 #ifdef __APPLE__
2068 selthreadclear(&sb->sb_sel);
2069 #endif
2070 asb = *sb;
2071 bzero((caddr_t)sb, sizeof (*sb));
2072 sb->sb_so = so; /* reestablish link to socket */
2073 if (asb.sb_flags & SB_KNOTE) {
2074 sb->sb_sel.si_note = asb.sb_sel.si_note;
2075 sb->sb_flags = SB_KNOTE;
2076 }
2077 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2078 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2079 sbrelease(&asb);
2080 }
2081
2082 /*
2083 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2084 * an additional variant to handle the case where the option value needs
2085 * to be some kind of integer, but not a specific size.
2086 * In addition to their use here, these functions are also called by the
2087 * protocol-level pr_ctloutput() routines.
2088 */
2089 int
2090 sooptcopyin(sopt, buf, len, minlen)
2091 struct sockopt *sopt;
2092 void *buf;
2093 size_t len;
2094 size_t minlen;
2095 {
2096 size_t valsize;
2097
2098 /*
2099 * If the user gives us more than we wanted, we ignore it,
2100 * but if we don't get the minimum length the caller
2101 * wants, we return EINVAL. On success, sopt->sopt_valsize
2102 * is set to however much we actually retrieved.
2103 */
2104 if ((valsize = sopt->sopt_valsize) < minlen)
2105 return EINVAL;
2106 if (valsize > len)
2107 sopt->sopt_valsize = valsize = len;
2108
2109 if (sopt->sopt_p != 0)
2110 return (copyin(sopt->sopt_val, buf, valsize));
2111
2112 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2113 return 0;
2114 }
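/*
 * For illustration only: a minimal, hypothetical protocol-level set handler
 * of the kind the comment above alludes to, showing how sooptcopyin() is
 * typically driven from a pr_ctloutput() routine.  The name
 * myproto_setopt_foo and the "record it in the pcb" step are placeholders,
 * not part of any real protocol.
 */
static int
myproto_setopt_foo(__unused struct socket *so, struct sockopt *sopt)
{
	int optval;
	int error;

	/*
	 * Require at least sizeof (optval) bytes; a larger user buffer is
	 * silently truncated to that size, a smaller one fails with EINVAL,
	 * and sopt_valsize ends up holding the amount actually copied.
	 */
	error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval));
	if (error)
		return (error);
	if (optval < 0)
		return (EINVAL);
	/* a real handler would record optval in its protocol control block */
	return (0);
}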
2115
2116 int
2117 sosetopt(so, sopt)
2118 struct socket *so;
2119 struct sockopt *sopt;
2120 {
2121 int error, optval;
2122 struct linger l;
2123 struct timeval tv;
2124 short val;
2125
2126 socket_lock(so, 1);
2127
2128 if (sopt->sopt_dir != SOPT_SET) {
2129 sopt->sopt_dir = SOPT_SET;
2130 }
2131
2132 {
2133 struct socket_filter_entry *filter;
2134 int filtered = 0;
2135 error = 0;
2136 for (filter = so->so_filt; filter && (error == 0);
2137 filter = filter->sfe_next_onsocket) {
2138 if (filter->sfe_filter->sf_filter.sf_setoption) {
2139 if (filtered == 0) {
2140 filtered = 1;
2141 sflt_use(so);
2142 socket_unlock(so, 0);
2143 }
2144 error = filter->sfe_filter->sf_filter.sf_setoption(
2145 filter->sfe_cookie, so, sopt);
2146 }
2147 }
2148
2149 if (filtered != 0) {
2150 socket_lock(so, 0);
2151 sflt_unuse(so);
2152
2153 if (error) {
2154 if (error == EJUSTRETURN)
2155 error = 0;
2156 goto bad;
2157 }
2158 }
2159 }
2160
2161 error = 0;
2162 if (sopt->sopt_level != SOL_SOCKET) {
2163 if (so->so_proto && so->so_proto->pr_ctloutput) {
2164 error = (*so->so_proto->pr_ctloutput)
2165 (so, sopt);
2166 socket_unlock(so, 1);
2167 return (error);
2168 }
2169 error = ENOPROTOOPT;
2170 } else {
2171 switch (sopt->sopt_name) {
2172 case SO_LINGER:
2173 case SO_LINGER_SEC:
2174 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2175 if (error)
2176 goto bad;
2177
2178 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2179 if (l.l_onoff)
2180 so->so_options |= SO_LINGER;
2181 else
2182 so->so_options &= ~SO_LINGER;
2183 break;
2184
2185 case SO_DEBUG:
2186 case SO_KEEPALIVE:
2187 case SO_DONTROUTE:
2188 case SO_USELOOPBACK:
2189 case SO_BROADCAST:
2190 case SO_REUSEADDR:
2191 case SO_REUSEPORT:
2192 case SO_OOBINLINE:
2193 case SO_TIMESTAMP:
2194 #ifdef __APPLE__
2195 case SO_DONTTRUNC:
2196 case SO_WANTMORE:
2197 case SO_WANTOOBFLAG:
2198 #endif
2199 error = sooptcopyin(sopt, &optval, sizeof optval,
2200 sizeof optval);
2201 if (error)
2202 goto bad;
2203 if (optval)
2204 so->so_options |= sopt->sopt_name;
2205 else
2206 so->so_options &= ~sopt->sopt_name;
2207 break;
2208
2209 case SO_SNDBUF:
2210 case SO_RCVBUF:
2211 case SO_SNDLOWAT:
2212 case SO_RCVLOWAT:
2213 error = sooptcopyin(sopt, &optval, sizeof optval,
2214 sizeof optval);
2215 if (error)
2216 goto bad;
2217
2218 /*
2219 * Values < 1 make no sense for any of these
2220 * options, so disallow them.
2221 */
2222 if (optval < 1) {
2223 error = EINVAL;
2224 goto bad;
2225 }
2226
2227 switch (sopt->sopt_name) {
2228 case SO_SNDBUF:
2229 case SO_RCVBUF:
2230 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2231 &so->so_snd : &so->so_rcv,
2232 (u_long) optval) == 0) {
2233 error = ENOBUFS;
2234 goto bad;
2235 }
2236 break;
2237
2238 /*
2239 * Make sure the low-water is never greater than
2240 * the high-water.
2241 */
2242 case SO_SNDLOWAT:
2243 so->so_snd.sb_lowat =
2244 (optval > so->so_snd.sb_hiwat) ?
2245 so->so_snd.sb_hiwat : optval;
2246 break;
2247 case SO_RCVLOWAT:
2248 so->so_rcv.sb_lowat =
2249 (optval > so->so_rcv.sb_hiwat) ?
2250 so->so_rcv.sb_hiwat : optval;
2251 break;
2252 }
2253 break;
2254
2255 case SO_SNDTIMEO:
2256 case SO_RCVTIMEO:
2257 error = sooptcopyin(sopt, &tv, sizeof tv,
2258 sizeof tv);
2259 if (error)
2260 goto bad;
2261
2262 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2263 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2264 error = EDOM;
2265 goto bad;
2266 }
2267
2268 switch (sopt->sopt_name) {
2269 case SO_SNDTIMEO:
2270 so->so_snd.sb_timeo = tv;
2271 break;
2272 case SO_RCVTIMEO:
2273 so->so_rcv.sb_timeo = tv;
2274 break;
2275 }
2276 break;
2277
2278 case SO_NKE:
2279 {
2280 struct so_nke nke;
2281
2282 error = sooptcopyin(sopt, &nke,
2283 sizeof nke, sizeof nke);
2284 if (error)
2285 goto bad;
2286
2287 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2288 break;
2289 }
2290
2291 case SO_NOSIGPIPE:
2292 error = sooptcopyin(sopt, &optval, sizeof optval,
2293 sizeof optval);
2294 if (error)
2295 goto bad;
2296 if (optval)
2297 so->so_flags |= SOF_NOSIGPIPE;
2298 else
2299 so->so_flags &= ~SOF_NOSIGPIPE;
2300
2301 break;
2302
2303 case SO_NOADDRERR:
2304 error = sooptcopyin(sopt, &optval, sizeof optval,
2305 sizeof optval);
2306 if (error)
2307 goto bad;
2308 if (optval)
2309 so->so_flags |= SOF_NOADDRAVAIL;
2310 else
2311 so->so_flags &= ~SOF_NOADDRAVAIL;
2312
2313 break;
2314
2315 default:
2316 error = ENOPROTOOPT;
2317 break;
2318 }
2319 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2320 (void) ((*so->so_proto->pr_ctloutput)
2321 (so, sopt));
2322 }
2323 }
2324 bad:
2325 socket_unlock(so, 1);
2326 return (error);
2327 }
2328
2329 /* Helper routine for getsockopt */
2330 int
2331 sooptcopyout(sopt, buf, len)
2332 struct sockopt *sopt;
2333 void *buf;
2334 size_t len;
2335 {
2336 int error;
2337 size_t valsize;
2338
2339 error = 0;
2340
2341 /*
2342 * Documented get behavior is that we always return a value,
2343 * possibly truncated to fit in the user's buffer.
2344 * Traditional behavior is that we always tell the user
2345 * precisely how much we copied, rather than something useful
2346 * like the total amount we had available for her.
2347 * Note that this interface is not idempotent; the entire answer must
2348 * be generated ahead of time.
2349 */
2350 valsize = min(len, sopt->sopt_valsize);
2351 sopt->sopt_valsize = valsize;
2352 if (sopt->sopt_val != USER_ADDR_NULL) {
2353 if (sopt->sopt_p != 0)
2354 error = copyout(buf, sopt->sopt_val, valsize);
2355 else
2356 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2357 }
2358 return error;
2359 }
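/*
 * For illustration only: a hypothetical get handler built on sooptcopyout(),
 * matching the behavior described in the comment above -- the copy is
 * truncated to whatever buffer the caller offered, and sopt_valsize reports
 * the bytes actually copied rather than the bytes available.  The
 * myproto_stats layout and function name are placeholders.
 */
struct myproto_stats {
	u_long	ms_rcv_bytes;	/* bytes currently queued for receive */
	u_long	ms_snd_bytes;	/* bytes currently queued for send */
};

static int
myproto_getopt_stats(struct socket *so, struct sockopt *sopt)
{
	struct myproto_stats stats;

	stats.ms_rcv_bytes = so->so_rcv.sb_cc;
	stats.ms_snd_bytes = so->so_snd.sb_cc;
	/*
	 * A caller whose buffer is smaller than sizeof (stats) receives only
	 * the leading bytes; nothing here needs to special-case that.
	 */
	return (sooptcopyout(sopt, &stats, sizeof (stats)));
}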
2360
2361 int
2362 sogetopt(so, sopt)
2363 struct socket *so;
2364 struct sockopt *sopt;
2365 {
2366 int error, optval;
2367 struct linger l;
2368 struct timeval tv;
2369
2370 if (sopt->sopt_dir != SOPT_GET) {
2371 sopt->sopt_dir = SOPT_GET;
2372 }
2373
2374 socket_lock(so, 1);
2375
2376 {
2377 struct socket_filter_entry *filter;
2378 int filtered = 0;
2379 error = 0;
2380 for (filter = so->so_filt; filter && (error == 0);
2381 filter = filter->sfe_next_onsocket) {
2382 if (filter->sfe_filter->sf_filter.sf_getoption) {
2383 if (filtered == 0) {
2384 filtered = 1;
2385 sflt_use(so);
2386 socket_unlock(so, 0);
2387 }
2388 error = filter->sfe_filter->sf_filter.sf_getoption(
2389 filter->sfe_cookie, so, sopt);
2390 }
2391 }
2392 if (filtered != 0) {
2393 socket_lock(so, 0);
2394 sflt_unuse(so);
2395
2396 if (error) {
2397 if (error == EJUSTRETURN)
2398 error = 0;
2399 socket_unlock(so, 1);
2400 return error;
2401 }
2402 }
2403 }
2404
2405 error = 0;
2406 if (sopt->sopt_level != SOL_SOCKET) {
2407 if (so->so_proto && so->so_proto->pr_ctloutput) {
2408 error = (*so->so_proto->pr_ctloutput)
2409 (so, sopt);
2410 socket_unlock(so, 1);
2411 return (error);
2412 } else {
2413 socket_unlock(so, 1);
2414 return (ENOPROTOOPT);
2415 }
2416 } else {
2417 switch (sopt->sopt_name) {
2418 case SO_LINGER:
2419 case SO_LINGER_SEC:
2420 l.l_onoff = so->so_options & SO_LINGER;
2421 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2422 so->so_linger / hz;
2423 error = sooptcopyout(sopt, &l, sizeof l);
2424 break;
2425
2426 case SO_USELOOPBACK:
2427 case SO_DONTROUTE:
2428 case SO_DEBUG:
2429 case SO_KEEPALIVE:
2430 case SO_REUSEADDR:
2431 case SO_REUSEPORT:
2432 case SO_BROADCAST:
2433 case SO_OOBINLINE:
2434 case SO_TIMESTAMP:
2435 #ifdef __APPLE__
2436 case SO_DONTTRUNC:
2437 case SO_WANTMORE:
2438 case SO_WANTOOBFLAG:
2439 #endif
2440 optval = so->so_options & sopt->sopt_name;
2441 integer:
2442 error = sooptcopyout(sopt, &optval, sizeof optval);
2443 break;
2444
2445 case SO_TYPE:
2446 optval = so->so_type;
2447 goto integer;
2448
2449 #ifdef __APPLE__
2450 case SO_NREAD:
2451 {
2452 int pkt_total;
2453 struct mbuf *m1;
2454
2455 pkt_total = 0;
2456 m1 = so->so_rcv.sb_mb;
2457 if (so->so_proto->pr_flags & PR_ATOMIC)
2458 {
2459 while (m1) {
2460 if (m1->m_type == MT_DATA)
2461 pkt_total += m1->m_len;
2462 m1 = m1->m_next;
2463 }
2464 optval = pkt_total;
2465 } else
2466 optval = so->so_rcv.sb_cc;
2467 goto integer;
2468 }
2469 case SO_NWRITE:
2470 optval = so->so_snd.sb_cc;
2471 goto integer;
2472 #endif
2473 case SO_ERROR:
2474 optval = so->so_error;
2475 so->so_error = 0;
2476 goto integer;
2477
2478 case SO_SNDBUF:
2479 optval = so->so_snd.sb_hiwat;
2480 goto integer;
2481
2482 case SO_RCVBUF:
2483 optval = so->so_rcv.sb_hiwat;
2484 goto integer;
2485
2486 case SO_SNDLOWAT:
2487 optval = so->so_snd.sb_lowat;
2488 goto integer;
2489
2490 case SO_RCVLOWAT:
2491 optval = so->so_rcv.sb_lowat;
2492 goto integer;
2493
2494 case SO_SNDTIMEO:
2495 case SO_RCVTIMEO:
2496 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2497 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2498
2499 error = sooptcopyout(sopt, &tv, sizeof tv);
2500 break;
2501
2502 case SO_NOSIGPIPE:
2503 optval = (so->so_flags & SOF_NOSIGPIPE);
2504 goto integer;
2505
2506 case SO_NOADDRERR:
2507 optval = (so->so_flags & SOF_NOADDRAVAIL);
2508 goto integer;
2509
2510 default:
2511 error = ENOPROTOOPT;
2512 break;
2513 }
2514 socket_unlock(so, 1);
2515 return (error);
2516 }
2517 }
2518
2519 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2520 int
2521 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2522 {
2523 struct mbuf *m, *m_prev;
2524 int sopt_size = sopt->sopt_valsize;
2525
2526 if (sopt_size > MAX_SOOPTGETM_SIZE)
2527 return EMSGSIZE;
2528
2529 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2530 if (m == 0)
2531 return ENOBUFS;
2532 if (sopt_size > MLEN) {
2533 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2534 if ((m->m_flags & M_EXT) == 0) {
2535 m_free(m);
2536 return ENOBUFS;
2537 }
2538 m->m_len = min(MCLBYTES, sopt_size);
2539 } else {
2540 m->m_len = min(MLEN, sopt_size);
2541 }
2542 sopt_size -= m->m_len;
2543 *mp = m;
2544 m_prev = m;
2545
2546 while (sopt_size) {
2547 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2548 if (m == 0) {
2549 m_freem(*mp);
2550 return ENOBUFS;
2551 }
2552 if (sopt_size > MLEN) {
2553 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2554 if ((m->m_flags & M_EXT) == 0) {
2555 m_freem(*mp);
2556 return ENOBUFS;
2557 }
2558 m->m_len = min(MCLBYTES, sopt_size);
2559 } else {
2560 m->m_len = min(MLEN, sopt_size);
2561 }
2562 sopt_size -= m->m_len;
2563 m_prev->m_next = m;
2564 m_prev = m;
2565 }
2566 return 0;
2567 }
2568
2569 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2570 int
2571 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2572 {
2573 struct mbuf *m0 = m;
2574
2575 if (sopt->sopt_val == USER_ADDR_NULL)
2576 return 0;
2577 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2578 if (sopt->sopt_p != NULL) {
2579 int error;
2580
2581 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2582 if (error != 0) {
2583 m_freem(m0);
2584 return(error);
2585 }
2586 } else
2587 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2588 sopt->sopt_valsize -= m->m_len;
2589 sopt->sopt_val += m->m_len;
2590 m = m->m_next;
2591 }
2592 if (m != NULL) /* should have been allocated with enough space at ip6_sooptmcopyin() */
2593 panic("soopt_mcopyin");
2594 return 0;
2595 }
2596
2597 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2598 int
2599 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2600 {
2601 struct mbuf *m0 = m;
2602 size_t valsize = 0;
2603
2604 if (sopt->sopt_val == USER_ADDR_NULL)
2605 return 0;
2606 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2607 if (sopt->sopt_p != NULL) {
2608 int error;
2609
2610 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2611 if (error != 0) {
2612 m_freem(m0);
2613 return(error);
2614 }
2615 } else
2616 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2617 sopt->sopt_valsize -= m->m_len;
2618 sopt->sopt_val += m->m_len;
2619 valsize += m->m_len;
2620 m = m->m_next;
2621 }
2622 if (m != NULL) {
2623 /* user-land should have supplied a large enough soopt buffer */
2624 m_freem(m0);
2625 return(EINVAL);
2626 }
2627 sopt->sopt_valsize = valsize;
2628 return 0;
2629 }
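/*
 * For illustration only: how the compatibility helpers above compose on the
 * set path for a handler that still wants the old (__FreeBSD__ < 3)
 * mbuf-chain interface.  soopt_getm() sizes a chain to sopt_valsize,
 * soopt_mcopyin() fills it from the user's buffer (freeing the chain itself
 * on a copyin failure), and the legacy code then consumes the chain.  The
 * function name is a placeholder.
 */
static int
myproto_legacy_setopt(__unused struct socket *so, struct sockopt *sopt)
{
	struct mbuf *m = NULL;
	int error;

	error = soopt_getm(sopt, &m);
	if (error != 0)
		return (error);
	error = soopt_mcopyin(sopt, m);
	if (error != 0)
		return (error);	/* chain already freed by soopt_mcopyin() */

	/* ... hand the chain to the old-style option handler here ... */

	m_freem(m);
	return (0);
}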
2630
2631 void
2632 sohasoutofband(so)
2633 register struct socket *so;
2634 {
2635 struct proc *p;
2636
2637 if (so->so_pgid < 0)
2638 gsignal(-so->so_pgid, SIGURG);
2639 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2640 psignal(p, SIGURG);
2641 selwakeup(&so->so_rcv.sb_sel);
2642 }
2643
2644 int
2645 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2646 {
2647 struct proc *p = current_proc();
2648 int revents = 0;
2649
2650 socket_lock(so, 1);
2651
2652 if (events & (POLLIN | POLLRDNORM))
2653 if (soreadable(so))
2654 revents |= events & (POLLIN | POLLRDNORM);
2655
2656 if (events & (POLLOUT | POLLWRNORM))
2657 if (sowriteable(so))
2658 revents |= events & (POLLOUT | POLLWRNORM);
2659
2660 if (events & (POLLPRI | POLLRDBAND))
2661 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2662 revents |= events & (POLLPRI | POLLRDBAND);
2663
2664 if (revents == 0) {
2665 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2666 /* Darwin sets the flag first, BSD calls selrecord first */
2667 so->so_rcv.sb_flags |= SB_SEL;
2668 selrecord(p, &so->so_rcv.sb_sel, wql);
2669 }
2670
2671 if (events & (POLLOUT | POLLWRNORM)) {
2672 /* Darwin sets the flag first, BSD calls selrecord first */
2673 so->so_snd.sb_flags |= SB_SEL;
2674 selrecord(p, &so->so_snd.sb_sel, wql);
2675 }
2676 }
2677
2678 socket_unlock(so, 1);
2679 return (revents);
2680 }
2681
2682 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2683
2684 int
2685 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2686 {
2687 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2688 struct sockbuf *sb;
2689 socket_lock(so, 1);
2690
2691 switch (kn->kn_filter) {
2692 case EVFILT_READ:
2693 if (so->so_options & SO_ACCEPTCONN)
2694 kn->kn_fop = &solisten_filtops;
2695 else
2696 kn->kn_fop = &soread_filtops;
2697 sb = &so->so_rcv;
2698 break;
2699 case EVFILT_WRITE:
2700 kn->kn_fop = &sowrite_filtops;
2701 sb = &so->so_snd;
2702 break;
2703 default:
2704 socket_unlock(so, 1);
2705 return (1);
2706 }
2707
2708 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2709 sb->sb_flags |= SB_KNOTE;
2710 socket_unlock(so, 1);
2711 return (0);
2712 }
2713
2714 static void
2715 filt_sordetach(struct knote *kn)
2716 {
2717 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2718
2719 socket_lock(so, 1);
2720 if (so->so_rcv.sb_flags & SB_KNOTE)
2721 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2722 so->so_rcv.sb_flags &= ~SB_KNOTE;
2723 socket_unlock(so, 1);
2724 }
2725
2726 /*ARGSUSED*/
2727 static int
2728 filt_soread(struct knote *kn, long hint)
2729 {
2730 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2731
2732 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2733 socket_lock(so, 1);
2734
2735 if (so->so_oobmark) {
2736 if (kn->kn_flags & EV_OOBAND) {
2737 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2738 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2739 socket_unlock(so, 1);
2740 return (1);
2741 }
2742 kn->kn_data = so->so_oobmark;
2743 kn->kn_flags |= EV_OOBAND;
2744 } else {
2745 kn->kn_data = so->so_rcv.sb_cc;
2746 if (so->so_state & SS_CANTRCVMORE) {
2747 kn->kn_flags |= EV_EOF;
2748 kn->kn_fflags = so->so_error;
2749 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2750 socket_unlock(so, 1);
2751 return (1);
2752 }
2753 }
2754
2755 if (so->so_state & SS_RCVATMARK) {
2756 if (kn->kn_flags & EV_OOBAND) {
2757 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2758 socket_unlock(so, 1);
2759 return (1);
2760 }
2761 kn->kn_flags |= EV_OOBAND;
2762 } else if (kn->kn_flags & EV_OOBAND) {
2763 kn->kn_data = 0;
2764 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2765 socket_unlock(so, 1);
2766 return (0);
2767 }
2768
2769 if (so->so_error) { /* temporary udp error */
2770 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2771 socket_unlock(so, 1);
2772 return (1);
2773 }
2774
2775 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2776 socket_unlock(so, 1);
2777
2778 return( kn->kn_flags & EV_OOBAND ||
2779 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2780 kn->kn_sdata : so->so_rcv.sb_lowat));
2781 }
2782
2783 static void
2784 filt_sowdetach(struct knote *kn)
2785 {
2786 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2787 socket_lock(so, 1);
2788
2789 if (so->so_snd.sb_flags & SB_KNOTE)
2790 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2791 so->so_snd.sb_flags &= ~SB_KNOTE;
2792 socket_unlock(so, 1);
2793 }
2794
2795 /*ARGSUSED*/
2796 static int
2797 filt_sowrite(struct knote *kn, long hint)
2798 {
2799 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2800
2801 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2802 socket_lock(so, 1);
2803
2804 kn->kn_data = sbspace(&so->so_snd);
2805 if (so->so_state & SS_CANTSENDMORE) {
2806 kn->kn_flags |= EV_EOF;
2807 kn->kn_fflags = so->so_error;
2808 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2809 socket_unlock(so, 1);
2810 return (1);
2811 }
2812 if (so->so_error) { /* temporary udp error */
2813 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2814 socket_unlock(so, 1);
2815 return (1);
2816 }
2817 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2818 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2819 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2820 socket_unlock(so, 1);
2821 return (0);
2822 }
2823 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2824 socket_unlock(so, 1);
2825 if (kn->kn_sfflags & NOTE_LOWAT)
2826 return (kn->kn_data >= kn->kn_sdata);
2827 return (kn->kn_data >= so->so_snd.sb_lowat);
2828 }
2829
2830 /*ARGSUSED*/
2831 static int
2832 filt_solisten(struct knote *kn, long hint)
2833 {
2834 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2835 int have_connections;
2836
2837 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2838 socket_lock(so, 1);
2839 kn->kn_data = so->so_qlen;
2840 have_connections = !TAILQ_EMPTY(&so->so_comp);
2841 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2842 socket_unlock(so, 1);
2843 return (have_connections);
2844 }
2845
2846
2847 int
2848 socket_lock(so, refcount)
2849 struct socket *so;
2850 int refcount;
2851 {
2852 int error = 0, lr, lr_saved = 0; /* lr_saved stays 0 when not on __ppc__ */
2853 #ifdef __ppc__
2854 __asm__ volatile("mflr %0" : "=r" (lr));
2855 lr_saved = lr;
2856 #endif
2857
2858 if (so->so_proto->pr_lock) {
2859 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2860 }
2861 else {
2862 #ifdef MORE_LOCKING_DEBUG
2863 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2864 #endif
2865 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2866 if (refcount)
2867 so->so_usecount++;
2868 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2869 }
2870
2871 return(error);
2872
2873 }
2874
2875 int
2876 socket_unlock(so, refcount)
2877 struct socket *so;
2878 int refcount;
2879 {
2880 int error = 0, lr, lr_saved = 0; /* lr_saved stays 0 when not on __ppc__ */
2881 lck_mtx_t * mutex_held;
2882
2883 #ifdef __ppc__
2884 __asm__ volatile("mflr %0" : "=r" (lr));
2885 lr_saved = lr;
2886 #endif
2887
2888
2889
2890 if (so->so_proto == NULL)
2891 panic("socket_unlock null so_proto so=%x\n", so);
2892
2893 if (so && so->so_proto->pr_unlock)
2894 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2895 else {
2896 mutex_held = so->so_proto->pr_domain->dom_mtx;
2897 #ifdef MORE_LOCKING_DEBUG
2898 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2899 #endif
2900 if (refcount) {
2901 if (so->so_usecount <= 0)
2902 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2903 so->so_usecount--;
2904 if (so->so_usecount == 0) {
2905 sofreelastref(so, 1);
2906 }
2907 else
2908 so->reserved4 = (void*)lr_saved; /* save caller */
2909 }
2910 lck_mtx_unlock(mutex_held);
2911 }
2912
2913 return(error);
2914 }
2915 //### Called with socket locked, will unlock socket
2916 void
2917 sofree(so)
2918 struct socket *so;
2919 {
2920
2921 int lr, lr_saved;
2922 lck_mtx_t * mutex_held;
2923 #ifdef __ppc__
2924 __asm__ volatile("mflr %0" : "=r" (lr));
2925 lr_saved = lr;
2926 #endif
2927 if (so->so_proto->pr_getlock != NULL)
2928 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2929 else
2930 mutex_held = so->so_proto->pr_domain->dom_mtx;
2931 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2932
2933 /* Remove the filters */
2934 sflt_termsock(so);
2935
2936 sofreelastref(so, 0);
2937 }
2938
2939 void
2940 soreference(so)
2941 struct socket *so;
2942 {
2943 socket_lock(so, 1); /* lock and take one reference on the socket */
2944 socket_unlock(so, 0); /* unlock only */
2945 }
2946
2947 void
2948 sodereference(so)
2949 struct socket *so;
2950 {
2951 socket_lock(so, 0);
2952 socket_unlock(so, 1);
2953 }
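/*
 * For illustration only: soreference() and sodereference() above are the
 * smallest possible uses of the locking convention in this file --
 * socket_lock(so, 1) takes the mutex plus a use-count reference,
 * socket_unlock(so, 1) drops both (freeing the socket when the count reaches
 * zero), and the refcount==0 forms toggle only the mutex, as
 * sodelayed_copy() does around uiomove().  A hypothetical helper that must
 * block while keeping the socket alive would combine the two forms:
 */
static void
myproto_blocking_helper(struct socket *so)
{
	socket_lock(so, 1);		/* lock and hold a reference */

	/* ... inspect or update state protected by the socket lock ... */

	socket_unlock(so, 0);		/* release only the mutex while blocking */
	/* ... sleep or copy to user space; the reference keeps so valid ... */
	socket_lock(so, 0);		/* retake the mutex, reference still held */

	socket_unlock(so, 1);		/* drop the mutex and the reference */
}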