1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106 #include <machine/limits.h>
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
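/*
 * Illustrative call sequence (a simplified sketch; the sockaddr, uio and
 * flags setup shown here is assumed, and error handling is omitted):
 *
 *	struct socket *so;
 *	socreate(PF_INET, &so, SOCK_STREAM, 0);		// allocate + pru_attach
 *	sobind(so, (struct sockaddr *)&sin);		// optional local bind
 *	soconnect(so, (struct sockaddr *)&sin);		// or solisten(so, backlog)
 *	sosend(so, NULL, uio, NULL, NULL, 0);		// write
 *	soreceive(so, NULL, uio, NULL, NULL, &flags);	// read
 *	soclose(so);					// drop the file reference
 */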
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
186
187 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
188
189 /*
190 * allocate the lock attribute for socket cache mutex
191 */
192 so_cache_mtx_attr = lck_attr_alloc_init();
193 lck_attr_setdefault(so_cache_mtx_attr);
194
195 so_cache_init_done = 1;
196
197 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
198
199 if (so_cache_mtx == NULL)
200 return; /* we're hosed... */
201
202 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
203 get_inpcb_str_size() + 4 +
204 get_tcp_str_size());
205 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
206 #if TEMPDEBUG
207 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
208 #endif
209 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
210
211 so_cache_zone_element_size = str_size;
212
213 sflt_init();
214
215 }
216
217 void cached_sock_alloc(so, waitok)
218 struct socket **so;
219 int waitok;
220
221 {
222 caddr_t temp;
223 register u_long offset;
224
225
226 lck_mtx_lock(so_cache_mtx);
227
228 if (cached_sock_count) {
229 cached_sock_count--;
230 *so = socket_cache_head;
231 if (*so == 0)
232 panic("cached_sock_alloc: cached sock is null");
233
234 socket_cache_head = socket_cache_head->cache_next;
235 if (socket_cache_head)
236 socket_cache_head->cache_prev = 0;
237 else
238 socket_cache_tail = 0;
239
240 lck_mtx_unlock(so_cache_mtx);
241
242 temp = (*so)->so_saved_pcb;
243 bzero((caddr_t)*so, sizeof(struct socket));
244 #if TEMPDEBUG
245 kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so,
246 cached_sock_count);
247 #endif
248 (*so)->so_saved_pcb = temp;
249 (*so)->cached_in_sock_layer = 1;
250
251 }
252 else {
253 #if TEMPDEBUG
254 kprintf("Allocating cached sock %x from memory\n", *so);
255 #endif
256
257 lck_mtx_unlock(so_cache_mtx);
258
259 if (waitok)
260 *so = (struct socket *) zalloc(so_cache_zone);
261 else
262 *so = (struct socket *) zalloc_noblock(so_cache_zone);
263
264 if (*so == 0)
265 return;
266
267 bzero((caddr_t)*so, sizeof(struct socket));
268
269 /*
270 * Define offsets for extra structures into our single block of
271 * memory. Align extra structures on longword boundaries.
272 */
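		/*
		 * Example of the rounding that follows, assuming a misaligned
		 * offset: if offset ends in ...5 (offset & 0x3 != 0), then
		 * offset += 4 yields ...9 and offset &= 0xfffffffc yields ...8,
		 * the next longword boundary -- the same result as
		 * (offset + 3) & ~0x3 for misaligned values.  The single zone
		 * element is thus carved into
		 * [struct socket][pad][inpcb storage][pad][tcpcb storage],
		 * matching the element size computed in socketinit().
		 */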
273
274
275 offset = (u_long) *so;
276 offset += sizeof(struct socket);
277 if (offset & 0x3) {
278 offset += 4;
279 offset &= 0xfffffffc;
280 }
281 (*so)->so_saved_pcb = (caddr_t) offset;
282 offset += get_inpcb_str_size();
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287
288 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
289 #if TEMPDEBUG
290 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
291 (*so)->so_saved_pcb,
292 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
293 #endif
294 }
295
296 (*so)->cached_in_sock_layer = 1;
297 }
298
299
300 void cached_sock_free(so)
301 struct socket *so;
302 {
303
304 lck_mtx_lock(so_cache_mtx);
305
306 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
307 --cached_sock_count;
308 lck_mtx_unlock(so_cache_mtx);
309 #if TEMPDEBUG
310 kprintf("Freeing overflowed cached socket %x\n", so);
311 #endif
312 zfree(so_cache_zone, so);
313 }
314 else {
315 #if TEMPDEBUG
316 kprintf("Freeing socket %x into cache\n", so);
317 #endif
318 if (so_cache_hw < cached_sock_count)
319 so_cache_hw = cached_sock_count;
320
321 so->cache_next = socket_cache_head;
322 so->cache_prev = 0;
323 if (socket_cache_head)
324 socket_cache_head->cache_prev = so;
325 else
326 socket_cache_tail = so;
327
328 so->cache_timestamp = so_cache_time;
329 socket_cache_head = so;
330 lck_mtx_unlock(so_cache_mtx);
331 }
332
333 #if TEMPDEBUG
334 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
335 #endif
336
337
338 }
339
340
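/*
 * so_cache_timer() ages the cached-socket free list: entries that have sat
 * in the cache for SO_CACHE_TIME_LIMIT or more timer passes are returned to
 * the zone, at most SO_CACHE_MAX_FREE_BATCH per pass, and the timeout then
 * re-arms itself to run again SO_CACHE_FLUSH_INTERVAL seconds later.
 */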
341 void so_cache_timer()
342 {
343 register struct socket *p;
344 register int n_freed = 0;
345
346
347 lck_mtx_lock(so_cache_mtx);
348
349 ++so_cache_time;
350
351 while ( (p = socket_cache_tail) )
352 {
353 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
354 break;
355
356 so_cache_timeouts++;
357
358 if ( (socket_cache_tail = p->cache_prev) )
359 p->cache_prev->cache_next = 0;
360 if (--cached_sock_count == 0)
361 socket_cache_head = 0;
362
363
364 zfree(so_cache_zone, p);
365
366 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
367 {
368 so_cache_max_freed++;
369 break;
370 }
371 }
372 lck_mtx_unlock(so_cache_mtx);
373
374 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
375
376
377 }
378 #endif /* __APPLE__ */
379
380 /*
381 * Get a socket structure from our zone, and initialize it.
382 * We don't implement `waitok' yet (see comments in uipc_domain.c).
383 * Note that it would probably be better to allocate socket
384 * and PCB at the same time, but I'm not convinced that all
385 * the protocols can be easily modified to do this.
386 */
387 struct socket *
388 soalloc(waitok, dom, type)
389 int waitok;
390 int dom;
391 int type;
392 {
393 struct socket *so;
394
395 if ((dom == PF_INET) && (type == SOCK_STREAM))
396 cached_sock_alloc(&so, waitok);
397 else
398 {
399 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
400 if (so)
401 bzero(so, sizeof *so);
402 }
403 /* XXX race condition for reentrant kernel */
404 //###LD Atomic add for so_gencnt
405 if (so) {
406 so->so_gencnt = ++so_gencnt;
407 so->so_zone = socket_zone;
408 }
409
410 return so;
411 }
412
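/*
 * socreate() resolves the protocol switch entry (by explicit protocol number
 * via pffindproto(), or by socket type via pffindtype()), allocates the
 * socket with soalloc(), and then calls the protocol's pru_attach handler,
 * which allocates the PCB and, where needed, the per-PCB lock.
 */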
413 int
414 socreate(dom, aso, type, proto)
415 int dom;
416 struct socket **aso;
417 register int type;
418 int proto;
419 {
420 struct proc *p = current_proc();
421 register struct protosw *prp;
422 register struct socket *so;
423 register int error = 0;
424 #if TCPDEBUG
425 extern int tcpconsdebug;
426 #endif
427 if (proto)
428 prp = pffindproto(dom, proto, type);
429 else
430 prp = pffindtype(dom, type);
431
432 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
433 return (EPROTONOSUPPORT);
434 #ifndef __APPLE__
435
436 if (p->p_prison && jail_socket_unixiproute_only &&
437 prp->pr_domain->dom_family != PF_LOCAL &&
438 prp->pr_domain->dom_family != PF_INET &&
439 prp->pr_domain->dom_family != PF_ROUTE) {
440 return (EPROTONOSUPPORT);
441 }
442
443 #endif
444 if (prp->pr_type != type)
445 return (EPROTOTYPE);
446 so = soalloc(p != 0, dom, type);
447 if (so == 0)
448 return (ENOBUFS);
449
450 TAILQ_INIT(&so->so_incomp);
451 TAILQ_INIT(&so->so_comp);
452 so->so_type = type;
453
454 #ifdef __APPLE__
455 if (p != 0) {
456 so->so_uid = kauth_cred_getuid(kauth_cred_get());
457 if (!suser(kauth_cred_get(),NULL))
458 so->so_state = SS_PRIV;
459 }
460 #else
461 so->so_cred = kauth_cred_get_with_ref();
462 #endif
463 so->so_proto = prp;
464 #ifdef __APPLE__
465 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
466 so->so_rcv.sb_so = so->so_snd.sb_so = so;
467 #endif
468
469         //### Attachment will create the per-PCB lock if necessary and increase the refcount
470
471 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
472 if (error) {
473 /*
474 * Warning:
475 * If so_pcb is not zero, the socket will be leaked,
476                  * so the protocol attachment handler must be coded carefully
477 */
478 so->so_state |= SS_NOFDREF;
479 sofreelastref(so, 1);
480 return (error);
481 }
482 so->so_usecount++;
483 #ifdef __APPLE__
484 prp->pr_domain->dom_refs++;
485 TAILQ_INIT(&so->so_evlist);
486
487 /* Attach socket filters for this protocol */
488 sflt_initsock(so);
489 #if TCPDEBUG
490 if (tcpconsdebug == 2)
491 so->so_options |= SO_DEBUG;
492 #endif
493 #endif
494
495 *aso = so;
496 return (0);
497 }
498
499 int
500 sobind(so, nam)
501 struct socket *so;
502 struct sockaddr *nam;
503
504 {
505 struct proc *p = current_proc();
506 int error = 0;
507 struct socket_filter_entry *filter;
508 int filtered = 0;
509
510 socket_lock(so, 1);
511
512 /* Socket filter */
513 error = 0;
514 for (filter = so->so_filt; filter && (error == 0);
515 filter = filter->sfe_next_onsocket) {
516 if (filter->sfe_filter->sf_filter.sf_bind) {
517 if (filtered == 0) {
518 filtered = 1;
519 sflt_use(so);
520 socket_unlock(so, 0);
521 }
522 error = filter->sfe_filter->sf_filter.sf_bind(
523 filter->sfe_cookie, so, nam);
524 }
525 }
526 if (filtered != 0) {
527 socket_lock(so, 0);
528 sflt_unuse(so);
529 }
530 /* End socket filter */
531
532 if (error == 0)
533 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
534
535 socket_unlock(so, 1);
536
537 if (error == EJUSTRETURN)
538 error = 0;
539
540 return (error);
541 }
542
543 void
544 sodealloc(so)
545 struct socket *so;
546 {
547 so->so_gencnt = ++so_gencnt;
548
549 #ifndef __APPLE__
550 if (so->so_rcv.sb_hiwat)
551 (void)chgsbsize(so->so_cred->cr_uidinfo,
552 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
553 if (so->so_snd.sb_hiwat)
554 (void)chgsbsize(so->so_cred->cr_uidinfo,
555 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
556 #ifdef INET
557 if (so->so_accf != NULL) {
558 if (so->so_accf->so_accept_filter != NULL &&
559 so->so_accf->so_accept_filter->accf_destroy != NULL) {
560 so->so_accf->so_accept_filter->accf_destroy(so);
561 }
562 if (so->so_accf->so_accept_filter_str != NULL)
563 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
564 FREE(so->so_accf, M_ACCF);
565 }
566 #endif /* INET */
567 kauth_cred_rele(so->so_cred);
568 zfreei(so->so_zone, so);
569 #else
570 if (so->cached_in_sock_layer == 1)
571 cached_sock_free(so);
572 else {
573 if (so->cached_in_sock_layer == -1)
574 panic("sodealloc: double dealloc: so=%x\n", so);
575 so->cached_in_sock_layer = -1;
576 FREE_ZONE(so, sizeof(*so), so->so_zone);
577 }
578 #endif /* __APPLE__ */
579 }
580
581 int
582 solisten(so, backlog)
583 register struct socket *so;
584 int backlog;
585
586 {
587 struct proc *p = current_proc();
588 int error;
589
590 socket_lock(so, 1);
591
592 {
593 struct socket_filter_entry *filter;
594 int filtered = 0;
595 error = 0;
596 for (filter = so->so_filt; filter && (error == 0);
597 filter = filter->sfe_next_onsocket) {
598 if (filter->sfe_filter->sf_filter.sf_listen) {
599 if (filtered == 0) {
600 filtered = 1;
601 sflt_use(so);
602 socket_unlock(so, 0);
603 }
604 error = filter->sfe_filter->sf_filter.sf_listen(
605 filter->sfe_cookie, so);
606 }
607 }
608 if (filtered != 0) {
609 socket_lock(so, 0);
610 sflt_unuse(so);
611 }
612 }
613
614 if (error == 0) {
615 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
616 }
617
618 if (error) {
619 socket_unlock(so, 1);
620 if (error == EJUSTRETURN)
621 error = 0;
622 return (error);
623 }
624
625 if (TAILQ_EMPTY(&so->so_comp))
626 so->so_options |= SO_ACCEPTCONN;
627 if (backlog < 0 || backlog > somaxconn)
628 backlog = somaxconn;
629 so->so_qlimit = backlog;
630
631 socket_unlock(so, 1);
632 return (0);
633 }
634
635 void
636 sofreelastref(so, dealloc)
637 register struct socket *so;
638 int dealloc;
639 {
640 int error;
641 struct socket *head = so->so_head;
642
643 /*### Assume socket is locked */
644
645 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
646 #ifdef __APPLE__
647 selthreadclear(&so->so_snd.sb_sel);
648 selthreadclear(&so->so_rcv.sb_sel);
649 #endif
650 return;
651 }
652 if (head != NULL) {
653 socket_lock(head, 1);
654 if (so->so_state & SS_INCOMP) {
655 TAILQ_REMOVE(&head->so_incomp, so, so_list);
656 head->so_incqlen--;
657 } else if (so->so_state & SS_COMP) {
658 /*
659 * We must not decommission a socket that's
660 * on the accept(2) queue. If we do, then
661 * accept(2) may hang after select(2) indicated
662 * that the listening socket was ready.
663 */
664 #ifdef __APPLE__
665 selthreadclear(&so->so_snd.sb_sel);
666 selthreadclear(&so->so_rcv.sb_sel);
667 #endif
668 socket_unlock(head, 1);
669 return;
670 } else {
671 panic("sofree: not queued");
672 }
673 head->so_qlen--;
674 so->so_state &= ~SS_INCOMP;
675 so->so_head = NULL;
676 socket_unlock(head, 1);
677 }
678 #ifdef __APPLE__
679 selthreadclear(&so->so_snd.sb_sel);
680 sbrelease(&so->so_snd);
681 #endif
682 sorflush(so);
683
684 /* 3932268: disable upcall */
685 so->so_rcv.sb_flags &= ~SB_UPCALL;
686 so->so_snd.sb_flags &= ~SB_UPCALL;
687
688 if (dealloc)
689 sodealloc(so);
690 }
691
692 /*
693 * Close a socket on last file table reference removal.
694 * Initiate disconnect if connected.
695 * Free socket when disconnect complete.
696 */
697 int
698 soclose_locked(so)
699 register struct socket *so;
700 {
701 int error = 0;
702 lck_mtx_t * mutex_held;
703 struct timespec ts;
704
705 if (so->so_usecount == 0) {
706 panic("soclose: so=%x refcount=0\n", so);
707 }
708
709 sflt_notify(so, sock_evt_closing, NULL);
710
711 if ((so->so_options & SO_ACCEPTCONN)) {
712 struct socket *sp;
713
714                 /* We do not want new connections to be added to the connection queues */
715 so->so_options &= ~SO_ACCEPTCONN;
716
717 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
718 /* A bit tricky here. We need to keep
719 * a lock if it's a protocol global lock
720 * but we want the head, not the socket locked
721 * in the case of per-socket lock...
722 */
723 if (so->so_proto->pr_getlock != NULL)
724 socket_lock(sp, 1);
725 if (so->so_proto->pr_getlock != NULL)
726 socket_unlock(so, 0);
727 (void) soabort(sp);
728 if (so->so_proto->pr_getlock != NULL)
729 socket_lock(so, 0);
730 if (so->so_proto->pr_getlock != NULL)
731 socket_unlock(sp, 1);
732 }
733
734 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
735 if (so->so_proto->pr_getlock != NULL)
736 socket_lock(sp, 1);
737
738 /* Dequeue from so_comp since sofree() won't do it */
739 TAILQ_REMOVE(&so->so_comp, sp, so_list);
740 so->so_qlen--;
741 sp->so_state &= ~SS_COMP;
742 sp->so_head = NULL;
743
744 if (so->so_proto->pr_getlock != NULL)
745 socket_unlock(so, 0);
746 (void) soabort(sp);
747 if (so->so_proto->pr_getlock != NULL)
748 socket_lock(so, 0);
749 if (so->so_proto->pr_getlock != NULL)
750 socket_unlock(sp, 1);
751 }
752 }
753 if (so->so_pcb == 0) {
754 /* 3915887: mark the socket as ready for dealloc */
755 so->so_flags |= SOF_PCBCLEARING;
756 goto discard;
757 }
758 if (so->so_state & SS_ISCONNECTED) {
759 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
760 error = sodisconnectlocked(so);
761 if (error)
762 goto drop;
763 }
764 if (so->so_options & SO_LINGER) {
765 if ((so->so_state & SS_ISDISCONNECTING) &&
766 (so->so_state & SS_NBIO))
767 goto drop;
768 if (so->so_proto->pr_getlock != NULL)
769 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
770 else
771 mutex_held = so->so_proto->pr_domain->dom_mtx;
772 while (so->so_state & SS_ISCONNECTED) {
773 ts.tv_sec = (so->so_linger/100);
774 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
775 error = msleep((caddr_t)&so->so_timeo, mutex_held,
776 PSOCK | PCATCH, "soclos", &ts);
777 if (error) {
778                                         /* It's OK when the timer fires, don't report an error */
779 if (error == EWOULDBLOCK)
780 error = 0;
781 break;
782 }
783 }
784 }
785 }
786 drop:
787 if (so->so_usecount == 0)
788 panic("soclose: usecount is zero so=%x\n", so);
789 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
790 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
791 if (error == 0)
792 error = error2;
793 }
794 if (so->so_usecount <= 0)
795 panic("soclose: usecount is zero so=%x\n", so);
796 discard:
797 if (so->so_pcb && so->so_state & SS_NOFDREF)
798 panic("soclose: NOFDREF");
799 so->so_state |= SS_NOFDREF;
800 #ifdef __APPLE__
801 so->so_proto->pr_domain->dom_refs--;
802 evsofree(so);
803 #endif
804 so->so_usecount--;
805 sofree(so);
806 return (error);
807 }
808
809 int
810 soclose(so)
811 register struct socket *so;
812 {
813 int error = 0;
814 socket_lock(so, 1);
815 if (so->so_retaincnt == 0)
816 error = soclose_locked(so);
817         else { /* the FD is going away, but the socket is retained in the kernel; just drop its reference */
818 so->so_usecount--;
819 if (so->so_usecount < 2)
820 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount);
821 }
822 socket_unlock(so, 1);
823 return (error);
824 }
825
826
827 /*
828 * Must be called at splnet...
829 */
830 //#### Should already be locked
831 int
832 soabort(so)
833 struct socket *so;
834 {
835 int error;
836
837 #ifdef MORE_LOCKING_DEBUG
838 lck_mtx_t * mutex_held;
839
840 if (so->so_proto->pr_getlock != NULL)
841 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
842 else
843 mutex_held = so->so_proto->pr_domain->dom_mtx;
844 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
845 #endif
846
847 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
848 if (error) {
849 sofree(so);
850 return error;
851 }
852 return (0);
853 }
854
855 int
856 soacceptlock(so, nam, dolock)
857 register struct socket *so;
858 struct sockaddr **nam;
859 int dolock;
860 {
861 int error;
862
863 if (dolock) socket_lock(so, 1);
864
865 if ((so->so_state & SS_NOFDREF) == 0)
866 panic("soaccept: !NOFDREF");
867 so->so_state &= ~SS_NOFDREF;
868 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
869
870 if (dolock) socket_unlock(so, 1);
871 return (error);
872 }
873 int
874 soaccept(so, nam)
875 register struct socket *so;
876 struct sockaddr **nam;
877 {
878 return (soacceptlock(so, nam, 1));
879 }
880
881 int
882 soconnectlock(so, nam, dolock)
883 register struct socket *so;
884 struct sockaddr *nam;
885 int dolock;
886
887 {
888 int s;
889 int error;
890 struct proc *p = current_proc();
891
892 if (dolock) socket_lock(so, 1);
893
894 if (so->so_options & SO_ACCEPTCONN) {
895 if (dolock) socket_unlock(so, 1);
896 return (EOPNOTSUPP);
897 }
898 /*
899 * If protocol is connection-based, can only connect once.
900 * Otherwise, if connected, try to disconnect first.
901 * This allows user to disconnect by connecting to, e.g.,
902 * a null address.
903 */
904 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
905 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
906 (error = sodisconnectlocked(so))))
907 error = EISCONN;
908 else {
909 /*
910 * Run connect filter before calling protocol:
911 * - non-blocking connect returns before completion;
912 */
913 {
914 struct socket_filter_entry *filter;
915 int filtered = 0;
916 error = 0;
917 for (filter = so->so_filt; filter && (error == 0);
918 filter = filter->sfe_next_onsocket) {
919 if (filter->sfe_filter->sf_filter.sf_connect_out) {
920 if (filtered == 0) {
921 filtered = 1;
922 sflt_use(so);
923 socket_unlock(so, 0);
924 }
925 error = filter->sfe_filter->sf_filter.sf_connect_out(
926 filter->sfe_cookie, so, nam);
927 }
928 }
929 if (filtered != 0) {
930 socket_lock(so, 0);
931 sflt_unuse(so);
932 }
933 }
934 if (error) {
935 if (error == EJUSTRETURN)
936 error = 0;
937 if (dolock) socket_unlock(so, 1);
938 return error;
939 }
940
941 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
942 }
943 if (dolock) socket_unlock(so, 1);
944 return (error);
945 }
946
947 int
948 soconnect(so, nam)
949 register struct socket *so;
950 struct sockaddr *nam;
951 {
952 return (soconnectlock(so, nam, 1));
953 }
954
955 int
956 soconnect2(so1, so2)
957 register struct socket *so1;
958 struct socket *so2;
959 {
960 int error;
961         //####### Assumes so1 is already locked
962
963 socket_lock(so2, 1);
964
965 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
966
967 socket_unlock(so2, 1);
968 return (error);
969 }
970
971
972 int
973 sodisconnectlocked(so)
974 register struct socket *so;
975 {
976 int error;
977
978 if ((so->so_state & SS_ISCONNECTED) == 0) {
979 error = ENOTCONN;
980 goto bad;
981 }
982 if (so->so_state & SS_ISDISCONNECTING) {
983 error = EALREADY;
984 goto bad;
985 }
986
987 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
988
989 if (error == 0) {
990 sflt_notify(so, sock_evt_disconnected, NULL);
991 }
992
993 bad:
994 return (error);
995 }
996 //### Locking version
997 int
998 sodisconnect(so)
999 register struct socket *so;
1000 {
1001 int error;
1002
1003 socket_lock(so, 1);
1004 error = sodisconnectlocked(so);
1005 socket_unlock(so, 1);
1006 return(error);
1007 }
1008
1009 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1010
1011 /*
1012 * sosendcheck will lock the socket buffer if it isn't locked and
1013 * verify that there is space for the data being inserted.
1014 */
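/*
 * Returns 0 once space is available (sleeping in sbwait() and retrying if
 * necessary), EPIPE if the send side has been shut down, the pending
 * so_error if one is set, ENOTCONN or EDESTADDRREQ for unconnected sockets,
 * EMSGSIZE for oversized atomic sends, and EWOULDBLOCK when a non-blocking
 * socket would have to wait.
 */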
1015
1016 static int
1017 sosendcheck(
1018 struct socket *so,
1019 struct sockaddr *addr,
1020 long resid,
1021 long clen,
1022 long atomic,
1023 int flags,
1024 int *sblocked)
1025 {
1026 int error = 0;
1027 long space;
1028
1029 restart:
1030 if (*sblocked == 0) {
1031 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1032 if (error)
1033 return error;
1034 *sblocked = 1;
1035 }
1036
1037 if (so->so_state & SS_CANTSENDMORE)
1038 return EPIPE;
1039
1040 if (so->so_error) {
1041 error = so->so_error;
1042 so->so_error = 0;
1043 return error;
1044 }
1045
1046 if ((so->so_state & SS_ISCONNECTED) == 0) {
1047 /*
1048                  * `sendto' and `sendmsg' are allowed on a connection-
1049 * based socket if it supports implied connect.
1050 * Return ENOTCONN if not connected and no address is
1051 * supplied.
1052 */
1053 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1054 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1055 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1056 !(resid == 0 && clen != 0))
1057 return ENOTCONN;
1058 } else if (addr == 0 && !(flags&MSG_HOLD))
1059 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1060 }
1061 space = sbspace(&so->so_snd);
1062 if (flags & MSG_OOB)
1063 space += 1024;
1064 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1065 clen > so->so_snd.sb_hiwat)
1066 return EMSGSIZE;
1067 if (space < resid + clen &&
1068 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1069 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO))
1070 return EWOULDBLOCK;
1071 sbunlock(&so->so_snd, 1);
1072 error = sbwait(&so->so_snd);
1073 if (error) {
1074 return error;
1075 }
1076 goto restart;
1077 }
1078
1079 return 0;
1080 }
1081
1082 /*
1083 * Send on a socket.
1084 * If send must go all at once and message is larger than
1085 * send buffering, then hard error.
1086 * Lock against other senders.
1087 * If must go all at once and not enough room now, then
1088 * inform user that this would block and do nothing.
1089 * Otherwise, if nonblocking, send as much as possible.
1090 * The data to be sent is described by "uio" if nonzero,
1091 * otherwise by the mbuf chain "top" (which must be null
1092 * if uio is not). Data provided in mbuf chain must be small
1093 * enough to send all at once.
1094 *
1095 * Returns nonzero on error, timeout or signal; callers
1096 * must check for short counts if EINTR/ERESTART are returned.
1097 * Data and control buffers are freed on return.
1098 * Experiment:
1099 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1100 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1101 * point at the mbuf chain being constructed and go from there.
1102 */
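/*
 * Minimal usage sketch (simplified; the uio, mbuf chain and destination
 * address shown here are assumed, not taken from a real caller):
 *
 *	// data described by a uio, with an explicit destination address
 *	error = sosend(so, (struct sockaddr *)&to, uio, NULL, NULL, 0);
 *
 *	// data already packaged as an mbuf chain, no uio
 *	error = sosend(so, NULL, NULL, top, NULL, 0);
 *
 * sosend() takes and drops the socket lock itself, and the data and control
 * mbufs are always consumed or freed by the time it returns.
 */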
1103 int
1104 sosend(so, addr, uio, top, control, flags)
1105 register struct socket *so;
1106 struct sockaddr *addr;
1107 struct uio *uio;
1108 struct mbuf *top;
1109 struct mbuf *control;
1110 int flags;
1111
1112 {
1113 struct mbuf **mp;
1114 register struct mbuf *m, *freelist = NULL;
1115 register long space, len, resid;
1116 int clen = 0, error, dontroute, mlen, sendflags;
1117 int atomic = sosendallatonce(so) || top;
1118 int sblocked = 0;
1119 struct proc *p = current_proc();
1120
1121 if (uio)
1122 // LP64todo - fix this!
1123 resid = uio_resid(uio);
1124 else
1125 resid = top->m_pkthdr.len;
1126
1127 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1128 so,
1129 resid,
1130 so->so_snd.sb_cc,
1131 so->so_snd.sb_lowat,
1132 so->so_snd.sb_hiwat);
1133
1134 socket_lock(so, 1);
1135
1136 /*
1137 * In theory resid should be unsigned.
1138 * However, space must be signed, as it might be less than 0
1139 * if we over-committed, and we must use a signed comparison
1140 * of space and resid. On the other hand, a negative resid
1141 * causes us to loop sending 0-length segments to the protocol.
1142 *
1143 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1144 * type sockets since that's an error.
1145 */
1146 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1147 error = EINVAL;
1148 socket_unlock(so, 1);
1149 goto out;
1150 }
1151
1152 dontroute =
1153 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1154 (so->so_proto->pr_flags & PR_ATOMIC);
1155 if (p)
1156 p->p_stats->p_ru.ru_msgsnd++;
1157 if (control)
1158 clen = control->m_len;
1159
1160 do {
1161 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1162 if (error) {
1163 if (sblocked)
1164 goto release;
1165 else {
1166 socket_unlock(so, 1);
1167 goto out;
1168 }
1169 }
1170 mp = &top;
1171 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1172
1173 do {
1174
1175 if (uio == NULL) {
1176 /*
1177 * Data is prepackaged in "top".
1178 */
1179 resid = 0;
1180 if (flags & MSG_EOR)
1181 top->m_flags |= M_EOR;
1182 } else {
1183 int chainlength;
1184 int bytes_to_copy;
1185
1186 bytes_to_copy = min(resid, space);
1187
1188 if (sosendminchain > 0) {
1189 chainlength = 0;
1190 } else
1191 chainlength = sosendmaxchain;
1192
1193 socket_unlock(so, 0);
1194
1195 do {
1196 int num_needed;
1197 int hdrs_needed = (top == 0) ? 1 : 0;
1198
1199 /*
1200 * try to maintain a local cache of mbuf clusters needed to complete this write
1201 * the list is further limited to the number that are currently needed to fill the socket
1202 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1203                              * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs;
1204                              * if we fail early (or miscalculate the number needed) make sure to release any clusters
1205 * we haven't yet consumed.
1206 */
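			    /*
			     * Worked example (sizes are illustrative -- NBPG is the page
			     * size and MCLBYTES the cluster size): with NBPG == 4096 and
			     * bytes_to_copy == 20000, num_needed starts at 20000/4096 == 4,
			     * and the 3616-byte remainder is >= MINCLSIZE, so 5 page-sized
			     * packets are requested in one m_getpackets_internal() call.
			     */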
1207 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1208 num_needed = bytes_to_copy / NBPG;
1209
1210 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1211 num_needed++;
1212
1213 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1214 /* Fall back to cluster size if allocation failed */
1215 }
1216
1217 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1218 num_needed = bytes_to_copy / MCLBYTES;
1219
1220 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1221 num_needed++;
1222
1223 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1224 /* Fall back to a single mbuf if allocation failed */
1225 }
1226
1227 if (freelist == NULL) {
1228 if (top == 0)
1229 MGETHDR(freelist, M_WAIT, MT_DATA);
1230 else
1231 MGET(freelist, M_WAIT, MT_DATA);
1232
1233 if (freelist == NULL) {
1234 error = ENOBUFS;
1235 socket_lock(so, 0);
1236 if (sblocked) {
1237 goto release;
1238 } else {
1239 socket_unlock(so, 1);
1240 goto out;
1241 }
1242 }
1243 /*
1244 * For datagram protocols, leave room
1245 * for protocol headers in first mbuf.
1246 */
1247 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1248 MH_ALIGN(freelist, bytes_to_copy);
1249 }
1250 m = freelist;
1251 freelist = m->m_next;
1252 m->m_next = NULL;
1253
1254 if ((m->m_flags & M_EXT))
1255 mlen = m->m_ext.ext_size;
1256 else if ((m->m_flags & M_PKTHDR))
1257 mlen = MHLEN - m_leadingspace(m);
1258 else
1259 mlen = MLEN;
1260 len = min(mlen, bytes_to_copy);
1261
1262 chainlength += len;
1263
1264 space -= len;
1265
1266 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1267
1268 // LP64todo - fix this!
1269 resid = uio_resid(uio);
1270
1271 m->m_len = len;
1272 *mp = m;
1273 top->m_pkthdr.len += len;
1274 if (error)
1275 break;
1276 mp = &m->m_next;
1277 if (resid <= 0) {
1278 if (flags & MSG_EOR)
1279 top->m_flags |= M_EOR;
1280 break;
1281 }
1282 bytes_to_copy = min(resid, space);
1283
1284 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1285
1286 socket_lock(so, 0);
1287
1288 if (error)
1289 goto release;
1290 }
1291
1292 if (flags & (MSG_HOLD|MSG_SEND))
1293 { /* Enqueue for later, go away if HOLD */
1294 register struct mbuf *mb1;
1295 if (so->so_temp && (flags & MSG_FLUSH))
1296 { m_freem(so->so_temp);
1297 so->so_temp = NULL;
1298 }
1299 if (so->so_temp)
1300 so->so_tail->m_next = top;
1301 else
1302 so->so_temp = top;
1303 mb1 = top;
1304 while (mb1->m_next)
1305 mb1 = mb1->m_next;
1306 so->so_tail = mb1;
1307 if (flags&MSG_HOLD)
1308 { top = NULL;
1309 goto release;
1310 }
1311 top = so->so_temp;
1312 }
1313 if (dontroute)
1314 so->so_options |= SO_DONTROUTE;
1315 /* Compute flags here, for pru_send and NKEs */
1316 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1317 /*
1318 * If the user set MSG_EOF, the protocol
1319 * understands this flag and nothing left to
1320 * send then use PRU_SEND_EOF instead of PRU_SEND.
1321 */
1322 ((flags & MSG_EOF) &&
1323 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1324 (resid <= 0)) ?
1325 PRUS_EOF :
1326 /* If there is more to send set PRUS_MORETOCOME */
1327 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1328
1329 /*
1330 * Socket filter processing
1331 */
1332 {
1333 struct socket_filter_entry *filter;
1334 int filtered;
1335
1336 filtered = 0;
1337 error = 0;
1338 for (filter = so->so_filt; filter && (error == 0);
1339 filter = filter->sfe_next_onsocket) {
1340 if (filter->sfe_filter->sf_filter.sf_data_out) {
1341 int so_flags = 0;
1342 if (filtered == 0) {
1343 filtered = 1;
1344 /*
1345 * We don't let sbunlock unlock the socket because
1346 * we don't want it to decrement the usecount.
1347 */
1348 sbunlock(&so->so_snd, 1);
1349 sblocked = 0;
1350 socket_unlock(so, 0);
1351 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1352 }
1353 error = filter->sfe_filter->sf_filter.sf_data_out(
1354 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1355 }
1356 }
1357
1358 if (filtered) {
1359 /*
1360 * At this point, we've run at least one filter.
1361 * The socket is unlocked as is the socket buffer.
1362 */
1363 socket_lock(so, 0);
1364 if (error == EJUSTRETURN) {
1365 error = 0;
1366 clen = 0;
1367 control = 0;
1368 top = 0;
1369 socket_unlock(so, 1);
1370 goto out;
1371 }
1372 else if (error) {
1373 socket_unlock(so, 1);
1374 goto out;
1375 }
1376
1377
1378 /* Verify our state again, this will lock the socket buffer */
1379 error = sosendcheck(so, addr, top->m_pkthdr.len,
1380 control ? control->m_pkthdr.len : 0,
1381 atomic, flags, &sblocked);
1382 if (error) {
1383 if (sblocked) {
1384 /* sbunlock at release will unlock the socket */
1385 goto release;
1386 }
1387 else {
1388 socket_unlock(so, 1);
1389 goto out;
1390 }
1391 }
1392 }
1393 }
1394 /*
1395 * End Socket filter processing
1396 */
1397
1398 if (error == EJUSTRETURN) {
1399 /* A socket filter handled this data */
1400 error = 0;
1401 }
1402 else {
1403 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1404 sendflags, top, addr, control, p);
1405 }
1406 #ifdef __APPLE__
1407 if (flags & MSG_SEND)
1408 so->so_temp = NULL;
1409 #endif
1410 if (dontroute)
1411 so->so_options &= ~SO_DONTROUTE;
1412 clen = 0;
1413 control = 0;
1414 top = 0;
1415 mp = &top;
1416 if (error)
1417 goto release;
1418 } while (resid && space > 0);
1419 } while (resid);
1420
1421 release:
1422 sbunlock(&so->so_snd, 0); /* will unlock socket */
1423 out:
1424 if (top)
1425 m_freem(top);
1426 if (control)
1427 m_freem(control);
1428 if (freelist)
1429 m_freem_list(freelist);
1430
1431 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1432 so,
1433 resid,
1434 so->so_snd.sb_cc,
1435 space,
1436 error);
1437
1438 return (error);
1439 }
1440
1441 /*
1442 * Implement receive operations on a socket.
1443 * We depend on the way that records are added to the sockbuf
1444 * by sbappend*. In particular, each record (mbufs linked through m_next)
1445 * must begin with an address if the protocol so specifies,
1446 * followed by an optional mbuf or mbufs containing ancillary data,
1447 * and then zero or more mbufs of data.
1448 * In order to avoid blocking network interrupts for the entire time here,
1449 * we splx() while doing the actual copy to user space.
1450 * Although the sockbuf is locked, new data may still be appended,
1451 * and thus we must maintain consistency of the sockbuf during that time.
1452 *
1453 * The caller may receive the data as a single mbuf chain by supplying
1454 * an mbuf **mp0 for use in returning the chain. The uio is then used
1455 * only for the count in uio_resid.
1456 */
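/*
 * Minimal usage sketch (simplified; the uio, address and mbuf pointers shown
 * here are assumed, not taken from a real caller):
 *
 *	// copy the data straight into the caller's uio
 *	error = soreceive(so, NULL, uio, NULL, NULL, &flags);
 *
 *	// return the record as an mbuf chain instead, with the sender address
 *	error = soreceive(so, &from, uio, &m0, NULL, &flags);
 *
 * When mp0 is supplied, uio only provides the byte count (uio_resid) and the
 * data mbufs are handed back through *mp0.
 */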
1457 int
1458 soreceive(so, psa, uio, mp0, controlp, flagsp)
1459 register struct socket *so;
1460 struct sockaddr **psa;
1461 struct uio *uio;
1462 struct mbuf **mp0;
1463 struct mbuf **controlp;
1464 int *flagsp;
1465 {
1466 register struct mbuf *m, **mp, *ml = NULL;
1467 register int flags, len, error, offset;
1468 struct protosw *pr = so->so_proto;
1469 struct mbuf *nextrecord;
1470 int moff, type = 0;
1471 // LP64todo - fix this!
1472 int orig_resid = uio_resid(uio);
1473 volatile struct mbuf *free_list;
1474 volatile int delayed_copy_len;
1475 int can_delay;
1476 int need_event;
1477 struct proc *p = current_proc();
1478
1479
1480 // LP64todo - fix this!
1481 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1482 so,
1483 uio_resid(uio),
1484 so->so_rcv.sb_cc,
1485 so->so_rcv.sb_lowat,
1486 so->so_rcv.sb_hiwat);
1487
1488 socket_lock(so, 1);
1489
1490 #ifdef MORE_LOCKING_DEBUG
1491 if (so->so_usecount == 1)
1492 panic("soreceive: so=%x no other reference on socket\n", so);
1493 #endif
1494 mp = mp0;
1495 if (psa)
1496 *psa = 0;
1497 if (controlp)
1498 *controlp = 0;
1499 if (flagsp)
1500 flags = *flagsp &~ MSG_EOR;
1501 else
1502 flags = 0;
1503 /*
1504 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1505          * regardless of the flags argument. Here is the case where
1506 * out-of-band data is not inline.
1507 */
1508 if ((flags & MSG_OOB) ||
1509 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1510 (so->so_options & SO_OOBINLINE) == 0 &&
1511 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1512 m = m_get(M_WAIT, MT_DATA);
1513 if (m == NULL) {
1514 socket_unlock(so, 1);
1515 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1516 return (ENOBUFS);
1517 }
1518 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1519 if (error)
1520 goto bad;
1521 socket_unlock(so, 0);
1522 do {
1523 // LP64todo - fix this!
1524 error = uiomove(mtod(m, caddr_t),
1525 (int) min(uio_resid(uio), m->m_len), uio);
1526 m = m_free(m);
1527 } while (uio_resid(uio) && error == 0 && m);
1528 socket_lock(so, 0);
1529 bad:
1530 if (m)
1531 m_freem(m);
1532 #ifdef __APPLE__
1533 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1534 if (error == EWOULDBLOCK || error == EINVAL) {
1535 /*
1536 * Let's try to get normal data:
1537                          * EWOULDBLOCK: out-of-band data not received yet;
1538 * EINVAL: out-of-band data already read.
1539 */
1540 error = 0;
1541 goto nooob;
1542 } else if (error == 0 && flagsp)
1543 *flagsp |= MSG_OOB;
1544 }
1545 socket_unlock(so, 1);
1546 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1547 #endif
1548 return (error);
1549 }
1550 nooob:
1551 if (mp)
1552 *mp = (struct mbuf *)0;
1553 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1554 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1555
1556
1557 free_list = (struct mbuf *)0;
1558 delayed_copy_len = 0;
1559 restart:
1560 #ifdef MORE_LOCKING_DEBUG
1561 if (so->so_usecount <= 1)
1562 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1563 #endif
1564 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1565 if (error) {
1566 socket_unlock(so, 1);
1567 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1568 return (error);
1569 }
1570
1571 m = so->so_rcv.sb_mb;
1572 /*
1573 * If we have less data than requested, block awaiting more
1574 * (subject to any timeout) if:
1575 * 1. the current count is less than the low water mark, or
1576 * 2. MSG_WAITALL is set, and it is possible to do the entire
1577 * receive operation at once if we block (resid <= hiwat).
1578 * 3. MSG_DONTWAIT is not set
1579 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1580 * we have to do the receive in sections, and thus risk returning
1581 * a short count if a timeout or signal occurs after we start.
1582 */
1583 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1584 so->so_rcv.sb_cc < uio_resid(uio)) &&
1585 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1586 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1587 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1588
1589 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1590 if (so->so_error) {
1591 if (m)
1592 goto dontblock;
1593 error = so->so_error;
1594 if ((flags & MSG_PEEK) == 0)
1595 so->so_error = 0;
1596 goto release;
1597 }
1598 if (so->so_state & SS_CANTRCVMORE) {
1599 if (m)
1600 goto dontblock;
1601 else
1602 goto release;
1603 }
1604 for (; m; m = m->m_next)
1605 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1606 m = so->so_rcv.sb_mb;
1607 goto dontblock;
1608 }
1609 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1610 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1611 error = ENOTCONN;
1612 goto release;
1613 }
1614 if (uio_resid(uio) == 0)
1615 goto release;
1616 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1617 error = EWOULDBLOCK;
1618 goto release;
1619 }
1620 sbunlock(&so->so_rcv, 1);
1621 #ifdef EVEN_MORE_LOCKING_DEBUG
1622 if (socket_debug)
1623 printf("Waiting for socket data\n");
1624 #endif
1625
1626 error = sbwait(&so->so_rcv);
1627 #ifdef EVEN_MORE_LOCKING_DEBUG
1628 if (socket_debug)
1629 printf("SORECEIVE - sbwait returned %d\n", error);
1630 #endif
1631 if (so->so_usecount < 1)
1632 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1633 if (error) {
1634 socket_unlock(so, 1);
1635 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1636 return (error);
1637 }
1638 goto restart;
1639 }
1640 dontblock:
1641 #ifndef __APPLE__
1642 if (uio->uio_procp)
1643 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1644 #else /* __APPLE__ */
1645 /*
1646 * 2207985
1647          * This should be uio->uio_procp; however, some callers of this
1648 * function use auto variables with stack garbage, and fail to
1649 * fill out the uio structure properly.
1650 */
1651 if (p)
1652 p->p_stats->p_ru.ru_msgrcv++;
1653 #endif /* __APPLE__ */
1654 nextrecord = m->m_nextpkt;
1655 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1656 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1657 orig_resid = 0;
1658 if (psa) {
1659 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1660 mp0 == 0);
1661 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1662 error = EWOULDBLOCK;
1663 goto release;
1664 }
1665 }
1666 if (flags & MSG_PEEK) {
1667 m = m->m_next;
1668 } else {
1669 sbfree(&so->so_rcv, m);
1670 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1671 panic("soreceive: about to create invalid socketbuf");
1672 MFREE(m, so->so_rcv.sb_mb);
1673 m = so->so_rcv.sb_mb;
1674 }
1675 }
1676 while (m && m->m_type == MT_CONTROL && error == 0) {
1677 if (flags & MSG_PEEK) {
1678 if (controlp)
1679 *controlp = m_copy(m, 0, m->m_len);
1680 m = m->m_next;
1681 } else {
1682 sbfree(&so->so_rcv, m);
1683 if (controlp) {
1684 if (pr->pr_domain->dom_externalize &&
1685 mtod(m, struct cmsghdr *)->cmsg_type ==
1686 SCM_RIGHTS) {
1687 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1688 error = (*pr->pr_domain->dom_externalize)(m);
1689 socket_lock(so, 0);
1690 }
1691 *controlp = m;
1692 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1693 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1694 so->so_rcv.sb_mb = m->m_next;
1695 m->m_next = 0;
1696 m = so->so_rcv.sb_mb;
1697 } else {
1698 MFREE(m, so->so_rcv.sb_mb);
1699 m = so->so_rcv.sb_mb;
1700 }
1701 }
1702 if (controlp) {
1703 orig_resid = 0;
1704 controlp = &(*controlp)->m_next;
1705 }
1706 }
1707 if (m) {
1708 if ((flags & MSG_PEEK) == 0)
1709 m->m_nextpkt = nextrecord;
1710 type = m->m_type;
1711 if (type == MT_OOBDATA)
1712 flags |= MSG_OOB;
1713 }
1714 moff = 0;
1715 offset = 0;
1716
1717 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1718 can_delay = 1;
1719 else
1720 can_delay = 0;
1721
1722 need_event = 0;
1723
1724 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1725 if (m->m_type == MT_OOBDATA) {
1726 if (type != MT_OOBDATA)
1727 break;
1728 } else if (type == MT_OOBDATA)
1729 break;
1730 #ifndef __APPLE__
1731 /*
1732                  * This assertion needs rework. The trouble is that AppleTalk uses many
1733 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1734 * For now just remove the assertion... CSM 9/98
1735 */
1736 else
1737 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1738 ("receive 3"));
1739 #else
1740 /*
1741                  * Make sure to always set the MSG_OOB flag when getting
1742 * out of band data inline.
1743 */
1744 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1745 (so->so_options & SO_OOBINLINE) != 0 &&
1746 (so->so_state & SS_RCVATMARK) != 0) {
1747 flags |= MSG_OOB;
1748 }
1749 #endif
1750 so->so_state &= ~SS_RCVATMARK;
1751 // LP64todo - fix this!
1752 len = uio_resid(uio) - delayed_copy_len;
1753 if (so->so_oobmark && len > so->so_oobmark - offset)
1754 len = so->so_oobmark - offset;
1755 if (len > m->m_len - moff)
1756 len = m->m_len - moff;
1757 /*
1758 * If mp is set, just pass back the mbufs.
1759 * Otherwise copy them out via the uio, then free.
1760 * Sockbuf must be consistent here (points to current mbuf,
1761 * it points to next record) when we drop priority;
1762 * we must note any additions to the sockbuf when we
1763 * block interrupts again.
1764 */
1765 if (mp == 0) {
1766 if (can_delay && len == m->m_len) {
1767 /*
1768 * only delay the copy if we're consuming the
1769 * mbuf and we're NOT in MSG_PEEK mode
1770                                  * and we have enough data to make it worthwhile
1771                                  * to drop and retake the funnel... can_delay
1772                                  * reflects the state of the latter two constraints;
1773                                  * moff should always be zero in these cases
1774 */
1775 delayed_copy_len += len;
1776 } else {
1777
1778 if (delayed_copy_len) {
1779 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1780
1781 if (error) {
1782 goto release;
1783 }
1784 if (m != so->so_rcv.sb_mb) {
1785 /*
1786 * can only get here if MSG_PEEK is not set
1787 * therefore, m should point at the head of the rcv queue...
1788 * if it doesn't, it means something drastically changed
1789 * while we were out from behind the funnel in sodelayed_copy...
1790 * perhaps a RST on the stream... in any event, the stream has
1791 * been interrupted... it's probably best just to return
1792 * whatever data we've moved and let the caller sort it out...
1793 */
1794 break;
1795 }
1796 }
1797 socket_unlock(so, 0);
1798 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1799 socket_lock(so, 0);
1800
1801 if (error)
1802 goto release;
1803 }
1804 } else
1805 uio_setresid(uio, (uio_resid(uio) - len));
1806
1807 if (len == m->m_len - moff) {
1808 if (m->m_flags & M_EOR)
1809 flags |= MSG_EOR;
1810 if (flags & MSG_PEEK) {
1811 m = m->m_next;
1812 moff = 0;
1813 } else {
1814 nextrecord = m->m_nextpkt;
1815 sbfree(&so->so_rcv, m);
1816 m->m_nextpkt = NULL;
1817
1818 if (mp) {
1819 *mp = m;
1820 mp = &m->m_next;
1821 so->so_rcv.sb_mb = m = m->m_next;
1822 *mp = (struct mbuf *)0;
1823 } else {
1824 if (free_list == NULL)
1825 free_list = m;
1826 else
1827 ml->m_next = m;
1828 ml = m;
1829 so->so_rcv.sb_mb = m = m->m_next;
1830 ml->m_next = 0;
1831 }
1832 if (m)
1833 m->m_nextpkt = nextrecord;
1834 }
1835 } else {
1836 if (flags & MSG_PEEK)
1837 moff += len;
1838 else {
1839 if (mp)
1840 *mp = m_copym(m, 0, len, M_WAIT);
1841 m->m_data += len;
1842 m->m_len -= len;
1843 so->so_rcv.sb_cc -= len;
1844 }
1845 }
1846 if (so->so_oobmark) {
1847 if ((flags & MSG_PEEK) == 0) {
1848 so->so_oobmark -= len;
1849 if (so->so_oobmark == 0) {
1850 so->so_state |= SS_RCVATMARK;
1851 /*
1852 * delay posting the actual event until after
1853 * any delayed copy processing has finished
1854 */
1855 need_event = 1;
1856 break;
1857 }
1858 } else {
1859 offset += len;
1860 if (offset == so->so_oobmark)
1861 break;
1862 }
1863 }
1864 if (flags & MSG_EOR)
1865 break;
1866 /*
1867 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1868 * we must not quit until "uio->uio_resid == 0" or an error
1869 * termination. If a signal/timeout occurs, return
1870 * with a short count but without error.
1871 * Keep sockbuf locked against other readers.
1872 */
1873 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1874 !sosendallatonce(so) && !nextrecord) {
1875 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1876 goto release;
1877
1878 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1879 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1880 if (sbwait(&so->so_rcv)) {
1881 error = 0;
1882 goto release;
1883 }
1884 /*
1885 * have to wait until after we get back from the sbwait to do the copy because
1886 * we will drop the funnel if we have enough data that has been delayed... by dropping
1887 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1888 * and to change the state of this socket... we're issuing the sbwait because
1889 * the socket is empty and we're expecting the netisr thread to wake us up when more
1890 * packets arrive... if we allow that processing to happen and then sbwait, we
1891 * could stall forever with packets sitting in the socket if no further packets
1892 * arrive from the remote side.
1893 *
1894 * we want to copy before we've collected all the data to satisfy this request to
1895 * allow the copy to overlap the incoming packet processing on an MP system
1896 */
1897 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1898
1899 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1900
1901 if (error)
1902 goto release;
1903 }
1904 m = so->so_rcv.sb_mb;
1905 if (m) {
1906 nextrecord = m->m_nextpkt;
1907 }
1908 }
1909 }
1910 #ifdef MORE_LOCKING_DEBUG
1911 if (so->so_usecount <= 1)
1912 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1913 #endif
1914
1915 if (m && pr->pr_flags & PR_ATOMIC) {
1916 #ifdef __APPLE__
1917 if (so->so_options & SO_DONTTRUNC)
1918 flags |= MSG_RCVMORE;
1919 else {
1920 #endif
1921 flags |= MSG_TRUNC;
1922 if ((flags & MSG_PEEK) == 0)
1923 (void) sbdroprecord(&so->so_rcv);
1924 #ifdef __APPLE__
1925 }
1926 #endif
1927 }
1928 if ((flags & MSG_PEEK) == 0) {
1929 if (m == 0)
1930 so->so_rcv.sb_mb = nextrecord;
1931 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1932 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1933 }
1934 #ifdef __APPLE__
1935 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1936 flags |= MSG_HAVEMORE;
1937
1938 if (delayed_copy_len) {
1939 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1940
1941 if (error)
1942 goto release;
1943 }
1944 if (free_list) {
1945 m_freem_list((struct mbuf *)free_list);
1946 free_list = (struct mbuf *)0;
1947 }
1948 if (need_event)
1949 postevent(so, 0, EV_OOB);
1950 #endif
1951 if (orig_resid == uio_resid(uio) && orig_resid &&
1952 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1953 sbunlock(&so->so_rcv, 1);
1954 goto restart;
1955 }
1956
1957 if (flagsp)
1958 *flagsp |= flags;
1959 release:
1960 #ifdef MORE_LOCKING_DEBUG
1961 if (so->so_usecount <= 1)
1962 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1963 #endif
1964 if (delayed_copy_len) {
1965 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1966 }
1967 if (free_list) {
1968 m_freem_list((struct mbuf *)free_list);
1969 }
1970 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1971
1972 // LP64todo - fix this!
1973 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1974 so,
1975 uio_resid(uio),
1976 so->so_rcv.sb_cc,
1977 0,
1978 error);
1979
1980 return (error);
1981 }
1982
1983
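/*
 * sodelayed_copy() drains the list of mbufs whose copy-out was deferred by
 * soreceive(): it drops the socket lock, uiomove()s each mbuf's data to user
 * space, frees the whole chain, clears the caller's list and residual count,
 * and retakes the socket lock before returning.
 */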
1984 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1985 {
1986 int error = 0;
1987 struct mbuf *m;
1988
1989 m = *free_list;
1990
1991 socket_unlock(so, 0);
1992
1993 while (m && error == 0) {
1994
1995 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
1996
1997 m = m->m_next;
1998 }
1999 m_freem_list(*free_list);
2000
2001 *free_list = (struct mbuf *)NULL;
2002 *resid = 0;
2003
2004 socket_lock(so, 0);
2005
2006 return (error);
2007 }
2008
2009
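/*
 * Shut down one or both halves of a connection: anything other than SHUT_WR
 * flushes the receive side via sorflush(), and anything other than SHUT_RD
 * hands the request to the protocol's pru_shutdown.  Socket filters are
 * notified first via sflt_notify().
 */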
2010 int
2011 soshutdown(so, how)
2012 register struct socket *so;
2013 register int how;
2014 {
2015 register struct protosw *pr = so->so_proto;
2016 int ret;
2017
2018 socket_lock(so, 1);
2019
2020 sflt_notify(so, sock_evt_shutdown, &how);
2021
2022 if (how != SHUT_WR) {
2023 sorflush(so);
2024 postevent(so, 0, EV_RCLOSED);
2025 }
2026 if (how != SHUT_RD) {
2027 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2028 postevent(so, 0, EV_WCLOSED);
2029 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2030 socket_unlock(so, 1);
2031 return(ret);
2032 }
2033
2034 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2035 socket_unlock(so, 1);
2036 return (0);
2037 }
2038
2039 void
2040 sorflush(so)
2041 register struct socket *so;
2042 {
2043 register struct sockbuf *sb = &so->so_rcv;
2044 register struct protosw *pr = so->so_proto;
2045 struct sockbuf asb;
2046
2047 #ifdef MORE_LOCKING_DEBUG
2048 lck_mtx_t * mutex_held;
2049
2050 if (so->so_proto->pr_getlock != NULL)
2051 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2052 else
2053 mutex_held = so->so_proto->pr_domain->dom_mtx;
2054 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2055 #endif
2056
2057 sflt_notify(so, sock_evt_flush_read, NULL);
2058
2059 sb->sb_flags |= SB_NOINTR;
2060 (void) sblock(sb, M_WAIT);
2061 socantrcvmore(so);
2062 sbunlock(sb, 1);
2063 #ifdef __APPLE__
2064 selthreadclear(&sb->sb_sel);
2065 #endif
2066 asb = *sb;
2067 bzero((caddr_t)sb, sizeof (*sb));
2068 sb->sb_so = so; /* reestablish link to socket */
2069 if (asb.sb_flags & SB_KNOTE) {
2070 sb->sb_sel.si_note = asb.sb_sel.si_note;
2071 sb->sb_flags = SB_KNOTE;
2072 }
2073 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2074 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2075 sbrelease(&asb);
2076 }
2077
2078 /*
2079 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2080 * an additional variant to handle the case where the option value needs
2081 * to be some kind of integer, but not a specific size.
2082 * In addition to their use here, these functions are also called by the
2083 * protocol-level pr_ctloutput() routines.
2084 */
2085 int
2086 sooptcopyin(sopt, buf, len, minlen)
2087 struct sockopt *sopt;
2088 void *buf;
2089 size_t len;
2090 size_t minlen;
2091 {
2092 size_t valsize;
2093
2094 /*
2095 * If the user gives us more than we wanted, we ignore it,
2096 * but if we don't get the minimum length the caller
2097 * wants, we return EINVAL. On success, sopt->sopt_valsize
2098 * is set to however much we actually retrieved.
2099 */
2100 if ((valsize = sopt->sopt_valsize) < minlen)
2101 return EINVAL;
2102 if (valsize > len)
2103 sopt->sopt_valsize = valsize = len;
2104
2105 if (sopt->sopt_p != 0)
2106 return (copyin(sopt->sopt_val, buf, valsize));
2107
2108 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2109 return 0;
2110 }
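/*
 * As a rough sketch (names illustrative, not taken from this file), a
 * protocol pr_ctloutput() handler fetching a fixed-size integer option
 * would typically use sooptcopyin() like this:
 *
 *	int optval, error;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 *	... update protocol state from optval ...
 */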
2111
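/*
 * setsockopt(2) support at the socket layer: run the request past any
 * attached socket filters, pass non-SOL_SOCKET levels down to the
 * protocol's pr_ctloutput, handle the generic SOL_SOCKET options here,
 * and finally let the protocol observe SOL_SOCKET options that were set
 * successfully.
 */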
2112 int
2113 sosetopt(so, sopt)
2114 struct socket *so;
2115 struct sockopt *sopt;
2116 {
2117 int error, optval;
2118 struct linger l;
2119 struct timeval tv;
2120 short val;
2121
2122 socket_lock(so, 1);
2123
2124 if (sopt->sopt_dir != SOPT_SET) {
2125 sopt->sopt_dir = SOPT_SET;
2126 }
2127
2128 {
2129 struct socket_filter_entry *filter;
2130 int filtered = 0;
2131 error = 0;
2132 for (filter = so->so_filt; filter && (error == 0);
2133 filter = filter->sfe_next_onsocket) {
2134 if (filter->sfe_filter->sf_filter.sf_setoption) {
2135 if (filtered == 0) {
2136 filtered = 1;
2137 sflt_use(so);
2138 socket_unlock(so, 0);
2139 }
2140 error = filter->sfe_filter->sf_filter.sf_setoption(
2141 filter->sfe_cookie, so, sopt);
2142 }
2143 }
2144
2145 if (filtered != 0) {
2146 socket_lock(so, 0);
2147 sflt_unuse(so);
2148
2149 if (error) {
2150 if (error == EJUSTRETURN)
2151 error = 0;
2152 goto bad;
2153 }
2154 }
2155 }
2156
2157 error = 0;
2158 if (sopt->sopt_level != SOL_SOCKET) {
2159 if (so->so_proto && so->so_proto->pr_ctloutput) {
2160 error = (*so->so_proto->pr_ctloutput)
2161 (so, sopt);
2162 socket_unlock(so, 1);
2163 return (error);
2164 }
2165 error = ENOPROTOOPT;
2166 } else {
2167 switch (sopt->sopt_name) {
2168 case SO_LINGER:
2169 case SO_LINGER_SEC:
2170 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2171 if (error)
2172 goto bad;
2173
2174 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2175 if (l.l_onoff)
2176 so->so_options |= SO_LINGER;
2177 else
2178 so->so_options &= ~SO_LINGER;
2179 break;
2180
2181 case SO_DEBUG:
2182 case SO_KEEPALIVE:
2183 case SO_DONTROUTE:
2184 case SO_USELOOPBACK:
2185 case SO_BROADCAST:
2186 case SO_REUSEADDR:
2187 case SO_REUSEPORT:
2188 case SO_OOBINLINE:
2189 case SO_TIMESTAMP:
2190 #ifdef __APPLE__
2191 case SO_DONTTRUNC:
2192 case SO_WANTMORE:
2193 case SO_WANTOOBFLAG:
2194 #endif
2195 error = sooptcopyin(sopt, &optval, sizeof optval,
2196 sizeof optval);
2197 if (error)
2198 goto bad;
2199 if (optval)
2200 so->so_options |= sopt->sopt_name;
2201 else
2202 so->so_options &= ~sopt->sopt_name;
2203 break;
2204
2205 case SO_SNDBUF:
2206 case SO_RCVBUF:
2207 case SO_SNDLOWAT:
2208 case SO_RCVLOWAT:
2209 error = sooptcopyin(sopt, &optval, sizeof optval,
2210 sizeof optval);
2211 if (error)
2212 goto bad;
2213
2214 /*
2215 * Values < 1 make no sense for any of these
2216 * options, so disallow them.
2217 */
2218 if (optval < 1) {
2219 error = EINVAL;
2220 goto bad;
2221 }
2222
2223 switch (sopt->sopt_name) {
2224 case SO_SNDBUF:
2225 case SO_RCVBUF:
2226 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2227 &so->so_snd : &so->so_rcv,
2228 (u_long) optval) == 0) {
2229 error = ENOBUFS;
2230 goto bad;
2231 }
2232 break;
2233
2234 /*
2235 * Make sure the low-water is never greater than
2236 * the high-water.
2237 */
2238 case SO_SNDLOWAT:
2239 so->so_snd.sb_lowat =
2240 (optval > so->so_snd.sb_hiwat) ?
2241 so->so_snd.sb_hiwat : optval;
2242 break;
2243 case SO_RCVLOWAT:
2244 so->so_rcv.sb_lowat =
2245 (optval > so->so_rcv.sb_hiwat) ?
2246 so->so_rcv.sb_hiwat : optval;
2247 break;
2248 }
2249 break;
2250
2251 case SO_SNDTIMEO:
2252 case SO_RCVTIMEO:
2253 error = sooptcopyin(sopt, &tv, sizeof tv,
2254 sizeof tv);
2255 if (error)
2256 goto bad;
2257
2258 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2259 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2260 error = EDOM;
2261 goto bad;
2262 }
2263
2264 switch (sopt->sopt_name) {
2265 case SO_SNDTIMEO:
2266 so->so_snd.sb_timeo = tv;
2267 break;
2268 case SO_RCVTIMEO:
2269 so->so_rcv.sb_timeo = tv;
2270 break;
2271 }
2272 break;
2273
2274 case SO_NKE:
2275 {
2276 struct so_nke nke;
2277
2278 error = sooptcopyin(sopt, &nke,
2279 sizeof nke, sizeof nke);
2280 if (error)
2281 goto bad;
2282
2283 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2284 break;
2285 }
2286
2287 case SO_NOSIGPIPE:
2288 error = sooptcopyin(sopt, &optval, sizeof optval,
2289 sizeof optval);
2290 if (error)
2291 goto bad;
2292 if (optval)
2293 so->so_flags |= SOF_NOSIGPIPE;
2294 else
2295 so->so_flags &= ~SOF_NOSIGPIPE;
2296
2297 break;
2298
2299 case SO_NOADDRERR:
2300 error = sooptcopyin(sopt, &optval, sizeof optval,
2301 sizeof optval);
2302 if (error)
2303 goto bad;
2304 if (optval)
2305 so->so_flags |= SOF_NOADDRAVAIL;
2306 else
2307 so->so_flags &= ~SOF_NOADDRAVAIL;
2308
2309 break;
2310
2311 default:
2312 error = ENOPROTOOPT;
2313 break;
2314 }
2315 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2316 (void) ((*so->so_proto->pr_ctloutput)
2317 (so, sopt));
2318 }
2319 }
2320 bad:
2321 socket_unlock(so, 1);
2322 return (error);
2323 }
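/*
 * For reference, a minimal user-level sketch (error checking omitted) of
 * how such options reach sosetopt() through setsockopt(2), declared in
 * <sys/socket.h>; "s" is assumed to be an open socket descriptor:
 *
 *	struct linger l = { 1, 5 };	(l_onoff = 1, l_linger = 5)
 *	int on = 1;
 *
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof (l));
 *	setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof (on));
 */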
2324
2325 /* Helper routine for getsockopt */
2326 int
2327 sooptcopyout(sopt, buf, len)
2328 struct sockopt *sopt;
2329 void *buf;
2330 size_t len;
2331 {
2332 int error;
2333 size_t valsize;
2334
2335 error = 0;
2336
2337 /*
2338 * Documented get behavior is that we always return a value,
2339 * possibly truncated to fit in the user's buffer.
2340 * Traditional behavior is that we always tell the user
2341 * precisely how much we copied, rather than something useful
2342 * like the total amount we had available for her.
2343 * Note that this interface is not idempotent; the entire answer must
2344 * be generated ahead of time.
2345 */
2346 valsize = min(len, sopt->sopt_valsize);
2347 sopt->sopt_valsize = valsize;
2348 if (sopt->sopt_val != USER_ADDR_NULL) {
2349 if (sopt->sopt_p != 0)
2350 error = copyout(buf, sopt->sopt_val, valsize);
2351 else
2352 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2353 }
2354 return error;
2355 }
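/*
 * The matching get-side pattern, again only a sketch with illustrative
 * names: compute the current value and return it through sooptcopyout(),
 * which truncates to the size of the user's buffer as described above:
 *
 *	int optval;
 *
 *	optval = current_value_of_the_option;
 *	return (sooptcopyout(sopt, &optval, sizeof optval));
 */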
2356
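/*
 * getsockopt(2) counterpart: run the request past any attached socket
 * filters, hand non-SOL_SOCKET levels to the protocol's pr_ctloutput,
 * and answer the generic SOL_SOCKET options directly.
 */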
2357 int
2358 sogetopt(so, sopt)
2359 struct socket *so;
2360 struct sockopt *sopt;
2361 {
2362 int error, optval;
2363 struct linger l;
2364 struct timeval tv;
2365
2366 if (sopt->sopt_dir != SOPT_GET) {
2367 sopt->sopt_dir = SOPT_GET;
2368 }
2369
2370 socket_lock(so, 1);
2371
2372 {
2373 struct socket_filter_entry *filter;
2374 int filtered = 0;
2375 error = 0;
2376 for (filter = so->so_filt; filter && (error == 0);
2377 filter = filter->sfe_next_onsocket) {
2378 if (filter->sfe_filter->sf_filter.sf_getoption) {
2379 if (filtered == 0) {
2380 filtered = 1;
2381 sflt_use(so);
2382 socket_unlock(so, 0);
2383 }
2384 error = filter->sfe_filter->sf_filter.sf_getoption(
2385 filter->sfe_cookie, so, sopt);
2386 }
2387 }
2388 if (filtered != 0) {
2389 socket_lock(so, 0);
2390 sflt_unuse(so);
2391
2392 if (error) {
2393 if (error == EJUSTRETURN)
2394 error = 0;
2395 socket_unlock(so, 1);
2396 return error;
2397 }
2398 }
2399 }
2400
2401 error = 0;
2402 if (sopt->sopt_level != SOL_SOCKET) {
2403 if (so->so_proto && so->so_proto->pr_ctloutput) {
2404 error = (*so->so_proto->pr_ctloutput)
2405 (so, sopt);
2406 socket_unlock(so, 1);
2407 return (error);
2408 } else {
2409 socket_unlock(so, 1);
2410 return (ENOPROTOOPT);
2411 }
2412 } else {
2413 switch (sopt->sopt_name) {
2414 case SO_LINGER:
2415 case SO_LINGER_SEC:
2416 l.l_onoff = so->so_options & SO_LINGER;
2417 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2418 so->so_linger / hz;
2419 error = sooptcopyout(sopt, &l, sizeof l);
2420 break;
2421
2422 case SO_USELOOPBACK:
2423 case SO_DONTROUTE:
2424 case SO_DEBUG:
2425 case SO_KEEPALIVE:
2426 case SO_REUSEADDR:
2427 case SO_REUSEPORT:
2428 case SO_BROADCAST:
2429 case SO_OOBINLINE:
2430 case SO_TIMESTAMP:
2431 #ifdef __APPLE__
2432 case SO_DONTTRUNC:
2433 case SO_WANTMORE:
2434 case SO_WANTOOBFLAG:
2435 #endif
2436 optval = so->so_options & sopt->sopt_name;
2437 integer:
2438 error = sooptcopyout(sopt, &optval, sizeof optval);
2439 break;
2440
2441 case SO_TYPE:
2442 optval = so->so_type;
2443 goto integer;
2444
2445 #ifdef __APPLE__
2446 case SO_NREAD:
2447 {
2448 int pkt_total;
2449 struct mbuf *m1;
2450
2451 pkt_total = 0;
2452 m1 = so->so_rcv.sb_mb;
2453 if (so->so_proto->pr_flags & PR_ATOMIC)
2454 {
2455 while (m1) {
2456 if (m1->m_type == MT_DATA)
2457 pkt_total += m1->m_len;
2458 m1 = m1->m_next;
2459 }
2460 optval = pkt_total;
2461 } else
2462 optval = so->so_rcv.sb_cc;
2463 goto integer;
2464 }
2465 case SO_NWRITE:
2466 optval = so->so_snd.sb_cc;
2467 goto integer;
2468 #endif
2469 case SO_ERROR:
2470 optval = so->so_error;
2471 so->so_error = 0;
2472 goto integer;
2473
2474 case SO_SNDBUF:
2475 optval = so->so_snd.sb_hiwat;
2476 goto integer;
2477
2478 case SO_RCVBUF:
2479 optval = so->so_rcv.sb_hiwat;
2480 goto integer;
2481
2482 case SO_SNDLOWAT:
2483 optval = so->so_snd.sb_lowat;
2484 goto integer;
2485
2486 case SO_RCVLOWAT:
2487 optval = so->so_rcv.sb_lowat;
2488 goto integer;
2489
2490 case SO_SNDTIMEO:
2491 case SO_RCVTIMEO:
2492 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2493 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2494
2495 error = sooptcopyout(sopt, &tv, sizeof tv);
2496 break;
2497
2498 case SO_NOSIGPIPE:
2499 optval = (so->so_flags & SOF_NOSIGPIPE);
2500 goto integer;
2501
2502 case SO_NOADDRERR:
2503 optval = (so->so_flags & SOF_NOADDRAVAIL);
2504 goto integer;
2505
2506 default:
2507 error = ENOPROTOOPT;
2508 break;
2509 }
2510 socket_unlock(so, 1);
2511 return (error);
2512 }
2513 }
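/*
 * Likewise, a user-level sketch of the get side; SO_NREAD (handled above)
 * reports how many bytes are waiting in the next datagram, or in the
 * stream, without consuming them ("s" is again an open socket descriptor
 * and error checking is omitted):
 *
 *	int nread = 0;
 *	socklen_t len = sizeof (nread);
 *
 *	getsockopt(s, SOL_SOCKET, SO_NREAD, &nread, &len);
 */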
2514
2515 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2516 int
2517 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2518 {
2519 struct mbuf *m, *m_prev;
2520 int sopt_size = sopt->sopt_valsize;
2521
2522 if (sopt_size > MAX_SOOPTGETM_SIZE)
2523 return EMSGSIZE;
2524
2525 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2526 if (m == 0)
2527 return ENOBUFS;
2528 if (sopt_size > MLEN) {
2529 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2530 if ((m->m_flags & M_EXT) == 0) {
2531 m_free(m);
2532 return ENOBUFS;
2533 }
2534 m->m_len = min(MCLBYTES, sopt_size);
2535 } else {
2536 m->m_len = min(MLEN, sopt_size);
2537 }
2538 sopt_size -= m->m_len;
2539 *mp = m;
2540 m_prev = m;
2541
2542 while (sopt_size) {
2543 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2544 if (m == 0) {
2545 m_freem(*mp);
2546 return ENOBUFS;
2547 }
2548 if (sopt_size > MLEN) {
2549 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2550 if ((m->m_flags & M_EXT) == 0) {
2551 m_freem(*mp);
2552 return ENOBUFS;
2553 }
2554 m->m_len = min(MCLBYTES, sopt_size);
2555 } else {
2556 m->m_len = min(MLEN, sopt_size);
2557 }
2558 sopt_size -= m->m_len;
2559 m_prev->m_next = m;
2560 m_prev = m;
2561 }
2562 return 0;
2563 }
2564
2565 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2566 int
2567 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2568 {
2569 struct mbuf *m0 = m;
2570
2571 if (sopt->sopt_val == USER_ADDR_NULL)
2572 return 0;
2573 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2574 if (sopt->sopt_p != NULL) {
2575 int error;
2576
2577 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2578 if (error != 0) {
2579 m_freem(m0);
2580 return(error);
2581 }
2582 } else
2583 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2584 sopt->sopt_valsize -= m->m_len;
2585 sopt->sopt_val += m->m_len;
2586 m = m->m_next;
2587 }
2588 if (m != NULL) /* the chain should have been allocated large enough at ip6_sooptmcopyin() */
2589 panic("soopt_mcopyin");
2590 return 0;
2591 }
2592
2593 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2594 int
2595 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2596 {
2597 struct mbuf *m0 = m;
2598 size_t valsize = 0;
2599
2600 if (sopt->sopt_val == USER_ADDR_NULL)
2601 return 0;
2602 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2603 if (sopt->sopt_p != NULL) {
2604 int error;
2605
2606 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2607 if (error != 0) {
2608 m_freem(m0);
2609 return(error);
2610 }
2611 } else
2612 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2613 sopt->sopt_valsize -= m->m_len;
2614 sopt->sopt_val += m->m_len;
2615 valsize += m->m_len;
2616 m = m->m_next;
2617 }
2618 if (m != NULL) {
2619 /* the caller should have supplied a large enough option buffer */
2620 m_freem(m0);
2621 return(EINVAL);
2622 }
2623 sopt->sopt_valsize = valsize;
2624 return 0;
2625 }
2626
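/*
 * Called by protocols when out-of-band data arrives: deliver SIGURG to
 * the owning process or process group and wake up any threads selecting
 * on the receive buffer.
 */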
2627 void
2628 sohasoutofband(so)
2629 register struct socket *so;
2630 {
2631 struct proc *p;
2632
2633 if (so->so_pgid < 0)
2634 gsignal(-so->so_pgid, SIGURG);
2635 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2636 psignal(p, SIGURG);
2637 selwakeup(&so->so_rcv.sb_sel);
2638 }
2639
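/*
 * select(2)/poll(2) back end for sockets: report which of the requested
 * events are currently true and, if none are, record the calling thread
 * on the appropriate socket buffer(s) so it is woken when the state
 * changes.
 */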
2640 int
2641 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2642 {
2643 struct proc *p = current_proc();
2644 int revents = 0;
2645
2646 socket_lock(so, 1);
2647
2648 if (events & (POLLIN | POLLRDNORM))
2649 if (soreadable(so))
2650 revents |= events & (POLLIN | POLLRDNORM);
2651
2652 if (events & (POLLOUT | POLLWRNORM))
2653 if (sowriteable(so))
2654 revents |= events & (POLLOUT | POLLWRNORM);
2655
2656 if (events & (POLLPRI | POLLRDBAND))
2657 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2658 revents |= events & (POLLPRI | POLLRDBAND);
2659
2660 if (revents == 0) {
2661 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2662 /* Darwin sets the flag first, BSD calls selrecord first */
2663 so->so_rcv.sb_flags |= SB_SEL;
2664 selrecord(p, &so->so_rcv.sb_sel, wql);
2665 }
2666
2667 if (events & (POLLOUT | POLLWRNORM)) {
2668 /* Darwin sets the flag first, BSD calls selrecord first */
2669 so->so_snd.sb_flags |= SB_SEL;
2670 selrecord(p, &so->so_snd.sb_sel, wql);
2671 }
2672 }
2673
2674 socket_unlock(so, 1);
2675 return (revents);
2676 }
2677
2678 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2679
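/*
 * kqueue attach routine for sockets: choose the listen, read or write
 * filter ops based on the requested filter and the socket state, then
 * attach the knote to the corresponding socket buffer.
 */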
2680 int
2681 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2682 {
2683 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2684 struct sockbuf *sb;
2685 socket_lock(so, 1);
2686
2687 switch (kn->kn_filter) {
2688 case EVFILT_READ:
2689 if (so->so_options & SO_ACCEPTCONN)
2690 kn->kn_fop = &solisten_filtops;
2691 else
2692 kn->kn_fop = &soread_filtops;
2693 sb = &so->so_rcv;
2694 break;
2695 case EVFILT_WRITE:
2696 kn->kn_fop = &sowrite_filtops;
2697 sb = &so->so_snd;
2698 break;
2699 default:
2700 socket_unlock(so, 1);
2701 return (1);
2702 }
2703
2704 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2705 sb->sb_flags |= SB_KNOTE;
2706 socket_unlock(so, 1);
2707 return (0);
2708 }
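/*
 * A user-level sketch of what drives this attach path: registering a read
 * event for a socket descriptor "s" through kqueue(2)/kevent(2), declared
 * in <sys/event.h>, ends up here via soo_kqfilter() (error checking
 * omitted):
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *
 *	EV_SET(&ev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 */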
2709
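/*
 * Detach a read knote from the receive buffer, clearing SB_KNOTE once the
 * last knote has been removed.
 */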
2710 static void
2711 filt_sordetach(struct knote *kn)
2712 {
2713 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2714
2715 socket_lock(so, 1);
2716 if (so->so_rcv.sb_flags & SB_KNOTE)
2717 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2718 so->so_rcv.sb_flags &= ~SB_KNOTE;
2719 socket_unlock(so, 1);
2720 }
2721
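/*
 * Read event filter: kn_data is the number of readable bytes (or, when
 * out-of-band data is pending, the distance to the OOB mark); the filter
 * fires on EOF, on pending OOB data, on a pending socket error, or when
 * enough data is buffered to satisfy the low-water mark (or the
 * NOTE_LOWAT threshold, if one was requested).
 */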
2722 /*ARGSUSED*/
2723 static int
2724 filt_soread(struct knote *kn, long hint)
2725 {
2726 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2727
2728 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2729 socket_lock(so, 1);
2730
2731 if (so->so_oobmark) {
2732 if (kn->kn_flags & EV_OOBAND) {
2733 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2734 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2735 socket_unlock(so, 1);
2736 return (1);
2737 }
2738 kn->kn_data = so->so_oobmark;
2739 kn->kn_flags |= EV_OOBAND;
2740 } else {
2741 kn->kn_data = so->so_rcv.sb_cc;
2742 if (so->so_state & SS_CANTRCVMORE) {
2743 kn->kn_flags |= EV_EOF;
2744 kn->kn_fflags = so->so_error;
2745 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2746 socket_unlock(so, 1);
2747 return (1);
2748 }
2749 }
2750
2751 if (so->so_state & SS_RCVATMARK) {
2752 if (kn->kn_flags & EV_OOBAND) {
2753 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2754 socket_unlock(so, 1);
2755 return (1);
2756 }
2757 kn->kn_flags |= EV_OOBAND;
2758 } else if (kn->kn_flags & EV_OOBAND) {
2759 kn->kn_data = 0;
2760 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2761 socket_unlock(so, 1);
2762 return (0);
2763 }
2764
2765 if (so->so_error) { /* temporary udp error */
2766 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2767 socket_unlock(so, 1);
2768 return (1);
2769 }
2770
2771 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2772 socket_unlock(so, 1);
2773
2774 return( kn->kn_flags & EV_OOBAND ||
2775 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2776 kn->kn_sdata : so->so_rcv.sb_lowat));
2777 }
2778
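/*
 * Detach a write knote from the send buffer, clearing SB_KNOTE once the
 * last knote has been removed.
 */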
2779 static void
2780 filt_sowdetach(struct knote *kn)
2781 {
2782 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2783 socket_lock(so, 1);
2784
2785 if (so->so_snd.sb_flags & SB_KNOTE)
2786 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2787 so->so_snd.sb_flags &= ~SB_KNOTE;
2788 socket_unlock(so, 1);
2789 }
2790
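/*
 * Write event filter: kn_data is the free space in the send buffer; the
 * filter fires on send-side EOF, on a pending socket error, or when the
 * space reaches the low-water mark (or the NOTE_LOWAT threshold), but not
 * while a connection-oriented socket is still unconnected.
 */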
2791 /*ARGSUSED*/
2792 static int
2793 filt_sowrite(struct knote *kn, long hint)
2794 {
2795 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2796
2797 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2798 socket_lock(so, 1);
2799
2800 kn->kn_data = sbspace(&so->so_snd);
2801 if (so->so_state & SS_CANTSENDMORE) {
2802 kn->kn_flags |= EV_EOF;
2803 kn->kn_fflags = so->so_error;
2804 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2805 socket_unlock(so, 1);
2806 return (1);
2807 }
2808 if (so->so_error) { /* temporary udp error */
2809 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2810 socket_unlock(so, 1);
2811 return (1);
2812 }
2813 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2814 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2815 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2816 socket_unlock(so, 1);
2817 return (0);
2818 }
2819 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2820 socket_unlock(so, 1);
2821 if (kn->kn_sfflags & NOTE_LOWAT)
2822 return (kn->kn_data >= kn->kn_sdata);
2823 return (kn->kn_data >= so->so_snd.sb_lowat);
2824 }
2825
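/*
 * Listen event filter: kn_data is the length of the listen backlog; the
 * filter fires when a completed connection is waiting to be accepted.
 */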
2826 /*ARGSUSED*/
2827 static int
2828 filt_solisten(struct knote *kn, long hint)
2829 {
2830 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2831 int have_connection;
2832
2833 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2834 socket_lock(so, 1);
2835 kn->kn_data = so->so_qlen;
2836 have_connection = !TAILQ_EMPTY(&so->so_comp);
2837 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2838 socket_unlock(so, 1);
2839 return (have_connection);
2840 }
2841
2842
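/*
 * Take the socket's lock, using the protocol's pr_lock routine when one
 * is provided and the domain mutex otherwise; when "refcount" is set,
 * also take a use-count reference on the socket. The caller's return
 * address may be recorded for lock debugging.
 */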
2843 int
2844 socket_lock(so, refcount)
2845 struct socket *so;
2846 int refcount;
2847 {
2848 int error = 0, lr = 0, lr_saved = 0; /* initialized for non-__ppc__ builds */
2849 #ifdef __ppc__
2850 __asm__ volatile("mflr %0" : "=r" (lr));
2851 lr_saved = lr;
2852 #endif
2853
2854 if (so->so_proto->pr_lock) {
2855 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2856 }
2857 else {
2858 #ifdef MORE_LOCKING_DEBUG
2859 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2860 #endif
2861 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2862 if (refcount)
2863 so->so_usecount++;
2864 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2865 }
2866
2867 return(error);
2868
2869 }
2870
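/*
 * Release the socket's lock via the protocol's pr_unlock when one is
 * provided; otherwise drop the use count if requested, free the socket on
 * the last reference, and unlock the domain mutex.
 */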
2871 int
2872 socket_unlock(so, refcount)
2873 struct socket *so;
2874 int refcount;
2875 {
2876 int error = 0, lr = 0, lr_saved = 0; /* initialized for non-__ppc__ builds */
2877 lck_mtx_t * mutex_held;
2878
2879 #ifdef __ppc__
2880 __asm__ volatile("mflr %0" : "=r" (lr));
2881 lr_saved = lr;
2882 #endif
2883
2884
2885
2886 if (so->so_proto == NULL)
2887 panic("socket_unlock null so_proto so=%x\n", so);
2888
2889 if (so && so->so_proto->pr_unlock)
2890 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2891 else {
2892 mutex_held = so->so_proto->pr_domain->dom_mtx;
2893 #ifdef MORE_LOCKING_DEBUG
2894 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2895 #endif
2896 if (refcount) {
2897 if (so->so_usecount <= 0)
2898 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2899 so->so_usecount--;
2900 if (so->so_usecount == 0) {
2901 sofreelastref(so, 1);
2902 }
2903 else
2904 so->reserved4 = (void*)lr_saved; /* save caller */
2905 }
2906 lck_mtx_unlock(mutex_held);
2907 }
2908
2909 return(error);
2910 }
2911 //### Called with socket locked, will unlock socket
2912 void
2913 sofree(so)
2914 struct socket *so;
2915 {
2916
2917 int lr, lr_saved;
2918 lck_mtx_t * mutex_held;
2919 #ifdef __ppc__
2920 __asm__ volatile("mflr %0" : "=r" (lr));
2921 lr_saved = lr;
2922 #endif
2923 if (so->so_proto->pr_getlock != NULL)
2924 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2925 else
2926 mutex_held = so->so_proto->pr_domain->dom_mtx;
2927 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2928
2929 /* Remove the filters */
2930 sflt_termsock(so);
2931
2932 sofreelastref(so, 0);
2933 }
2934
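/*
 * Take an additional use-count reference on an already-allocated socket.
 */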
2935 void
2936 soreference(so)
2937 struct socket *so;
2938 {
2939 socket_lock(so, 1); /* lock the socket and take one reference */
2940 socket_unlock(so, 0); /* unlock only; keep the reference */
2941 }
2942
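/*
 * Drop a use-count reference taken with soreference(), freeing the socket
 * if this was the last one.
 */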
2943 void
2944 sodereference(so)
2945 struct socket *so;
2946 {
2947 socket_lock(so, 0);
2948 socket_unlock(so, 1);
2949 }