1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/proc.h>
63 #include <sys/fcntl.h>
64 #include <sys/malloc.h>
65 #include <sys/mbuf.h>
66 #include <sys/domain.h>
67 #include <sys/kernel.h>
68 #include <sys/poll.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/resourcevar.h>
73 #include <sys/signalvar.h>
74 #include <sys/sysctl.h>
75 #include <sys/uio.h>
76 #include <sys/ev.h>
77 #include <sys/kdebug.h>
78 #include <net/route.h>
79 #include <netinet/in.h>
80 #include <netinet/in_pcb.h>
81 #include <kern/zalloc.h>
82 #include <machine/limits.h>
83
84 int so_cache_hw = 0;
85 int so_cache_timeouts = 0;
86 int so_cache_max_freed = 0;
87 int cached_sock_count = 0;
88 struct socket *socket_cache_head = 0;
89 struct socket *socket_cache_tail = 0;
90 u_long so_cache_time = 0;
91 int so_cache_init_done = 0;
92 struct zone *so_cache_zone;
93 extern int get_inpcb_str_size();
94 extern int get_tcp_str_size();
95
98 int socket_debug = 0;
99 int socket_zone = M_SOCKET;
100 so_gen_t so_gencnt; /* generation count for sockets */
101
102 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
103 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
104
105 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
106 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
107 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
108 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
109 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
110 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
111 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
112
113
114 SYSCTL_DECL(_kern_ipc);
115
116 static int somaxconn = SOMAXCONN;
117 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
118 0, "");
119
120 /* Should we get a maximum also ??? */
121 static int sosendmaxchain = 65536;
122 static int sosendminchain = 16384;
123 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
124 0, "");
125
126 void so_cache_timer();
127 struct mbuf *m_getpackets(int, int, int);
128
129
130 /*
131 * Socket operation routines.
132 * These routines are called by the routines in
133 * sys_socket.c or from a system process, and
134 * implement the semantics of socket operations by
135 * switching out to the protocol specific routines.
136 */
137
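/*
 * Illustrative sketch, not part of this file: the user-space sequence below
 * is what ultimately drives these routines -- socket(2) reaches socreate(),
 * bind(2) reaches sobind(), listen(2) reaches solisten(), and accept(2)
 * reaches soaccept(), each via sys_socket.c and the protocol's pr_usrreqs.
 * Standard sockets API only; error handling abbreviated.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>

static int
listen_on_port(unsigned short port)
{
	struct sockaddr_in sin;
	int fd;

	fd = socket(PF_INET, SOCK_STREAM, 0);		/* -> socreate() */
	if (fd < 0)
		return (-1);
	memset(&sin, 0, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(port);
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(fd, (struct sockaddr *)&sin, sizeof (sin)) < 0 ||
	    listen(fd, 5) < 0) {		/* -> sobind(), solisten() */
		close(fd);
		return (-1);
	}
	return (fd);		/* accept(2) on fd ends up in soaccept() */
}
#endif
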
138 #ifdef __APPLE__
139 void socketinit()
140 {
141 vm_size_t str_size;
142
143 so_cache_init_done = 1;
144
145 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
146 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
147 get_inpcb_str_size() + 4 +
148 get_tcp_str_size());
149 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
150 #if TEMPDEBUG
151 kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
152 #endif
153
154 }
155
156 void cached_sock_alloc(so, waitok)
157 struct socket **so;
158 int waitok;
159
160 {
161 caddr_t temp;
162 int s;
163 register u_long offset;
164
165
166 s = splnet();
167 if (cached_sock_count) {
168 cached_sock_count--;
169 *so = socket_cache_head;
170 if (*so == 0)
171 panic("cached_sock_alloc: cached sock is null");
172
173 socket_cache_head = socket_cache_head->cache_next;
174 if (socket_cache_head)
175 socket_cache_head->cache_prev = 0;
176 else
177 socket_cache_tail = 0;
178 splx(s);
179
180 temp = (*so)->so_saved_pcb;
181 bzero((caddr_t)*so, sizeof(struct socket));
182 #if TEMPDEBUG
183 kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so,
184 cached_sock_count);
185 #endif
186 (*so)->so_saved_pcb = temp;
187 }
188 else {
189 #if TEMPDEBUG
190 kprintf("Allocating cached sock %x from memory\n", *so);
191 #endif
192
193 splx(s);
194 if (waitok)
195 *so = (struct socket *) zalloc(so_cache_zone);
196 else
197 *so = (struct socket *) zalloc_noblock(so_cache_zone);
198
199 if (*so == 0)
200 return;
201
202 bzero((caddr_t)*so, sizeof(struct socket));
203
204 /*
205 * Define offsets for extra structures into our single block of
206 * memory. Align extra structures on longword boundaries.
207 */
208
209
210 offset = (u_long) *so;
211 offset += sizeof(struct socket);
212 if (offset & 0x3) {
213 offset += 4;
214 offset &= 0xfffffffc;
215 }
216 (*so)->so_saved_pcb = (caddr_t) offset;
217 offset += get_inpcb_str_size();
218 if (offset & 0x3) {
219 offset += 4;
220 offset &= 0xfffffffc;
221 }
222
223 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
224 #if TEMPDEBUG
225 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
226 (*so)->so_saved_pcb,
227 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
228 #endif
229 }
230
231 (*so)->cached_in_sock_layer = 1;
232 }
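
/*
 * Illustrative note, not part of this file: the alignment step above rounds
 * `offset' up to the next longword (4-byte) boundary.  It is equivalent to
 * the conventional branch-free form, as this stand-alone check shows:
 */
#if 0
#include <assert.h>

static void
check_alignment_idiom(void)
{
	unsigned long off;

	for (off = 0; off < 64; off++) {
		unsigned long a = off;

		if (a & 0x3) {		/* the idiom in cached_sock_alloc() */
			a += 4;
			a &= 0xfffffffc;
		}
		assert(a == ((off + 3) & ~(unsigned long)3));
	}
}
#endif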
233
234
235 void cached_sock_free(so)
236 struct socket *so;
237 {
238 int s;
239
240
241 s = splnet();
242 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
243 --cached_sock_count;
244 splx(s);
245 #if TEMPDEBUG
246 kprintf("Freeing overflowed cached socket %x\n", so);
247 #endif
248 zfree(so_cache_zone, (vm_offset_t) so);
249 }
250 else {
251 #if TEMPDEBUG
252 kprintf("Freeing socket %x into cache\n", so);
253 #endif
254 if (so_cache_hw < cached_sock_count)
255 so_cache_hw = cached_sock_count;
256
257 so->cache_next = socket_cache_head;
258 so->cache_prev = 0;
259 if (socket_cache_head)
260 socket_cache_head->cache_prev = so;
261 else
262 socket_cache_tail = so;
263
264 so->cache_timestamp = so_cache_time;
265 socket_cache_head = so;
266 splx(s);
267 }
268
269 #if TEMPDEBUG
270 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
271 #endif
272
273
274 }
275
276
277 void so_cache_timer()
278 {
279 register struct socket *p;
280 register int s;
281 register int n_freed = 0;
282 boolean_t funnel_state;
283
284 funnel_state = thread_funnel_set(network_flock, TRUE);
285
286 ++so_cache_time;
287
288 s = splnet();
289
290 while ((p = socket_cache_tail) != NULL)
291 {
292 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
293 break;
294
295 so_cache_timeouts++;
296
297 if ((socket_cache_tail = p->cache_prev) != NULL)
298 p->cache_prev->cache_next = 0;
299 if (--cached_sock_count == 0)
300 socket_cache_head = 0;
301
302 splx(s);
303
304 zfree(so_cache_zone, (vm_offset_t) p);
305
306 splnet();
307 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
308 {
309 so_cache_max_freed++;
310 break;
311 }
312 }
313 splx(s);
314
315 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
316
317 (void) thread_funnel_set(network_flock, FALSE);
318
319 }
320 #endif /* __APPLE__ */
321
322 /*
323 * Get a socket structure from our zone, and initialize it.
324 * We don't implement `waitok' yet (see comments in uipc_domain.c).
325 * Note that it would probably be better to allocate socket
326 * and PCB at the same time, but I'm not convinced that all
327 * the protocols can be easily modified to do this.
328 */
329 struct socket *
330 soalloc(waitok, dom, type)
331 int waitok;
332 int dom;
333 int type;
334 {
335 struct socket *so;
336
337 if ((dom == PF_INET) && (type == SOCK_STREAM))
338 cached_sock_alloc(&so, waitok);
339 else
340 {
341 so = _MALLOC_ZONE(sizeof(*so), socket_zone, M_WAITOK);
342 if (so)
343 bzero(so, sizeof *so);
344 }
345 /* XXX race condition for reentrant kernel */
346
347 if (so) {
348 so->so_gencnt = ++so_gencnt;
349 so->so_zone = socket_zone;
350 }
351
352 return so;
353 }
354
355 int
356 socreate(dom, aso, type, proto)
357 int dom;
358 struct socket **aso;
359 register int type;
360 int proto;
361 {
362 struct proc *p = current_proc();
363 register struct protosw *prp;
364 register struct socket *so;
365 register int error = 0;
366
367 if (proto)
368 prp = pffindproto(dom, proto, type);
369 else
370 prp = pffindtype(dom, type);
371
372 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
373 return (EPROTONOSUPPORT);
374 #ifndef __APPLE__
375
376 if (p->p_prison && jail_socket_unixiproute_only &&
377 prp->pr_domain->dom_family != PF_LOCAL &&
378 prp->pr_domain->dom_family != PF_INET &&
379 prp->pr_domain->dom_family != PF_ROUTE) {
380 return (EPROTONOSUPPORT);
381 }
382
383 #endif
384 if (prp->pr_type != type)
385 return (EPROTOTYPE);
386 so = soalloc(p != 0, dom, type);
387 if (so == 0)
388 return (ENOBUFS);
389
390 TAILQ_INIT(&so->so_incomp);
391 TAILQ_INIT(&so->so_comp);
392 so->so_type = type;
393
394 #ifdef __APPLE__
395 if (p != 0) {
396 if (p->p_ucred->cr_uid == 0)
397 so->so_state = SS_PRIV;
398
399 so->so_uid = p->p_ucred->cr_uid;
400 }
401 #else
402 so->so_cred = p->p_ucred;
403 crhold(so->so_cred);
404 #endif
405 so->so_proto = prp;
406 #ifdef __APPLE__
407 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
408 if (prp->pr_sfilter.tqh_first)
409 error = sfilter_init(so);
410 if (error == 0)
411 #endif
412 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
413 if (error) {
414 so->so_state |= SS_NOFDREF;
415 sofree(so);
416 return (error);
417 }
418 #ifdef __APPLE__
419 prp->pr_domain->dom_refs++;
420 so->so_rcv.sb_so = so->so_snd.sb_so = so;
421 TAILQ_INIT(&so->so_evlist);
422 #endif
423 *aso = so;
424 return (0);
425 }
426
427 int
428 sobind(so, nam)
429 struct socket *so;
430 struct sockaddr *nam;
431
432 {
433 struct proc *p = current_proc();
434 int error;
435 struct kextcb *kp;
436 int s = splnet();
437
438 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
439 if (error == 0) {
440 kp = sotokextcb(so);
441 while (kp) {
442 if (kp->e_soif && kp->e_soif->sf_sobind) {
443 error = (*kp->e_soif->sf_sobind)(so, nam, kp);
444 if (error) {
445 if (error == EJUSTRETURN) {
446 error = 0;
447 break;
448 }
449 splx(s);
450 return(error);
451 }
452 }
453 kp = kp->e_next;
454 }
455 }
456 splx(s);
457 return (error);
458 }
459
460 void
461 sodealloc(so)
462 struct socket *so;
463 {
464 so->so_gencnt = ++so_gencnt;
465
466 #ifndef __APPLE__
467 if (so->so_rcv.sb_hiwat)
468 (void)chgsbsize(so->so_cred->cr_uidinfo,
469 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
470 if (so->so_snd.sb_hiwat)
471 (void)chgsbsize(so->so_cred->cr_uidinfo,
472 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
473 #ifdef INET
474 if (so->so_accf != NULL) {
475 if (so->so_accf->so_accept_filter != NULL &&
476 so->so_accf->so_accept_filter->accf_destroy != NULL) {
477 so->so_accf->so_accept_filter->accf_destroy(so);
478 }
479 if (so->so_accf->so_accept_filter_str != NULL)
480 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
481 FREE(so->so_accf, M_ACCF);
482 }
483 #endif /* INET */
484 crfree(so->so_cred);
485 zfreei(so->so_zone, so);
486 #else
487 if (so->cached_in_sock_layer == 1)
488 cached_sock_free(so);
489 else
490 _FREE_ZONE(so, sizeof(*so), so->so_zone);
491 #endif /* __APPLE__ */
492 }
493
494 int
495 solisten(so, backlog)
496 register struct socket *so;
497 int backlog;
498
499 {
500 struct kextcb *kp;
501 struct proc *p = current_proc();
502 int s, error;
503
504 s = splnet();
505 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
506 if (error) {
507 splx(s);
508 return (error);
509 }
510 if (TAILQ_EMPTY(&so->so_comp))
511 so->so_options |= SO_ACCEPTCONN;
512 if (backlog < 0 || backlog > somaxconn)
513 backlog = somaxconn;
514 so->so_qlimit = backlog;
515 kp = sotokextcb(so);
516 while (kp) {
517 if (kp->e_soif && kp->e_soif->sf_solisten) {
518 error = (*kp->e_soif->sf_solisten)(so, kp);
519 if (error) {
520 if (error == EJUSTRETURN) {
521 error = 0;
522 break;
523 }
524 splx(s);
525 return(error);
526 }
527 }
528 kp = kp->e_next;
529 }
530
531 splx(s);
532 return (0);
533 }
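
/*
 * Illustrative sketch, not part of this file: solisten() silently clamps
 * the backlog to the somaxconn sysctl above (a negative backlog also
 * becomes the limit).  The effective ceiling can be read back from user
 * space with the standard Darwin/BSD sysctl API:
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

static void
show_listen_ceiling(void)
{
	int maxconn;
	size_t len = sizeof (maxconn);

	if (sysctlbyname("kern.ipc.somaxconn", &maxconn, &len, NULL, 0) == 0)
		printf("listen(2) backlog is clamped to %d\n", maxconn);
}
#endif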
534
535
536 void
537 sofree(so)
538 register struct socket *so;
539 {
540 int error;
541 struct kextcb *kp;
542 struct socket *head = so->so_head;
543
544 kp = sotokextcb(so);
545 while (kp) {
546 if (kp->e_soif && kp->e_soif->sf_sofree) {
547 error = (*kp->e_soif->sf_sofree)(so, kp);
548 if (error) {
549 selthreadclear(&so->so_snd.sb_sel);
550 selthreadclear(&so->so_rcv.sb_sel);
551 return; /* void fn */
552 }
553 }
554 kp = kp->e_next;
555 }
556
557 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
558 #ifdef __APPLE__
559 selthreadclear(&so->so_snd.sb_sel);
560 selthreadclear(&so->so_rcv.sb_sel);
561 #endif
562 return;
563 }
564 if (head != NULL) {
565 if (so->so_state & SS_INCOMP) {
566 TAILQ_REMOVE(&head->so_incomp, so, so_list);
567 head->so_incqlen--;
568 } else if (so->so_state & SS_COMP) {
569 /*
570 * We must not decommission a socket that's
571 * on the accept(2) queue. If we do, then
572 * accept(2) may hang after select(2) indicated
573 * that the listening socket was ready.
574 */
575 #ifdef __APPLE__
576 selthreadclear(&so->so_snd.sb_sel);
577 selthreadclear(&so->so_rcv.sb_sel);
578 #endif
579 return;
580 } else {
581 panic("sofree: not queued");
582 }
583 head->so_qlen--;
584 so->so_state &= ~SS_INCOMP;
585 so->so_head = NULL;
586 }
587 #ifdef __APPLE__
588 selthreadclear(&so->so_snd.sb_sel);
589 sbrelease(&so->so_snd);
590 #endif
591 sorflush(so);
592 sfilter_term(so);
593 sodealloc(so);
594 }
595
596 /*
597 * Close a socket on last file table reference removal.
598 * Initiate disconnect if connected.
599 * Free socket when disconnect complete.
600 */
601 int
602 soclose(so)
603 register struct socket *so;
604 {
605 int s = splnet(); /* conservative */
606 int error = 0;
607 struct kextcb *kp;
608
609 #ifndef __APPLE__
610 funsetown(so->so_sigio);
611 #endif
612 kp = sotokextcb(so);
613 while (kp) {
614 if (kp->e_soif && kp->e_soif->sf_soclose) {
615 error = (*kp->e_soif->sf_soclose)(so, kp);
616 if (error) {
617 splx(s);
618 return((error == EJUSTRETURN) ? 0 : error);
619 }
620 }
621 kp = kp->e_next;
622 }
623
624 if (so->so_options & SO_ACCEPTCONN) {
625 struct socket *sp, *sonext;
626
627 sp = TAILQ_FIRST(&so->so_incomp);
628 for (; sp != NULL; sp = sonext) {
629 sonext = TAILQ_NEXT(sp, so_list);
630 (void) soabort(sp);
631 }
632 for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
633 sonext = TAILQ_NEXT(sp, so_list);
634 /* Dequeue from so_comp since sofree() won't do it */
635 TAILQ_REMOVE(&so->so_comp, sp, so_list);
636 so->so_qlen--;
637 sp->so_state &= ~SS_COMP;
638 sp->so_head = NULL;
639 (void) soabort(sp);
640 }
641
642 }
643 if (so->so_pcb == 0)
644 goto discard;
645 if (so->so_state & SS_ISCONNECTED) {
646 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
647 error = sodisconnect(so);
648 if (error)
649 goto drop;
650 }
651 if (so->so_options & SO_LINGER) {
652 if ((so->so_state & SS_ISDISCONNECTING) &&
653 (so->so_state & SS_NBIO))
654 goto drop;
655 while (so->so_state & SS_ISCONNECTED) {
656 error = tsleep((caddr_t)&so->so_timeo,
657 PSOCK | PCATCH, "soclos", so->so_linger);
658 if (error)
659 break;
660 }
661 }
662 }
663 drop:
664 if (so->so_pcb) {
665 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
666 if (error == 0)
667 error = error2;
668 }
669 discard:
670 if (so->so_pcb && so->so_state & SS_NOFDREF)
671 panic("soclose: NOFDREF");
672 so->so_state |= SS_NOFDREF;
673 #ifdef __APPLE__
674 so->so_proto->pr_domain->dom_refs--;
675 evsofree(so);
676 #endif
677 sofree(so);
678 splx(s);
679 return (error);
680 }
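
/*
 * Illustrative sketch, not part of this file: the SO_LINGER handling in
 * soclose() above is what makes close(2) block until the disconnect
 * completes or the linger interval (so_linger, passed to tsleep()) runs
 * out.  From user space; standard sockets API.
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

static int
close_with_linger(int fd, int interval)
{
	struct linger l;

	l.l_onoff = 1;			/* sets SO_LINGER in so_options */
	l.l_linger = interval;		/* becomes so_linger */
	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof (l)) < 0)
		return (-1);
	return (close(fd));		/* may now sleep in soclose() */
}
#endif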
681
682 /*
683 * Must be called at splnet...
684 */
685 int
686 soabort(so)
687 struct socket *so;
688 {
689 int error;
690
691 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
692 if (error) {
693 sofree(so);
694 return error;
695 }
696 return (0);
697 }
698
699 int
700 soaccept(so, nam)
701 register struct socket *so;
702 struct sockaddr **nam;
703 {
704 int s = splnet();
705 int error;
706 struct kextcb *kp;
707
708 if ((so->so_state & SS_NOFDREF) == 0)
709 panic("soaccept: !NOFDREF");
710 so->so_state &= ~SS_NOFDREF;
711 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
712 if (error == 0) {
713 kp = sotokextcb(so);
714 while (kp) {
715 if (kp->e_soif && kp->e_soif->sf_soaccept) {
716 error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
717 if (error) {
718 if (error == EJUSTRETURN) {
719 error = 0;
720 break;
721 }
722 splx(s);
723 return(error);
724 }
725 }
726 kp = kp->e_next;
727 }
728 }
729
730
731 splx(s);
732 return (error);
733 }
734
735 int
736 soconnect(so, nam)
737 register struct socket *so;
738 struct sockaddr *nam;
739
740 {
741 int s;
742 int error;
743 struct proc *p = current_proc();
744 struct kextcb *kp;
745
746 if (so->so_options & SO_ACCEPTCONN)
747 return (EOPNOTSUPP);
748 s = splnet();
749 /*
750 * If protocol is connection-based, can only connect once.
751 * Otherwise, if connected, try to disconnect first.
752 * This allows user to disconnect by connecting to, e.g.,
753 * a null address.
754 */
755 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
756 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
757 (error = sodisconnect(so))))
758 error = EISCONN;
759 else {
760 /*
761 * Run connect filter before calling protocol:
762 * - non-blocking connect returns before completion;
763 * - allows filters to modify address.
764 */
765 kp = sotokextcb(so);
766 while (kp) {
767 if (kp->e_soif && kp->e_soif->sf_soconnect) {
768 error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
769 if (error) {
770 if (error == EJUSTRETURN) {
771 error = 0;
772 }
773 splx(s);
774 return(error);
775 }
776 }
777 kp = kp->e_next;
778 }
779 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
780 }
781 splx(s);
782 return (error);
783 }
784
785 int
786 soconnect2(so1, so2)
787 register struct socket *so1;
788 struct socket *so2;
789 {
790 int s = splnet();
791 int error;
792 struct kextcb *kp;
793
794 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
795 if (error == 0) {
796 kp = sotokextcb(so1);
797 while (kp) {
798 if (kp->e_soif && kp->e_soif->sf_soconnect2) {
799 error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
800 if (error) {
801 if (error == EJUSTRETURN) {
802 error = 0;
803 break;
804 }
805 splx(s);
806 return(error);
807 }
808 }
809 kp = kp->e_next;
810 }
811 }
812 splx(s);
813 return (error);
814 }
815
816 int
817 sodisconnect(so)
818 register struct socket *so;
819 {
820 int s = splnet();
821 int error;
822 struct kextcb *kp;
823
824 if ((so->so_state & SS_ISCONNECTED) == 0) {
825 error = ENOTCONN;
826 goto bad;
827 }
828 if (so->so_state & SS_ISDISCONNECTING) {
829 error = EALREADY;
830 goto bad;
831 }
832 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
833 if (error == 0) {
834 kp = sotokextcb(so);
835 while (kp) {
836 if (kp->e_soif && kp->e_soif->sf_sodisconnect) {
837 error = (*kp->e_soif->sf_sodisconnect)(so, kp);
838 if (error) {
839 if (error == EJUSTRETURN) {
840 error = 0;
841 break;
842 }
843 splx(s);
844 return(error);
845 }
846 }
847 kp = kp->e_next;
848 }
849 }
850
851 bad:
852 splx(s);
853 return (error);
854 }
855
856 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
857 /*
858 * Send on a socket.
859 * If send must go all at once and message is larger than
860 * send buffering, then hard error.
861 * Lock against other senders.
862 * If must go all at once and not enough room now, then
863 * inform user that this would block and do nothing.
864 * Otherwise, if nonblocking, send as much as possible.
865 * The data to be sent is described by "uio" if nonzero,
866 * otherwise by the mbuf chain "top" (which must be null
867 * if uio is not). Data provided in mbuf chain must be small
868 * enough to send all at once.
869 *
870 * Returns nonzero on error, timeout or signal; callers
871 * must check for short counts if EINTR/ERESTART are returned.
872 * Data and control buffers are freed on return.
873 * Experiment:
874 * MSG_HOLD: go through most of sosend(), but just enqueue the mbuf
875 * MSG_SEND: go through as for MSG_HOLD on current fragment, then
876 * point at the mbuf chain being constructed and go from there.
877 */
878 int
879 sosend(so, addr, uio, top, control, flags)
880 register struct socket *so;
881 struct sockaddr *addr;
882 struct uio *uio;
883 struct mbuf *top;
884 struct mbuf *control;
885 int flags;
886
887 {
888 struct mbuf **mp;
889 register struct mbuf *m, *freelist = NULL;
890 register long space, len, resid;
891 int clen = 0, error, s, dontroute, mlen, sendflags;
892 int atomic = sosendallatonce(so) || top;
893 struct proc *p = current_proc();
894 struct kextcb *kp;
895
896 if (uio)
897 resid = uio->uio_resid;
898 else
899 resid = top->m_pkthdr.len;
900
901 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
902 so,
903 resid,
904 so->so_snd.sb_cc,
905 so->so_snd.sb_lowat,
906 so->so_snd.sb_hiwat);
907
908 /*
909 * In theory resid should be unsigned.
910 * However, space must be signed, as it might be less than 0
911 * if we over-committed, and we must use a signed comparison
912 * of space and resid. On the other hand, a negative resid
913 * causes us to loop sending 0-length segments to the protocol.
914 *
915 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
916 * type sockets since that's an error.
917 */
918 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
919 error = EINVAL;
920 goto out;
921 }
922
923 dontroute =
924 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
925 (so->so_proto->pr_flags & PR_ATOMIC);
926 if (p)
927 p->p_stats->p_ru.ru_msgsnd++;
928 if (control)
929 clen = control->m_len;
930 #define snderr(errno) { error = errno; splx(s); goto release; }
931
932 restart:
933 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
934 if (error)
935 goto out;
936 do {
937 s = splnet();
938 if (so->so_state & SS_CANTSENDMORE)
939 snderr(EPIPE);
940 if (so->so_error) {
941 error = so->so_error;
942 so->so_error = 0;
943 splx(s);
944 goto release;
945 }
946 if ((so->so_state & SS_ISCONNECTED) == 0) {
947 /*
948 * `sendto' and `sendmsg' is allowed on a connection-
949 * based socket if it supports implied connect.
950 * Return ENOTCONN if not connected and no address is
951 * supplied.
952 */
953 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
954 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
955 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
956 !(resid == 0 && clen != 0))
957 snderr(ENOTCONN);
958 } else if (addr == 0 && !(flags&MSG_HOLD))
959 snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
960 ENOTCONN : EDESTADDRREQ);
961 }
962 space = sbspace(&so->so_snd);
963 if (flags & MSG_OOB)
964 space += 1024;
965 if ((atomic && resid > so->so_snd.sb_hiwat) ||
966 clen > so->so_snd.sb_hiwat)
967 snderr(EMSGSIZE);
968 if (space < resid + clen && uio &&
969 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
970 if (so->so_state & SS_NBIO)
971 snderr(EWOULDBLOCK);
972 sbunlock(&so->so_snd);
973 error = sbwait(&so->so_snd);
974 splx(s);
975 if (error)
976 goto out;
977 goto restart;
978 }
979 splx(s);
980 mp = &top;
981 space -= clen;
982
983 do {
984 if (uio == NULL) {
985 /*
986 * Data is prepackaged in "top".
987 */
988 resid = 0;
989 if (flags & MSG_EOR)
990 top->m_flags |= M_EOR;
991 } else {
992 boolean_t dropped_funnel = FALSE;
993 int chainlength;
994 int bytes_to_copy;
995
996 bytes_to_copy = min(resid, space);
997
998 if (sosendminchain > 0) {
999 if (bytes_to_copy >= sosendminchain) {
1000 dropped_funnel = TRUE;
1001 (void)thread_funnel_set(network_flock, FALSE);
1002 }
1003 chainlength = 0;
1004 } else
1005 chainlength = sosendmaxchain;
1006
1007 do {
1008
1009 if (bytes_to_copy >= MINCLSIZE) {
1010 /*
1011 * Try to maintain a local cache of mbuf clusters needed to complete this write.
1012 * The list is further limited to the number currently needed to fill the socket.
1013 * This mechanism allows a large number of mbufs/clusters to be grabbed under a
1014 * single mbuf lock... if we can't get any clusters, then fall back to trying for
1015 * mbufs.  If we fail early (or miscalculate the number needed), make sure to
1016 * release any clusters we haven't yet consumed.
1017 */
1018 if ((m = freelist) == NULL) {
1019 int num_needed;
1020 int hdrs_needed = 0;
1021
1022 if (top == 0)
1023 hdrs_needed = 1;
1024 num_needed = bytes_to_copy / MCLBYTES;
1025
1026 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1027 num_needed++;
1028
1029 if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL)
1030 goto getpackets_failed;
1031 m = freelist;
1032 }
1033 freelist = m->m_next;
1034 m->m_next = NULL;
1035
1036 mlen = MCLBYTES;
1037 len = min(mlen, bytes_to_copy);
1038 } else {
1039 getpackets_failed:
1040 if (top == 0) {
1041 MGETHDR(m, M_WAIT, MT_DATA);
1042 mlen = MHLEN;
1043 m->m_pkthdr.len = 0;
1044 m->m_pkthdr.rcvif = (struct ifnet *)0;
1045 } else {
1046 MGET(m, M_WAIT, MT_DATA);
1047 mlen = MLEN;
1048 }
1049 len = min(mlen, bytes_to_copy);
1050 /*
1051 * For datagram protocols, leave room
1052 * for protocol headers in first mbuf.
1053 */
1054 if (atomic && top == 0 && len < mlen)
1055 MH_ALIGN(m, len);
1056 }
1057 chainlength += len;
1058
1059 space -= len;
1060
1061 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1062
1063 resid = uio->uio_resid;
1064
1065 m->m_len = len;
1066 *mp = m;
1067 top->m_pkthdr.len += len;
1068 if (error)
1069 break;
1070 mp = &m->m_next;
1071 if (resid <= 0) {
1072 if (flags & MSG_EOR)
1073 top->m_flags |= M_EOR;
1074 break;
1075 }
1076 bytes_to_copy = min(resid, space);
1077
1078 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1079
1080 if (dropped_funnel == TRUE)
1081 (void)thread_funnel_set(network_flock, TRUE);
1082 if (error)
1083 goto release;
1084 }
1085
1086 if (flags & (MSG_HOLD|MSG_SEND))
1087 { /* Enqueue for later, go away if HOLD */
1088 register struct mbuf *mb1;
1089 if (so->so_temp && (flags & MSG_FLUSH))
1090 { m_freem(so->so_temp);
1091 so->so_temp = NULL;
1092 }
1093 if (so->so_temp)
1094 so->so_tail->m_next = top;
1095 else
1096 so->so_temp = top;
1097 mb1 = top;
1098 while (mb1->m_next)
1099 mb1 = mb1->m_next;
1100 so->so_tail = mb1;
1101 if (flags&MSG_HOLD)
1102 { top = NULL;
1103 goto release;
1104 }
1105 top = so->so_temp;
1106 }
1107 if (dontroute)
1108 so->so_options |= SO_DONTROUTE;
1109 s = splnet(); /* XXX */
1110 /* Compute flags here, for pru_send and NKEs */
1111 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1112 /*
1113 * If the user set MSG_EOF, the protocol
1114 * understands this flag and nothing left to
1115 * send then use PRU_SEND_EOF instead of PRU_SEND.
1116 */
1117 ((flags & MSG_EOF) &&
1118 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1119 (resid <= 0)) ?
1120 PRUS_EOF :
1121 /* If there is more to send set PRUS_MORETOCOME */
1122 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1123 kp = sotokextcb(so);
1124 while (kp)
1125 { if (kp->e_soif && kp->e_soif->sf_sosend) {
1126 error = (*kp->e_soif->sf_sosend)(so, &addr,
1127 &uio, &top,
1128 &control,
1129 &sendflags,
1130 kp);
1131 if (error) {
1132 splx(s);
1133 if (error == EJUSTRETURN) {
1134 sbunlock(&so->so_snd);
1135
1136 if (freelist)
1137 m_freem_list(freelist);
1138 return(0);
1139 }
1140 goto release;
1141 }
1142 }
1143 kp = kp->e_next;
1144 }
1145
1146 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1147 sendflags, top, addr, control, p);
1148 splx(s);
1149 #ifdef __APPLE__
1150 if (flags & MSG_SEND)
1151 so->so_temp = NULL;
1152 #endif
1153 if (dontroute)
1154 so->so_options &= ~SO_DONTROUTE;
1155 clen = 0;
1156 control = 0;
1157 top = 0;
1158 mp = &top;
1159 if (error)
1160 goto release;
1161 } while (resid && space > 0);
1162 } while (resid);
1163
1164 release:
1165 sbunlock(&so->so_snd);
1166 out:
1167 if (top)
1168 m_freem(top);
1169 if (control)
1170 m_freem(control);
1171 if (freelist)
1172 m_freem_list(freelist);
1173
1174 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1175 so,
1176 resid,
1177 so->so_snd.sb_cc,
1178 space,
1179 error);
1180
1181 return (error);
1182 }
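
/*
 * Illustrative sketch, not part of this file: the contract in sosend()'s
 * header comment -- nonzero return on error/timeout/signal, with possible
 * short counts on EINTR/ERESTART -- is why user-space callers typically
 * wrap send(2) in a resume loop like this one.  Standard sockets API.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

static ssize_t
send_all(int fd, const char *buf, size_t len)
{
	size_t off = 0;

	while (off < len) {
		ssize_t n = send(fd, buf + off, len - off, 0);

		if (n < 0) {
			if (errno == EINTR)	/* short count; resume */
				continue;
			return (-1);
		}
		off += (size_t)n;
	}
	return ((ssize_t)off);
}
#endif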
1183
1184 /*
1185 * Implement receive operations on a socket.
1186 * We depend on the way that records are added to the sockbuf
1187 * by sbappend*. In particular, each record (mbufs linked through m_next)
1188 * must begin with an address if the protocol so specifies,
1189 * followed by an optional mbuf or mbufs containing ancillary data,
1190 * and then zero or more mbufs of data.
1191 * In order to avoid blocking network interrupts for the entire time here,
1192 * we splx() while doing the actual copy to user space.
1193 * Although the sockbuf is locked, new data may still be appended,
1194 * and thus we must maintain consistency of the sockbuf during that time.
1195 *
1196 * The caller may receive the data as a single mbuf chain by supplying
1197 * an mbuf **mp0 for use in returning the chain. The uio is then used
1198 * only for the count in uio_resid.
1199 */
1200 int
1201 soreceive(so, psa, uio, mp0, controlp, flagsp)
1202 register struct socket *so;
1203 struct sockaddr **psa;
1204 struct uio *uio;
1205 struct mbuf **mp0;
1206 struct mbuf **controlp;
1207 int *flagsp;
1208 {
1209 register struct mbuf *m, **mp;
1210 register struct mbuf *free_list, *ml;
1211 register int flags, len, error, s, offset;
1212 struct protosw *pr = so->so_proto;
1213 struct mbuf *nextrecord;
1214 int moff, type = 0;
1215 int orig_resid = uio->uio_resid;
1216 struct kextcb *kp;
1217
1218 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1219 so,
1220 uio->uio_resid,
1221 so->so_rcv.sb_cc,
1222 so->so_rcv.sb_lowat,
1223 so->so_rcv.sb_hiwat);
1224
1225 kp = sotokextcb(so);
1226 while (kp) {
1227 if (kp->e_soif && kp->e_soif->sf_soreceive) {
1228 error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
1229 mp0, controlp,
1230 flagsp, kp);
1231 if (error)
1232 return((error == EJUSTRETURN) ? 0 : error);
1233 }
1234 kp = kp->e_next;
1235 }
1236
1237 mp = mp0;
1238 if (psa)
1239 *psa = 0;
1240 if (controlp)
1241 *controlp = 0;
1242 if (flagsp)
1243 flags = *flagsp &~ MSG_EOR;
1244 else
1245 flags = 0;
1246 /*
1247 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1248 * regardless of the flags argument. Here is the case where
1249 * out-of-band data is not inline.
1250 */
1251 if ((flags & MSG_OOB) ||
1252 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1253 (so->so_options & SO_OOBINLINE) == 0 &&
1254 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1255 m = m_get(M_WAIT, MT_DATA);
1256 if (m == NULL)
1257 return (ENOBUFS);
1258 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1259 if (error)
1260 goto bad;
1261 do {
1262 error = uiomove(mtod(m, caddr_t),
1263 (int) min(uio->uio_resid, m->m_len), uio);
1264 m = m_free(m);
1265 } while (uio->uio_resid && error == 0 && m);
1266 bad:
1267 if (m)
1268 m_freem(m);
1269 #ifdef __APPLE__
1270 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1271 if (error == EWOULDBLOCK || error == EINVAL) {
1272 /*
1273 * Let's try to get normal data:
1274 * EWOULDBLOCK: out-of-band data not received yet;
1275 * EINVAL: out-of-band data already read.
1276 */
1277 error = 0;
1278 goto nooob;
1279 } else if (error == 0 && flagsp)
1280 *flagsp |= MSG_OOB;
1281 }
1282 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1283 #endif
1284 return (error);
1285 }
1286 nooob:
1287 if (mp)
1288 *mp = (struct mbuf *)0;
1289 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
1290 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1291
1292 restart:
1293 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1294 if (error) {
1295 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1296 return (error);
1297 }
1298 s = splnet();
1299
1300 m = so->so_rcv.sb_mb;
1301 /*
1302 * If we have less data than requested, block awaiting more
1303 * (subject to any timeout) if:
1304 * 1. the current count is less than the low water mark, or
1305 * 2. MSG_WAITALL is set, and it is possible to do the entire
1306 * receive operation at once if we block (resid <= hiwat).
1307 * 3. MSG_DONTWAIT is not set
1308 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1309 * we have to do the receive in sections, and thus risk returning
1310 * a short count if a timeout or signal occurs after we start.
1311 */
1312 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1313 so->so_rcv.sb_cc < uio->uio_resid) &&
1314 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1315 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
1316 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1317 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1318 if (so->so_error) {
1319 if (m)
1320 goto dontblock;
1321 error = so->so_error;
1322 if ((flags & MSG_PEEK) == 0)
1323 so->so_error = 0;
1324 goto release;
1325 }
1326 if (so->so_state & SS_CANTRCVMORE) {
1327 if (m)
1328 goto dontblock;
1329 else
1330 goto release;
1331 }
1332 for (; m; m = m->m_next)
1333 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1334 m = so->so_rcv.sb_mb;
1335 goto dontblock;
1336 }
1337 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1338 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1339 error = ENOTCONN;
1340 goto release;
1341 }
1342 if (uio->uio_resid == 0)
1343 goto release;
1344 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
1345 error = EWOULDBLOCK;
1346 goto release;
1347 }
1348 sbunlock(&so->so_rcv);
1349 if (socket_debug)
1350 printf("Waiting for socket data\n");
1351 error = sbwait(&so->so_rcv);
1352 if (socket_debug)
1353 printf("SORECEIVE - sbwait returned %d\n", error);
1354 splx(s);
1355 if (error) {
1356 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1357 return (error);
1358 }
1359 goto restart;
1360 }
1361 dontblock:
1362 #ifndef __APPLE__
1363 if (uio->uio_procp)
1364 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1365 #endif
1366 nextrecord = m->m_nextpkt;
1367 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1368 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1369 orig_resid = 0;
1370 if (psa)
1371 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1372 mp0 == 0);
1373 if (flags & MSG_PEEK) {
1374 m = m->m_next;
1375 } else {
1376 sbfree(&so->so_rcv, m);
1377 MFREE(m, so->so_rcv.sb_mb);
1378 m = so->so_rcv.sb_mb;
1379 }
1380 }
1381 while (m && m->m_type == MT_CONTROL && error == 0) {
1382 if (flags & MSG_PEEK) {
1383 if (controlp)
1384 *controlp = m_copy(m, 0, m->m_len);
1385 m = m->m_next;
1386 } else {
1387 sbfree(&so->so_rcv, m);
1388 if (controlp) {
1389 if (pr->pr_domain->dom_externalize &&
1390 mtod(m, struct cmsghdr *)->cmsg_type ==
1391 SCM_RIGHTS)
1392 error = (*pr->pr_domain->dom_externalize)(m);
1393 *controlp = m;
1394 so->so_rcv.sb_mb = m->m_next;
1395 m->m_next = 0;
1396 m = so->so_rcv.sb_mb;
1397 } else {
1398 MFREE(m, so->so_rcv.sb_mb);
1399 m = so->so_rcv.sb_mb;
1400 }
1401 }
1402 if (controlp) {
1403 orig_resid = 0;
1404 controlp = &(*controlp)->m_next;
1405 }
1406 }
1407 if (m) {
1408 if ((flags & MSG_PEEK) == 0)
1409 m->m_nextpkt = nextrecord;
1410 type = m->m_type;
1411 if (type == MT_OOBDATA)
1412 flags |= MSG_OOB;
1413 }
1414 moff = 0;
1415 offset = 0;
1416
1417 free_list = m;
1418 ml = (struct mbuf *)0;
1419
1420 while (m && uio->uio_resid > 0 && error == 0) {
1421 if (m->m_type == MT_OOBDATA) {
1422 if (type != MT_OOBDATA)
1423 break;
1424 } else if (type == MT_OOBDATA)
1425 break;
1426 #ifndef __APPLE__
1427 /*
1428 * This assertion needs rework. The trouble is that AppleTalk uses many
1429 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1430 * For now just remove the assertion... CSM 9/98
1431 */
1432 else
1433 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1434 ("receive 3"));
1435 #else
1436 /*
1437 * Make sure to always set the MSG_OOB flag when getting
1438 * out of band data inline.
1439 */
1440 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1441 (so->so_options & SO_OOBINLINE) != 0 &&
1442 (so->so_state & SS_RCVATMARK) != 0) {
1443 flags |= MSG_OOB;
1444 }
1445 #endif
1446 so->so_state &= ~SS_RCVATMARK;
1447 len = uio->uio_resid;
1448 if (so->so_oobmark && len > so->so_oobmark - offset)
1449 len = so->so_oobmark - offset;
1450 if (len > m->m_len - moff)
1451 len = m->m_len - moff;
1452 /*
1453 * If mp is set, just pass back the mbufs.
1454 * Otherwise copy them out via the uio, then free.
1455 * Sockbuf must be consistent here (points to current mbuf,
1456 * it points to next record) when we drop priority;
1457 * we must note any additions to the sockbuf when we
1458 * block interrupts again.
1459 */
1460 if (mp == 0) {
1461 splx(s);
1462 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1463 s = splnet();
1464 if (error)
1465 goto release;
1466 } else
1467 uio->uio_resid -= len;
1468 if (len == m->m_len - moff) {
1469 if (m->m_flags & M_EOR)
1470 flags |= MSG_EOR;
1471 if (flags & MSG_PEEK) {
1472 m = m->m_next;
1473 moff = 0;
1474 } else {
1475 nextrecord = m->m_nextpkt;
1476 sbfree(&so->so_rcv, m);
1477 if (mp) {
1478 *mp = m;
1479 mp = &m->m_next;
1480 so->so_rcv.sb_mb = m = m->m_next;
1481 *mp = (struct mbuf *)0;
1482 } else {
1483 m->m_nextpkt = 0;
1484 if (ml != 0)
1485 ml->m_next = m;
1486 ml = m;
1487 so->so_rcv.sb_mb = m = m->m_next;
1488 ml->m_next = 0;
1489 }
1490 if (m)
1491 m->m_nextpkt = nextrecord;
1492 }
1493 } else {
1494 if (flags & MSG_PEEK)
1495 moff += len;
1496 else {
1497 if (mp)
1498 *mp = m_copym(m, 0, len, M_WAIT);
1499 m->m_data += len;
1500 m->m_len -= len;
1501 so->so_rcv.sb_cc -= len;
1502 }
1503 }
1504 if (so->so_oobmark) {
1505 if ((flags & MSG_PEEK) == 0) {
1506 so->so_oobmark -= len;
1507 if (so->so_oobmark == 0) {
1508 so->so_state |= SS_RCVATMARK;
1509 postevent(so, 0, EV_OOB);
1510 break;
1511 }
1512 } else {
1513 offset += len;
1514 if (offset == so->so_oobmark)
1515 break;
1516 }
1517 }
1518 if (flags & MSG_EOR)
1519 break;
1520 /*
1521 * If the MSG_WAITALL flag is set (for non-atomic socket),
1522 * we must not quit until "uio->uio_resid == 0" or an error
1523 * termination. If a signal/timeout occurs, return
1524 * with a short count but without error.
1525 * Keep sockbuf locked against other readers.
1526 */
1527 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
1528 !sosendallatonce(so) && !nextrecord) {
1529 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1530 break;
1531
1532 if (ml) {
1533 m_freem_list(free_list);
1534 }
1535 error = sbwait(&so->so_rcv);
1536 if (error) {
1537 sbunlock(&so->so_rcv);
1538 splx(s);
1539 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, 0,0,0,0,0);
1540 return (0);
1541 }
1542 m = so->so_rcv.sb_mb;
1543 if (m) {
1544 nextrecord = m->m_nextpkt;
1545 free_list = m;
1546 }
1547 ml = (struct mbuf *)0;
1548 }
1549 }
1550 if (ml) {
1551 m_freem_list(free_list);
1552 }
1553
1554 if (m && pr->pr_flags & PR_ATOMIC) {
1555 #ifdef __APPLE__
1556 if (so->so_options & SO_DONTTRUNC)
1557 flags |= MSG_RCVMORE;
1558 else {
1559 #endif
1560 flags |= MSG_TRUNC;
1561 if ((flags & MSG_PEEK) == 0)
1562 (void) sbdroprecord(&so->so_rcv);
1563 #ifdef __APPLE__
1564 }
1565 #endif
1566 }
1567 if ((flags & MSG_PEEK) == 0) {
1568 if (m == 0)
1569 so->so_rcv.sb_mb = nextrecord;
1570 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1571 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1572 }
1573 #ifdef __APPLE__
1574 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1575 flags |= MSG_HAVEMORE;
1576 #endif
1577 if (orig_resid == uio->uio_resid && orig_resid &&
1578 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1579 sbunlock(&so->so_rcv);
1580 splx(s);
1581 goto restart;
1582 }
1583
1584 if (flagsp)
1585 *flagsp |= flags;
1586 release:
1587 sbunlock(&so->so_rcv);
1588 splx(s);
1589
1590 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1591 so,
1592 uio->uio_resid,
1593 so->so_rcv.sb_cc,
1594 0,
1595 error);
1596
1597 return (error);
1598 }
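
/*
 * Two illustrative sketches, not part of this file.  First: the record
 * layout soreceive() depends on (optional address, optional control mbufs,
 * then data) is exactly what recvmsg(2) hands back through msg_name,
 * msg_control and msg_iov.  Second: the MSG_WAITALL behavior above means a
 * caller can request an exact byte count but must still tolerate a short
 * read when a signal or timeout interrupts the wait.  Standard sockets API.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t
recv_one_record(int fd, struct sockaddr_storage *from,
    char *ctl, size_t ctllen, char *data, size_t datalen)
{
	struct msghdr msg;
	struct iovec iov;

	iov.iov_base = data;		/* the MT_DATA part of the record */
	iov.iov_len = datalen;
	msg.msg_name = from;		/* the MT_SONAME mbuf, if PR_ADDR */
	msg.msg_namelen = sizeof (*from);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = ctl;		/* the MT_CONTROL mbufs, if any */
	msg.msg_controllen = ctllen;
	msg.msg_flags = 0;
	return (recvmsg(fd, &msg, 0));
}

static int
recv_exact(int fd, char *buf, size_t len)
{
	ssize_t n = recv(fd, buf, len, MSG_WAITALL);

	if (n < 0)
		return (-1);	/* hard error */
	if ((size_t)n < len)
		return (1);	/* short count: signal, timeout, or EOF */
	return (0);		/* got everything */
}
#endif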
1599
1600 int
1601 soshutdown(so, how)
1602 register struct socket *so;
1603 register int how;
1604 {
1605 register struct protosw *pr = so->so_proto;
1606 struct kextcb *kp;
1607 int ret;
1608
1609
1610 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0);
1611 kp = sotokextcb(so);
1612 while (kp) {
1613 if (kp->e_soif && kp->e_soif->sf_soshutdown) {
1614 ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
1615 if (ret)
1616 return((ret == EJUSTRETURN) ? 0 : ret);
1617 }
1618 kp = kp->e_next;
1619 }
1620
1621 if (how != SHUT_WR) {
1622 sorflush(so);
1623 postevent(so, 0, EV_RCLOSED);
1624 }
1625 if (how != SHUT_RD) {
1626 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
1627 postevent(so, 0, EV_WCLOSED);
1628 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
1629 return(ret);
1630 }
1631
1632 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
1633 return (0);
1634 }
1635
1636 void
1637 sorflush(so)
1638 register struct socket *so;
1639 {
1640 register struct sockbuf *sb = &so->so_rcv;
1641 register struct protosw *pr = so->so_proto;
1642 register int s, error;
1643 struct sockbuf asb;
1644 struct kextcb *kp;
1645
1646 kp = sotokextcb(so);
1647 while (kp) {
1648 if (kp->e_soif && kp->e_soif->sf_sorflush) {
1649 if ((*kp->e_soif->sf_sorflush)(so, kp))
1650 return;
1651 }
1652 kp = kp->e_next;
1653 }
1654
1655 sb->sb_flags |= SB_NOINTR;
1656 (void) sblock(sb, M_WAIT);
1657 s = splimp();
1658 socantrcvmore(so);
1659 sbunlock(sb);
1660 #ifdef __APPLE__
1661 selthreadclear(&sb->sb_sel);
1662 #endif
1663 asb = *sb;
1664 bzero((caddr_t)sb, sizeof (*sb));
1665 #ifndef __APPLE__
1666 if (asb.sb_flags & SB_KNOTE) {
1667 sb->sb_sel.si_note = asb.sb_sel.si_note;
1668 sb->sb_flags = SB_KNOTE;
1669 }
1670 #endif
1671 splx(s);
1672 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
1673 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
1674
1675 sbrelease(&asb);
1676 }
1677
1678 /*
1679 * Perhaps this routine, and sooptcopyout(), below, ought to come in
1680 * an additional variant to handle the case where the option value needs
1681 * to be some kind of integer, but not a specific size.
1682 * In addition to their use here, these functions are also called by the
1683 * protocol-level pr_ctloutput() routines.
1684 */
1685 int
1686 sooptcopyin(sopt, buf, len, minlen)
1687 struct sockopt *sopt;
1688 void *buf;
1689 size_t len;
1690 size_t minlen;
1691 {
1692 size_t valsize;
1693
1694 /*
1695 * If the user gives us more than we wanted, we ignore it,
1696 * but if we don't get the minimum length the caller
1697 * wants, we return EINVAL. On success, sopt->sopt_valsize
1698 * is set to however much we actually retrieved.
1699 */
1700 if ((valsize = sopt->sopt_valsize) < minlen)
1701 return EINVAL;
1702 if (valsize > len)
1703 sopt->sopt_valsize = valsize = len;
1704
1705 if (sopt->sopt_p != 0)
1706 return (copyin(sopt->sopt_val, buf, valsize));
1707
1708 bcopy(sopt->sopt_val, buf, valsize);
1709 return 0;
1710 }
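
/*
 * Illustrative sketch, not part of this file: a protocol-level
 * pr_ctloutput() handler consumes an option value with sooptcopyin() the
 * same way sosetopt() below does.  The handler name here is hypothetical;
 * only the sooptcopyin() usage pattern comes from this file.
 */
#if 0
static int
example_ctloutput_set(struct socket *so, struct sockopt *sopt)
{
	int optval, error;

	/* refuse anything smaller than an int; ignore anything larger */
	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error)
		return (error);
	/* ... apply optval to the protocol control block of so ... */
	return (0);
}
#endif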
1711
1712 int
1713 sosetopt(so, sopt)
1714 struct socket *so;
1715 struct sockopt *sopt;
1716 {
1717 int error, optval;
1718 struct linger l;
1719 struct timeval tv;
1720 short val;
1721 struct kextcb *kp;
1722
1723 if (sopt->sopt_dir != SOPT_SET) {
1724 sopt->sopt_dir = SOPT_SET;
1725 }
1726
1727 kp = sotokextcb(so);
1728 while (kp) {
1729 if (kp->e_soif && kp->e_soif->sf_socontrol) {
1730 error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
1731 if (error)
1732 return((error == EJUSTRETURN) ? 0 : error);
1733 }
1734 kp = kp->e_next;
1735 }
1736
1737 error = 0;
1738 if (sopt->sopt_level != SOL_SOCKET) {
1739 if (so->so_proto && so->so_proto->pr_ctloutput)
1740 return ((*so->so_proto->pr_ctloutput)
1741 (so, sopt));
1742 error = ENOPROTOOPT;
1743 } else {
1744 switch (sopt->sopt_name) {
1745 case SO_LINGER:
1746 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
1747 if (error)
1748 goto bad;
1749
1750 so->so_linger = l.l_linger;
1751 if (l.l_onoff)
1752 so->so_options |= SO_LINGER;
1753 else
1754 so->so_options &= ~SO_LINGER;
1755 break;
1756
1757 case SO_DEBUG:
1758 case SO_KEEPALIVE:
1759 case SO_DONTROUTE:
1760 case SO_USELOOPBACK:
1761 case SO_BROADCAST:
1762 case SO_REUSEADDR:
1763 case SO_REUSEPORT:
1764 case SO_OOBINLINE:
1765 case SO_TIMESTAMP:
1766 #ifdef __APPLE__
1767 case SO_DONTTRUNC:
1768 case SO_WANTMORE:
1769 case SO_WANTOOBFLAG:
1770 #endif
1771 error = sooptcopyin(sopt, &optval, sizeof optval,
1772 sizeof optval);
1773 if (error)
1774 goto bad;
1775 if (optval)
1776 so->so_options |= sopt->sopt_name;
1777 else
1778 so->so_options &= ~sopt->sopt_name;
1779 break;
1780
1781 case SO_SNDBUF:
1782 case SO_RCVBUF:
1783 case SO_SNDLOWAT:
1784 case SO_RCVLOWAT:
1785 error = sooptcopyin(sopt, &optval, sizeof optval,
1786 sizeof optval);
1787 if (error)
1788 goto bad;
1789
1790 /*
1791 * Values < 1 make no sense for any of these
1792 * options, so disallow them.
1793 */
1794 if (optval < 1) {
1795 error = EINVAL;
1796 goto bad;
1797 }
1798
1799 switch (sopt->sopt_name) {
1800 case SO_SNDBUF:
1801 case SO_RCVBUF:
1802 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
1803 &so->so_snd : &so->so_rcv,
1804 (u_long) optval) == 0) {
1805 error = ENOBUFS;
1806 goto bad;
1807 }
1808 break;
1809
1810 /*
1811 * Make sure the low-water is never greater than
1812 * the high-water.
1813 */
1814 case SO_SNDLOWAT:
1815 so->so_snd.sb_lowat =
1816 (optval > so->so_snd.sb_hiwat) ?
1817 so->so_snd.sb_hiwat : optval;
1818 break;
1819 case SO_RCVLOWAT:
1820 so->so_rcv.sb_lowat =
1821 (optval > so->so_rcv.sb_hiwat) ?
1822 so->so_rcv.sb_hiwat : optval;
1823 break;
1824 }
1825 break;
1826
1827 case SO_SNDTIMEO:
1828 case SO_RCVTIMEO:
1829 error = sooptcopyin(sopt, &tv, sizeof tv,
1830 sizeof tv);
1831 if (error)
1832 goto bad;
1833
1834 /* assert(hz > 0); */
1835 if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
1836 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
1837 error = EDOM;
1838 goto bad;
1839 }
1840 /* assert(tick > 0); */
1841 /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
1842 {
1843 long tmp = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
1844 if (tmp > SHRT_MAX) {
1845 error = EDOM;
1846 goto bad;
1847 }
1848 val = tmp;
1849 }
1850
1851 switch (sopt->sopt_name) {
1852 case SO_SNDTIMEO:
1853 so->so_snd.sb_timeo = val;
1854 break;
1855 case SO_RCVTIMEO:
1856 so->so_rcv.sb_timeo = val;
1857 break;
1858 }
1859 break;
1860
1861 case SO_NKE:
1862 {
1863 struct so_nke nke;
1864 struct NFDescriptor *nf1, *nf2 = NULL;
1865
1866 error = sooptcopyin(sopt, &nke,
1867 sizeof nke, sizeof nke);
1868 if (error)
1869 goto bad;
1870
1871 error = nke_insert(so, &nke);
1872 break;
1873 }
1874
1875 case SO_NOSIGPIPE:
1876 error = sooptcopyin(sopt, &optval, sizeof optval,
1877 sizeof optval);
1878 if (error)
1879 goto bad;
1880 if (optval)
1881 so->so_flags |= SOF_NOSIGPIPE;
1882 else
1883 so->so_flags &= ~SOF_NOSIGPIPE;
1884
1885 break;
1886
1887 default:
1888 error = ENOPROTOOPT;
1889 break;
1890 }
1891 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
1892 (void) ((*so->so_proto->pr_ctloutput)
1893 (so, sopt));
1894 }
1895 }
1896 bad:
1897 return (error);
1898 }
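
/*
 * Illustrative sketch, not part of this file: SO_SNDTIMEO/SO_RCVTIMEO
 * values are converted above into a short tick count, so a timeout is
 * rejected with EDOM once it exceeds SHRT_MAX ticks (tv_sec > SHRT_MAX /
 * hz).  Setting a receive timeout from user space; standard sockets API.
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>

static int
set_recv_timeout(int fd, long sec, long usec)
{
	struct timeval tv;

	tv.tv_sec = sec;	/* must stay under SHRT_MAX / hz, or EDOM */
	tv.tv_usec = usec;	/* must be in [0, 1000000), or EDOM */
	return (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv)));
}
#endif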
1899
1900 /* Helper routine for getsockopt */
1901 int
1902 sooptcopyout(sopt, buf, len)
1903 struct sockopt *sopt;
1904 void *buf;
1905 size_t len;
1906 {
1907 int error;
1908 size_t valsize;
1909
1910 error = 0;
1911
1912 /*
1913 * Documented get behavior is that we always return a value,
1914 * possibly truncated to fit in the user's buffer.
1915 * Traditional behavior is that we always tell the user
1916 * precisely how much we copied, rather than something useful
1917 * like the total amount we had available for her.
1918 * Note that this interface is not idempotent; the entire answer must
1919 * be generated ahead of time.
1920 */
1921 valsize = min(len, sopt->sopt_valsize);
1922 sopt->sopt_valsize = valsize;
1923 if (sopt->sopt_val != 0) {
1924 if (sopt->sopt_p != 0)
1925 error = copyout(buf, sopt->sopt_val, valsize);
1926 else
1927 bcopy(buf, sopt->sopt_val, valsize);
1928 }
1929 return error;
1930 }
1931
1932 int
1933 sogetopt(so, sopt)
1934 struct socket *so;
1935 struct sockopt *sopt;
1936 {
1937 int error, optval;
1938 struct linger l;
1939 struct timeval tv;
1940 struct mbuf *m;
1941 struct kextcb *kp;
1942
1943 if (sopt->sopt_dir != SOPT_GET) {
1944 sopt->sopt_dir = SOPT_GET;
1945 }
1946
1947 kp = sotokextcb(so);
1948 while (kp) {
1949 if (kp->e_soif && kp->e_soif->sf_socontrol) {
1950 error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
1951 if (error)
1952 return((error == EJUSTRETURN) ? 0 : error);
1953 }
1954 kp = kp->e_next;
1955 }
1956
1957 error = 0;
1958 if (sopt->sopt_level != SOL_SOCKET) {
1959 if (so->so_proto && so->so_proto->pr_ctloutput) {
1960 return ((*so->so_proto->pr_ctloutput)
1961 (so, sopt));
1962 } else
1963 return (ENOPROTOOPT);
1964 } else {
1965 switch (sopt->sopt_name) {
1966 case SO_LINGER:
1967 l.l_onoff = so->so_options & SO_LINGER;
1968 l.l_linger = so->so_linger;
1969 error = sooptcopyout(sopt, &l, sizeof l);
1970 break;
1971
1972 case SO_USELOOPBACK:
1973 case SO_DONTROUTE:
1974 case SO_DEBUG:
1975 case SO_KEEPALIVE:
1976 case SO_REUSEADDR:
1977 case SO_REUSEPORT:
1978 case SO_BROADCAST:
1979 case SO_OOBINLINE:
1980 case SO_TIMESTAMP:
1981 #ifdef __APPLE__
1982 case SO_DONTTRUNC:
1983 case SO_WANTMORE:
1984 case SO_WANTOOBFLAG:
1985 #endif
1986 optval = so->so_options & sopt->sopt_name;
1987 integer:
1988 error = sooptcopyout(sopt, &optval, sizeof optval);
1989 break;
1990
1991 case SO_TYPE:
1992 optval = so->so_type;
1993 goto integer;
1994
1995 #ifdef __APPLE__
1996 case SO_NREAD:
1997 {
1998 int pkt_total;
1999 struct mbuf *m1;
2000
2001 pkt_total = 0;
2002 m1 = so->so_rcv.sb_mb;
2003 if (so->so_proto->pr_flags & PR_ATOMIC)
2004 {
2005 #if 0
2006 kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
2007 #endif
2008 while (m1) {
2009 if (m1->m_type == MT_DATA)
2010 pkt_total += m1->m_len;
2011 #if 0
2012 kprintf("CNT: %d/%d\n", m1->m_len, pkt_total);
2013 #endif
2014 m1 = m1->m_next;
2015 }
2016 optval = pkt_total;
2017 } else
2018 optval = so->so_rcv.sb_cc;
2019 #if 0
2020 kprintf("RTN: %d\n", optval);
2021 #endif
2022 goto integer;
2023 }
2024 #endif
2025 case SO_ERROR:
2026 optval = so->so_error;
2027 so->so_error = 0;
2028 goto integer;
2029
2030 case SO_SNDBUF:
2031 optval = so->so_snd.sb_hiwat;
2032 goto integer;
2033
2034 case SO_RCVBUF:
2035 optval = so->so_rcv.sb_hiwat;
2036 goto integer;
2037
2038 case SO_SNDLOWAT:
2039 optval = so->so_snd.sb_lowat;
2040 goto integer;
2041
2042 case SO_RCVLOWAT:
2043 optval = so->so_rcv.sb_lowat;
2044 goto integer;
2045
2046 case SO_SNDTIMEO:
2047 case SO_RCVTIMEO:
2048 optval = (sopt->sopt_name == SO_SNDTIMEO ?
2049 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2050
2051 tv.tv_sec = optval / hz;
2052 tv.tv_usec = (optval % hz) * tick;
2053 error = sooptcopyout(sopt, &tv, sizeof tv);
2054 break;
2055
2056 case SO_NOSIGPIPE:
2057 optval = (so->so_flags & SOF_NOSIGPIPE);
2058 goto integer;
2059
2060 default:
2061 error = ENOPROTOOPT;
2062 break;
2063 }
2064 return (error);
2065 }
2066 }
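
/*
 * Illustrative sketch, not part of this file: SO_NREAD, handled above, is
 * an Apple extension reporting the pending receive bytes -- the data bytes
 * of the first record for atomic (PR_ATOMIC) protocols, sb_cc otherwise.
 * Queried from user space like any other socket-level option:
 */
#if 0
#include <sys/socket.h>

static int
bytes_readable(int fd)
{
	int nread = 0;
	socklen_t len = sizeof (nread);

	if (getsockopt(fd, SOL_SOCKET, SO_NREAD, &nread, &len) < 0)
		return (-1);
	return (nread);
}
#endif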
2067
2068 #ifdef __APPLE__
2069 /*
2070 * Network filter support
2071 */
2072 /* Run the list of filters, creating extension control blocks */
2073 int sfilter_init(register struct socket *so)
2074 { struct kextcb *kp, **kpp;
2075 struct protosw *prp;
2076 struct NFDescriptor *nfp;
2077
2078 prp = so->so_proto;
2079 nfp = prp->pr_sfilter.tqh_first; /* non-null */
2080 kpp = &so->so_ext;
2081 kp = NULL;
2082 while (nfp)
2083 { MALLOC(kp, struct kextcb *, sizeof(*kp),
2084 M_TEMP, M_WAITOK);
2085 if (kp == NULL)
2086 return(ENOBUFS); /* so_free will clean up */
2087 *kpp = kp;
2088 kpp = &kp->e_next;
2089 kp->e_next = NULL;
2090 kp->e_fcb = NULL;
2091 kp->e_nfd = nfp;
2092 kp->e_soif = nfp->nf_soif;
2093 kp->e_sout = nfp->nf_soutil;
2094 /*
2095 * Ignore return value for create
2096 * Everyone gets a chance at startup
2097 */
2098 if (kp->e_soif && kp->e_soif->sf_socreate)
2099 (*kp->e_soif->sf_socreate)(so, prp, kp);
2100 nfp = nfp->nf_next.tqe_next;
2101 }
2102 return(0);
2103 }
2104
2105 /*
2106 * Run the list of filters, freeing extension control blocks
2107 * Assumes the soif/soutil blocks have been handled.
2108 */
2109 int sfilter_term(struct socket *so)
2110 { struct kextcb *kp, *kp1;
2111
2112 kp = so->so_ext;
2113 while (kp)
2114 { kp1 = kp->e_next;
2115 /*
2116 * Ignore return code on termination; everyone must
2117 * get terminated.
2118 */
2119 if (kp->e_soif && kp->e_soif->sf_sofree)
2120 kp->e_soif->sf_sofree(so, kp);
2121 FREE(kp, M_TEMP);
2122 kp = kp1;
2123 }
2124 return(0);
2125 }
2126 #endif /* __APPLE__ */
2127
2128 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2129 int
2130 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2131 {
2132 struct mbuf *m, *m_prev;
2133 int sopt_size = sopt->sopt_valsize;
2134
2135 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2136 if (m == 0)
2137 return ENOBUFS;
2138 if (sopt_size > MLEN) {
2139 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2140 if ((m->m_flags & M_EXT) == 0) {
2141 m_free(m);
2142 return ENOBUFS;
2143 }
2144 m->m_len = min(MCLBYTES, sopt_size);
2145 } else {
2146 m->m_len = min(MLEN, sopt_size);
2147 }
2148 sopt_size -= m->m_len;
2149 *mp = m;
2150 m_prev = m;
2151
2152 while (sopt_size) {
2153 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2154 if (m == 0) {
2155 m_freem(*mp);
2156 return ENOBUFS;
2157 }
2158 if (sopt_size > MLEN) {
2159 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2160 if ((m->m_flags & M_EXT) == 0) {
2161 m_freem(*mp);
2162 return ENOBUFS;
2163 }
2164 m->m_len = min(MCLBYTES, sopt_size);
2165 } else {
2166 m->m_len = min(MLEN, sopt_size);
2167 }
2168 sopt_size -= m->m_len;
2169 m_prev->m_next = m;
2170 m_prev = m;
2171 }
2172 return 0;
2173 }
2174
2175 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2176 int
2177 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2178 {
2179 struct mbuf *m0 = m;
2180
2181 if (sopt->sopt_val == NULL)
2182 return 0;
2183 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2184 if (sopt->sopt_p != NULL) {
2185 int error;
2186
2187 error = copyin(sopt->sopt_val, mtod(m, char *),
2188 m->m_len);
2189 if (error != 0) {
2190 m_freem(m0);
2191 return(error);
2192 }
2193 } else
2194 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
2195 sopt->sopt_valsize -= m->m_len;
2196 sopt->sopt_val = (caddr_t)sopt->sopt_val + m->m_len;
2197 m = m->m_next;
2198 }
2199 if (m != NULL) /* should have been allocated with enough space by ip6_sooptmcopyin() */
2200 panic("soopt_mcopyin");
2201 return 0;
2202 }
2203
2204 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2205 int
2206 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2207 {
2208 struct mbuf *m0 = m;
2209 size_t valsize = 0;
2210
2211 if (sopt->sopt_val == NULL)
2212 return 0;
2213 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2214 if (sopt->sopt_p != NULL) {
2215 int error;
2216
2217 error = copyout(mtod(m, char *), sopt->sopt_val,
2218 m->m_len);
2219 if (error != 0) {
2220 m_freem(m0);
2221 return(error);
2222 }
2223 } else
2224 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
2225 sopt->sopt_valsize -= m->m_len;
2226 sopt->sopt_val = (caddr_t)sopt->sopt_val + m->m_len;
2227 valsize += m->m_len;
2228 m = m->m_next;
2229 }
2230 if (m != NULL) {
2231 /* a large enough sockopt buffer should be supplied from user land */
2232 m_freem(m0);
2233 return(EINVAL);
2234 }
2235 sopt->sopt_valsize = valsize;
2236 return 0;
2237 }
2238
2239 void
2240 sohasoutofband(so)
2241 register struct socket *so;
2242 {
2243 struct proc *p;
2244 struct kextcb *kp;
2245
2246 kp = sotokextcb(so);
2247 while (kp) {
2248 if (kp->e_soif && kp->e_soif->sf_sohasoutofband) {
2249 if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
2250 return;
2251 }
2252 kp = kp->e_next;
2253 }
2254 if (so->so_pgid < 0)
2255 gsignal(-so->so_pgid, SIGURG);
2256 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2257 psignal(p, SIGURG);
2258 selwakeup(&so->so_rcv.sb_sel);
2259 }
2260
2261 int
2262 sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
2263 {
2264 struct proc *p = current_proc();
2265 int revents = 0;
2266 int s = splnet();
2267
2268 if (events & (POLLIN | POLLRDNORM))
2269 if (soreadable(so))
2270 revents |= events & (POLLIN | POLLRDNORM);
2271
2272 if (events & (POLLOUT | POLLWRNORM))
2273 if (sowriteable(so))
2274 revents |= events & (POLLOUT | POLLWRNORM);
2275
2276 if (events & (POLLPRI | POLLRDBAND))
2277 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2278 revents |= events & (POLLPRI | POLLRDBAND);
2279
2280 if (revents == 0) {
2281 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2282 /* Darwin sets the flag first, BSD calls selrecord first */
2283 so->so_rcv.sb_flags |= SB_SEL;
2284 selrecord(p, &so->so_rcv.sb_sel, wql);
2285 }
2286
2287 if (events & (POLLOUT | POLLWRNORM)) {
2288 /* Darwin sets the flag first, BSD calls selrecord first */
2289 so->so_snd.sb_flags |= SB_SEL;
2290 selrecord(p, &so->so_snd.sb_sel, wql);
2291 }
2292 }
2293
2294 splx(s);
2295 return (revents);
2296 }
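
/*
 * Illustrative sketch, not part of this file: sopoll() above reports
 * POLLPRI/POLLRDBAND while urgent data is pending (so_oobmark set, or the
 * stream is at the mark), so a user-space caller can multiplex normal and
 * out-of-band readiness.  Standard poll(2) API.
 */
#if 0
#include <poll.h>

static int
wait_readable_or_oob(int fd, int timeout_ms)
{
	struct pollfd pfd;

	pfd.fd = fd;
	pfd.events = POLLIN | POLLPRI;
	if (poll(&pfd, 1, timeout_ms) <= 0)
		return (0);		/* timed out or failed */
	if (pfd.revents & POLLPRI)
		return (2);		/* at/near the OOB mark */
	return ((pfd.revents & POLLIN) ? 1 : 0);
}
#endif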