/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <machine/limits.h>

int		so_cache_hw = 0;
int		so_cache_timeouts = 0;
int		so_cache_max_freed = 0;
int		cached_sock_count = 0;
struct socket	*socket_cache_head = 0;
struct socket	*socket_cache_tail = 0;
u_long		so_cache_time = 0;
int		so_cache_init_done = 0;
struct zone	*so_cache_zone;
extern int	get_inpcb_str_size();
extern int	get_tcp_str_size();

int	socket_debug = 0;
int	socket_zone = M_SOCKET;
so_gen_t	so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))


SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
	   0, "");

void	so_cache_timer();

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

void socketinit()
{
	vm_size_t	str_size;

	so_cache_init_done = 1;

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
	str_size = (vm_size_t)(sizeof (struct socket) + 4 +
			       get_inpcb_str_size() + 4 +
			       get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");
#if TEMPDEBUG
	kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
}
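
/*
 * Editor's layout sketch (not from the original source): each
 * so_cache_zone element is one contiguous block sized above for a
 * socket, an inpcb, and a tcpcb, with up to 4 bytes of padding so
 * each piece can start on a longword boundary:
 *
 *	+---------------+-----+------------+-----+------------+
 *	| struct socket | pad | inpcb area | pad | tcpcb area |
 *	+---------------+-----+------------+-----+------------+
 *	                      ^ so_saved_pcb     ^ inp_saved_ppcb
 *
 * cached_sock_alloc() below computes those two offsets.
 */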

void cached_sock_alloc(so, waitok)
	struct socket	**so;
	int		waitok;
{
	caddr_t		temp;
	int		s;
	register u_long	offset;

	s = splnet();
	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;
		splx(s);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
	}
	else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %x from memory\n", *so);
#endif

		splx(s);
		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory.  Align extra structures on longword boundaries.
		 */
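		/*
		 * Editor's worked example: if an offset ends at 0x...16,
		 * then 0x16 & 0x3 != 0, and the rounding below yields
		 * (0x16 + 4) & ~0x3 = 0x18, the next longword boundary.
		 * Already-aligned offsets are left untouched.
		 */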

		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}

		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}


void cached_sock_free(so)
	struct socket	*so;
{
	int	s;

	s = splnet();
	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		splx(s);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %x\n", so);
#endif
		zfree(so_cache_zone, (vm_offset_t) so);
	}
	else {
#if TEMPDEBUG
		kprintf("Freeing socket %x into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		splx(s);
	}

#if TEMPDEBUG
	kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
#endif
}


void so_cache_timer()
{
	register struct socket	*p;
	register int		s;
	register int		n_freed = 0;
	boolean_t		funnel_state;

	funnel_state = thread_funnel_set(network_flock, TRUE);

	++so_cache_time;

	s = splnet();

	while ((p = socket_cache_tail))
	{
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if ((socket_cache_tail = p->cache_prev))
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		splx(s);

		zfree(so_cache_zone, (vm_offset_t) p);

		splnet();
		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
		{
			so_cache_max_freed++;
			break;
		}
	}
	splx(s);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	(void) thread_funnel_set(network_flock, FALSE);
}


/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else
	{
		so = _MALLOC_ZONE(sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */

	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}

	return so;
}

int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	struct socket *so;
	register int error = 0;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

	if (p != 0) {
		if (p->p_ucred->cr_uid == 0)
			so->so_state = SS_PRIV;

		so->so_uid = p->p_ucred->cr_uid;
	}

	so->so_proto = prp;
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	if (prp->pr_sfilter.tqh_first)
		error = sfilter_init(so);
	if (error == 0)
		error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);

	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	prp->pr_domain->dom_refs++;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
	*aso = so;
	return (0);
}

int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error;
	struct kextcb *kp;
	int s = splnet();

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	if (error == 0)	/* ??? */
	{	kp = sotokextcb(so);
		while (kp)
		{	if (kp->e_soif && kp->e_soif->sf_sobind)
			{	error = (*kp->e_soif->sf_sobind)(so, nam, kp);
				if (error)
				{	if (error == EJUSTRETURN)
						break;
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}

void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else
		_FREE_ZONE(so, sizeof(*so), so->so_zone);
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct kextcb *kp;
	struct proc *p = current_proc();
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	kp = sotokextcb(so);
	while (kp)
	{
		if (kp->e_soif && kp->e_soif->sf_solisten)
		{	error = (*kp->e_soif->sf_solisten)(so, kp);
			if (error)
			{	if (error == EJUSTRETURN)
					break;
				splx(s);
				return(error);
			}
		}
		kp = kp->e_next;
	}

	splx(s);
	return (0);
}


void
sofree(so)
	register struct socket *so;
{	int error;
	struct kextcb *kp;
	struct socket *head = so->so_head;

	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_sofree)
		{	error = (*kp->e_soif->sf_sofree)(so, kp);
			if (error) {
				selthreadclear(&so->so_snd.sb_sel);
				selthreadclear(&so->so_rcv.sb_sel);
				return;	/* void fn */
			}
		}
		kp = kp->e_next;
	}

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		return;
	}
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~(SS_INCOMP|SS_COMP);
		so->so_head = NULL;
	}

	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
	sorflush(so);
	sfilter_term(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;
	struct kextcb *kp;

#if FB31SIG
	funsetown(so->so_pgid);
#endif
	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_soclose)
		{	error = (*kp->e_soif->sf_soclose)(so, kp);
			if (error)
			{	splx(s);
				return((error == EJUSTRETURN) ? 0 : error);
			}
		}
		kp = kp->e_next;
	}

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_pcb && (so->so_state & SS_NOFDREF))
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{
	return (*so->so_proto->pr_usrreqs->pru_abort)(so);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	if (error == 0)
	{	kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soaccept)
			{	error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
				if (error)
				{	if (error == EJUSTRETURN)
						break;
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}

	splx(s);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	int s;
	int error;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	     (error = sodisconnect(so))))
		error = EISCONN;
	else {
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
		if (error == 0)
		{
			kp = sotokextcb(so);
			while (kp)
			{
				if (kp->e_soif && kp->e_soif->sf_soconnect)
				{	error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
					if (error)
					{	if (error == EJUSTRETURN)
							break;
						splx(s);
						return(error);
					}
				}
				kp = kp->e_next;
			}
		}
	}

	splx(s);
	return (error);
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	if (error == 0)
	{	kp = sotokextcb(so1);
		while (kp)
		{	if (kp->e_soif && kp->e_soif->sf_soconnect2)
			{	error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
				if (error)
				{	if (error == EJUSTRETURN)
						break;
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}

int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);

	if (error == 0)
	{	kp = sotokextcb(so);
		while (kp)
		{	if (kp->e_soif && kp->e_soif->sf_sodisconnect)
			{	error = (*kp->e_soif->sf_sodisconnect)(so, kp);
				if (error)
				{	if (error == EJUSTRETURN)
						break;
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}

bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *	point at the mbuf chain being constructed and go from there.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     so->so_snd.sb_lowat,
		     so->so_snd.sb_hiwat);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0 && !(flags&MSG_HOLD))
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				boolean_t	dropped_funnel = FALSE;
				int		chainlength;
				int		bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					if (bytes_to_copy >= sosendminchain) {
						dropped_funnel = TRUE;
						(void)thread_funnel_set(network_flock, FALSE);
					}
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				do {
					if (bytes_to_copy >= MINCLSIZE) {
						if ((m = freelist) == NULL) {
							int num_needed;
							int hdrs_needed = 0;

							if (top == 0)
								hdrs_needed = 1;
							num_needed = bytes_to_copy / MCLBYTES;

							if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
								num_needed++;

							if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL)
								goto getpackets_failed;
							m = freelist;
						}
						freelist = m->m_next;
						m->m_next = NULL;

						mlen = MCLBYTES;
						len = min(mlen, bytes_to_copy);
					} else {
getpackets_failed:
						if (top == 0) {
							MGETHDR(m, M_WAIT, MT_DATA);
							mlen = MHLEN;
							m->m_pkthdr.len = 0;
							m->m_pkthdr.rcvif = (struct ifnet *)0;
						} else {
							MGET(m, M_WAIT, MT_DATA);
							mlen = MLEN;
						}
						len = min(mlen, bytes_to_copy);
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && len < mlen)
							MH_ALIGN(m, len);
					}
					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					resid = uio->uio_resid;

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				if (dropped_funnel == TRUE)
					(void)thread_funnel_set(network_flock, TRUE);
				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{	/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				if (so->so_temp && (flags & MSG_FLUSH))
				{	m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{	top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			kp = sotokextcb(so);
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag, and there is nothing
			     * left to send, then use PRU_SEND_EOF instead
			     * of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
			while (kp)
			{	if (kp->e_soif && kp->e_soif->sf_sosend)
				{	error = (*kp->e_soif->sf_sosend)(so, &addr,
									 &uio, &top,
									 &control,
									 &sendflags,
									 kp);
					if (error)
					{	splx(s);
						if (error == EJUSTRETURN)
						{	sbunlock(&so->so_snd);

							if (freelist)
								m_freem_list(freelist);
							return(0);
						}
						goto release;
					}
				}
				kp = kp->e_next;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    sendflags, top, addr, control, p);
			splx(s);
			if (flags & MSG_SEND)
				so->so_temp = NULL;

			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     space,
		     error);

	return (error);
}
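
/*
 * Editor's usage sketch (not part of the original file): how a kernel
 * caller such as the sendit() path might drive sosend() with a uio
 * describing a single user-space buffer.  `so', `buf', `buflen' and
 * `p' are hypothetical names standing in for the caller's state.
 */
#if 0
	struct iovec aiov;
	struct uio auio;
	int error;

	aiov.iov_base = buf;			/* user-space buffer */
	aiov.iov_len = buflen;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = buflen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_procp = p;

	/* no destination address, no control mbufs, no flags */
	error = sosend(so, (struct sockaddr *)0, &auio, (struct mbuf *)0,
	    (struct mbuf *)0, 0);
	/* bytes sent == buflen - auio.uio_resid */
#endif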

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register struct mbuf *free_list, *ml;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;
	struct kextcb *kp;

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		     so,
		     uio->uio_resid,
		     so->so_rcv.sb_cc,
		     so->so_rcv.sb_lowat,
		     so->so_rcv.sb_hiwat);

	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_soreceive)
		{	error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
							    mp0, controlp,
							    flagsp, kp);
			if (error)
				return((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument.  Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not received yet;
				 * EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if ((so->so_state & SS_ISCONFIRMING) && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))))
	{
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	     ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		if (socket_debug)
			printf("Waiting for socket data\n");
		error = sbwait(&so->so_rcv);
		if (socket_debug)
			printf("SORECEIVE - sbwait returned %d\n", error);
		splx(s);
		if (error)
		{
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;

	free_list = m;
	ml = (struct mbuf *)0;

	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#if 0
		/*
		 * This assertion needs rework.  The trouble is AppleTalk
		 * uses many mbuf types (NOT listed in mbuf.h!) which will
		 * trigger this panic.  For now just remove the
		 * assertion...  CSM 9/98
		 */
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
#endif
		/*
		 * Make sure to always set the MSG_OOB flag when getting
		 * out-of-band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			flags |= MSG_OOB;
		}
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					m->m_nextpkt = 0;
					ml = m;
					m = m->m_next;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					postevent(so, 0, EV_OOB);
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while ((flags & MSG_WAITALL) && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || (so->so_state & SS_CANTRCVMORE))
				break;

			if (ml) {
				so->so_rcv.sb_mb = ml->m_next;
				ml->m_next = (struct mbuf *)0;
				m_freem_list(free_list);
			}
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, 0,0,0,0,0);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m) {
				nextrecord = m->m_nextpkt;
				free_list = m;
			}
			ml = (struct mbuf *)0;
		}
	}
	if (ml) {
		so->so_rcv.sb_mb = ml->m_next;
		ml->m_next = (struct mbuf *)0;
		m_freem_list(free_list);
	}

	if (m && (pr->pr_flags & PR_ATOMIC)) {
		if (so->so_options & SO_DONTTRUNC)
			flags |= MSG_RCVMORE;
		else
		{	flags |= MSG_TRUNC;
			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);
		}
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
		     so,
		     uio->uio_resid,
		     so->so_rcv.sb_cc,
		     0,
		     error);

	return (error);
}
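
/*
 * Editor's usage sketch (not part of the original file): how a recvmsg-
 * style caller might drive soreceive(), collecting the source address
 * and any control mbufs.  `so', `buf', `buflen' and `p' are
 * hypothetical names standing in for the caller's state.
 */
#if 0
	struct sockaddr *fromsa = 0;
	struct mbuf *control = 0;
	struct iovec aiov;
	struct uio auio;
	int flags = 0;
	int error;

	aiov.iov_base = buf;
	aiov.iov_len = buflen;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = buflen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_procp = p;

	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    &control, &flags);
	/* bytes received == buflen - auio.uio_resid */
#endif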

int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;
	struct kextcb *kp;
	int ret;

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0);
	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_soshutdown)
		{	ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
			if (ret)
				return((ret == EJUSTRETURN) ? 0 : ret);
		}
		kp = kp->e_next;
	}

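	/*
	 * Editor's note: `how' is 0 (no more receives), 1 (no more sends)
	 * or 2 (both), so how + 1 maps it onto the FREAD (0x1) and
	 * FWRITE (0x2) bits tested below.
	 */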
	how++;
	if (how & FREAD) {
		sorflush(so);
		postevent(so, 0, EV_RCLOSED);
	}
	if (how & FWRITE) {
		ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
		postevent(so, 0, EV_WCLOSED);
		KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
		return(ret);
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
	return (0);
}

void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s, error;
	struct sockbuf asb;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_sorflush)
		{	if ((*kp->e_soif->sf_sorflush)(so, kp))
				return;
		}
		kp = kp->e_next;
	}

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	selthreadclear(&sb->sb_sel);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct	sockopt *sopt;
	void	*buf;
	size_t	len;
	size_t	minlen;
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}
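
/*
 * Editor's example (not part of the original file): a protocol's
 * pr_ctloutput() handler typically pulls a fixed-size option value in
 * like this.  If the user passed fewer than sizeof optval bytes the
 * call fails with EINVAL; any extra bytes are ignored.
 */
#if 0
	int optval;
	int error;

	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error)
		return (error);
#endif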

int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	short	val;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_socontrol)
		{	error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			if (tv.tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
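			/*
			 * Editor's note: convert the timeval into sb_timeo
			 * clock ticks.  For example, with hz = 100 and
			 * tick = 10000 (usec per tick), 2.5 seconds becomes
			 * 2 * 100 + 500000 / 10000 = 250 ticks.
			 */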
			val = tv.tv_sec * hz + tv.tv_usec / tick;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;

		case SO_NKE:
		{	struct so_nke nke;

			error = sooptcopyin(sopt, &nke,
					    sizeof nke, sizeof nke);
			if (error)
				goto bad;

			error = nke_insert(so, &nke);
			break;
		}

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct	sockopt *sopt;
	void	*buf;
	size_t	len;
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}

int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	struct mbuf *m;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_socontrol)
		{	error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_NREAD:
		{	int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC)
			{
#if 0
				kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
#endif
				while (m1)
				{	if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
#if 0
					kprintf("CNT: %d/%d\n", m1->m_len, pkt_total);
#endif
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
#if 0
			kprintf("RTN: %d\n", optval);
#endif
			goto integer;
		}
		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp)
	{	if (kp->e_soif && kp->e_soif->sf_sohasoutofband)
		{	if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
				return;
		}
		kp = kp->e_next;
	}
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * Network filter support
 */
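/*
 * Editor's note on the convention used throughout this file: each
 * socket carries a chain of kextcb filter blocks, walked via
 * sotokextcb() and e_next.  A filter hook that returns EJUSTRETURN
 * stops the walk without reporting an error to the caller; any other
 * nonzero return aborts the operation with that error.
 */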
/* Run the list of filters, creating extension control blocks */
int
sfilter_init(register struct socket *so)
{	struct kextcb *kp, **kpp;
	struct protosw *prp;
	struct NFDescriptor *nfp;

	prp = so->so_proto;
	nfp = prp->pr_sfilter.tqh_first;	/* non-null */
	kpp = &so->so_ext;
	kp = NULL;
	while (nfp)
	{	MALLOC(kp, struct kextcb *, sizeof(*kp),
		       M_TEMP, M_WAITOK);
		if (kp == NULL)
			return(ENOBUFS);	/* so_free will clean up */
		*kpp = kp;
		kpp = &kp->e_next;
		kp->e_next = NULL;
		kp->e_fcb = NULL;
		kp->e_nfd = nfp;
		kp->e_soif = nfp->nf_soif;
		kp->e_sout = nfp->nf_soutil;
		/*
		 * Ignore return value for create
		 * Everyone gets a chance at startup
		 */
		if (kp->e_soif && kp->e_soif->sf_socreate)
			(*kp->e_soif->sf_socreate)(so, prp, kp);
		nfp = nfp->nf_next.tqe_next;
	}
	return(0);
}


/*
 * Run the list of filters, freeing extension control blocks
 * Assumes the soif/soutil blocks have been handled.
 */
int
sfilter_term(struct socket *so)
{	struct kextcb *kp, *kp1;

	kp = so->so_ext;
	while (kp)
	{	kp1 = kp->e_next;
		/*
		 * Ignore return code on termination; everyone must
		 * get terminated.
		 */
		if (kp->e_soif && kp->e_soif->sf_sofree)
			kp->e_soif->sf_sofree(so, kp);
		FREE(kp, M_TEMP);
		kp = kp1;
	}
	return(0);
}


int
sopoll(struct socket *so, int events, struct ucred *cred, void *wql)
{
	struct proc *p = current_proc();
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	splx(s);
	return (revents);
}

/* #### IPv6 Integration.  Added new routines */
int
sooptgetm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}
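
/*
 * Editor's usage sketch (not part of the original file, and assuming
 * the caller is a pr_ctloutput() routine such as the IPv6 code these
 * helpers were added for): sooptgetm() sizes an mbuf chain to
 * sopt->sopt_valsize, sooptmcopyin() fills it from the user's option
 * buffer, and sooptmcopyout() returns a protocol-built chain.  `reply'
 * is a hypothetical chain built by the protocol.
 */
#if 0
	/* Setting an option: pull the user's value into an mbuf chain. */
	struct mbuf *m = NULL;
	int error;

	error = sooptgetm(sopt, &m);		/* size chain to sopt_valsize */
	if (error == 0)
		error = sooptmcopyin(sopt, m);	/* fill it from sopt_val */

	/* Getting an option: copy a protocol-built chain back out. */
	error = sooptmcopyout(sopt, reply);	/* also sets sopt_valsize */
#endif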

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
sooptmcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
				       m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val = (caddr_t)sopt->sopt_val + m->m_len;
		m = m->m_next;
	}
	if (m != NULL)	/* should have been allocated large enough by ip6_sooptmcopyin() */
		panic("sooptmcopyin");
	return 0;
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
sooptmcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
					m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val = (caddr_t)sopt->sopt_val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* a large enough soopt buffer should be supplied from user-land */
		m_freem(m0);
		return(EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}