/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <machine/limits.h>

int so_cache_hw = 0;
int so_cache_timeouts = 0;
int so_cache_max_freed = 0;
int cached_sock_count = 0;
struct socket *socket_cache_head = 0;
struct socket *socket_cache_tail = 0;
u_long so_cache_time = 0;
int so_cache_init_done = 0;
struct zone *so_cache_zone;
extern int get_inpcb_str_size();
extern int get_tcp_str_size();

static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
	   0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
	   0, "");

void so_cache_timer();
struct mbuf *m_getpackets(int, int, int);

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

#ifdef __APPLE__
void socketinit()
{
	vm_size_t str_size;

	so_cache_init_done = 1;

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
	str_size = (vm_size_t)(sizeof(struct socket) + 4 +
			       get_inpcb_str_size() + 4 +
			       get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");
#if TEMPDEBUG
	kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
}

void cached_sock_alloc(so, waitok)
struct socket **so;
int waitok;
{
	caddr_t	temp;
	int s;
	register u_long offset;

	s = splnet();
	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;
		splx(s);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
	}
	else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %x from memory\n", *so);
#endif

		splx(s);
		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory.  Align extra structures on longword boundaries.
		 */

		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}

		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}
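
/*
 * NB (editorial sketch, not original text): the longword-alignment
 * steps above, of the form
 *
 *	if (offset & 0x3) {
 *		offset += 4;
 *		offset &= 0xfffffffc;
 *	}
 *
 * are equivalent to the usual round-up idiom
 *
 *	offset = (offset + 3) & ~(u_long)3;
 *
 * both round "offset" up to the next multiple of 4.
 */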

void cached_sock_free(so)
struct socket *so;
{
	int s;

	s = splnet();
	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		splx(s);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %x\n", so);
#endif
		zfree(so_cache_zone, (vm_offset_t) so);
	}
	else {
#if TEMPDEBUG
		kprintf("Freeing socket %x into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		splx(s);
	}

#if TEMPDEBUG
	kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
#endif
}

void so_cache_timer()
{
	register struct socket *p;
	register int s;
	register int n_freed = 0;
	boolean_t funnel_state;

	funnel_state = thread_funnel_set(network_flock, TRUE);

	++so_cache_time;

	s = splnet();

	while ((p = socket_cache_tail))
	{
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if ((socket_cache_tail = p->cache_prev))
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		splx(s);

		zfree(so_cache_zone, (vm_offset_t) p);

		splnet();	/* raise ipl again; "s" still holds the level saved at entry */
		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
		{
			so_cache_max_freed++;
			break;
		}
	}
	splx(s);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	(void) thread_funnel_set(network_flock, FALSE);
}
#endif /* __APPLE__ */

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else
	{
		so = _MALLOC_ZONE(sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */

	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}

	return so;
}

int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;
#if TCPDEBUG
	extern int tcpconsdebug;
#endif
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
#ifndef __APPLE__
	if (p->p_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}
#endif
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

#ifdef __APPLE__
	if (p != 0) {
		if (p->p_ucred->cr_uid == 0)
			so->so_state = SS_PRIV;

		so->so_uid = p->p_ucred->cr_uid;
	}
#else
	so->so_cred = p->p_ucred;
	crhold(so->so_cred);
#endif
	so->so_proto = prp;
#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	if (prp->pr_sfilter.tqh_first)
		error = sfilter_init(so);
	if (error == 0)
#endif
		error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so the protocol attachment handler must be coded carefully.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
#ifdef __APPLE__
	prp->pr_domain->dom_refs++;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
#if TCPDEBUG
	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;
#endif
#endif

	*aso = so;
	return (0);
}
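
/*
 * Illustrative sketch (not part of the original source): socreate() is
 * the workhorse behind the socket(2) system call.  A hypothetical
 * in-kernel caller creating a TCP socket would look roughly like:
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error)
 *		return (error);
 *	... sobind()/soconnect()/sosend() on "so", then soclose(so) ...
 *
 * On success *aso points at an allocated, protocol-attached socket.
 */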

int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error;
	struct kextcb *kp;
	int s = splnet();

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_sobind) {
				error = (*kp->e_soif->sf_sobind)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}
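
/*
 * Illustrative note (editorial): the loop above is the NKE socket-filter
 * convention used throughout this file.  A hook may return 0 (continue
 * to the next filter), EJUSTRETURN (stop filtering; the operation is
 * reported as successful), or any other errno (abort the operation).
 * A hypothetical sf_sobind hook that vetoes reserved ports might be:
 *
 *	static int
 *	my_sf_sobind(struct socket *so, struct sockaddr *nam, struct kextcb *kp)
 *	{
 *		struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 *
 *		if (sin->sin_family == AF_INET && ntohs(sin->sin_port) < 1024)
 *			return (EACCES);	(abort the bind)
 *		return (0);			(let the next filter run)
 *	}
 */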

void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

#ifndef __APPLE__
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
#endif /* INET */
	crfree(so->so_cred);
	zfreei(so->so_zone, so);
#else
	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else
		_FREE_ZONE(so, sizeof(*so), so->so_zone);
#endif /* __APPLE__ */
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct kextcb *kp;
	struct proc *p = current_proc();
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_solisten) {
			error = (*kp->e_soif->sf_solisten)(so, kp);
			if (error) {
				if (error == EJUSTRETURN) {
					error = 0;
					break;
				}
				splx(s);
				return(error);
			}
		}
		kp = kp->e_next;
	}

	splx(s);
	return (0);
}

void
sofree(so)
	register struct socket *so;
{
	int error;
	struct kextcb *kp;
	struct socket *head = so->so_head;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sofree) {
			error = (*kp->e_soif->sf_sofree)(so, kp);
			if (error) {
				selthreadclear(&so->so_snd.sb_sel);
				selthreadclear(&so->so_rcv.sb_sel);
				return;	/* void fn */
			}
		}
		kp = kp->e_next;
	}

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
#ifdef __APPLE__
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
#endif
		return;
	}
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
#ifdef __APPLE__
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
#endif
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
#endif
	sorflush(so);
	sfilter_term(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();	/* conservative */
	int error = 0;
	struct kextcb *kp;

#ifndef __APPLE__
	funsetown(so->so_sigio);
#endif
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soclose) {
			error = (*kp->e_soif->sf_soclose)(so, kp);
			if (error) {
				splx(s);
				return((error == EJUSTRETURN) ? 0 : error);
			}
		}
		kp = kp->e_next;
	}

	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);
#endif
	sofree(so);
	splx(s);
	return (error);
}
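
/*
 * Illustrative sketch (assumption, not original text): the SO_LINGER
 * block above is what makes close(2) block in the tsleep() loop until
 * the disconnect completes or the linger interval expires.  From user
 * space that behavior is requested with:
 *
 *	struct linger l;
 *
 *	l.l_onoff = 1;
 *	l.l_linger = 5;
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof l);
 *	close(s);
 *
 * so_linger is handed to tsleep() as its timeout argument above.
 */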

/*
 * Must be called at splnet...
 */
int
soabort(so)
	struct socket *so;
{
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sofree(so);
		return error;
	}
	return (0);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soaccept) {
				error = (*kp->e_soif->sf_soaccept)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}

	splx(s);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	int s;
	int error;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else {
		/*
		 * Run connect filter before calling protocol:
		 *  - non-blocking connect returns before completion;
		 *  - allows filters to modify address.
		 */
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soconnect) {
				error = (*kp->e_soif->sf_soconnect)(so, nam, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	}
	splx(s);
	return (error);
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	if (error == 0) {
		kp = sotokextcb(so1);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_soconnect2) {
				error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}
	splx(s);
	return (error);
}

int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;
	struct kextcb *kp;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
	if (error == 0) {
		kp = sotokextcb(so);
		while (kp) {
			if (kp->e_soif && kp->e_soif->sf_sodisconnect) {
				error = (*kp->e_soif->sf_sodisconnect)(so, kp);
				if (error) {
					if (error == EJUSTRETURN) {
						error = 0;
						break;
					}
					splx(s);
					return(error);
				}
			}
			kp = kp->e_next;
		}
	}

bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *	point at the mbuf chain being constructed and go from there.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	struct proc *p = current_proc();
	struct kextcb *kp;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     so->so_snd.sb_lowat,
		     so->so_snd.sb_hiwat);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0 && !(flags&MSG_HOLD))
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				boolean_t dropped_funnel = FALSE;
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					if (bytes_to_copy >= sosendminchain) {
						dropped_funnel = TRUE;
						(void)thread_funnel_set(network_flock, FALSE);
					}
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				do {
					if (bytes_to_copy >= MINCLSIZE) {
						/*
						 * try to maintain a local cache of mbuf clusters needed to complete this write
						 * the list is further limited to the number that are currently needed to fill the socket
						 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
						 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
						 * if we fail early (or miscalculate the number needed) make sure to release any clusters
						 * we haven't yet consumed.
						 */
						if ((m = freelist) == NULL) {
							int num_needed;
							int hdrs_needed = 0;

							if (top == 0)
								hdrs_needed = 1;
							num_needed = bytes_to_copy / MCLBYTES;

							if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
								num_needed++;

							if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL)
								goto getpackets_failed;
							m = freelist;
						}
						freelist = m->m_next;
						m->m_next = NULL;

						mlen = MCLBYTES;
						len = min(mlen, bytes_to_copy);
					} else {
getpackets_failed:
						if (top == 0) {
							MGETHDR(m, M_WAIT, MT_DATA);
							mlen = MHLEN;
							m->m_pkthdr.len = 0;
							m->m_pkthdr.rcvif = (struct ifnet *)0;
						} else {
							MGET(m, M_WAIT, MT_DATA);
							mlen = MLEN;
						}
						len = min(mlen, bytes_to_copy);
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && len < mlen)
							MH_ALIGN(m, len);
					}
					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					resid = uio->uio_resid;

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				if (dropped_funnel == TRUE)
					(void)thread_funnel_set(network_flock, TRUE);
				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND)) {
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;

				if (so->so_temp && (flags & MSG_FLUSH)) {
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD) {
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();	/* XXX */
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
			kp = sotokextcb(so);
			while (kp) {
				if (kp->e_soif && kp->e_soif->sf_sosend) {
					error = (*kp->e_soif->sf_sosend)(so, &addr,
									 &uio, &top,
									 &control,
									 &sendflags,
									 kp);
					if (error) {
						splx(s);
						if (error == EJUSTRETURN) {
							sbunlock(&so->so_snd);

							if (freelist)
								m_freem_list(freelist);
							return(0);
						}
						goto release;
					}
				}
				kp = kp->e_next;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    sendflags, top, addr, control, p);
			splx(s);
#ifdef __APPLE__
			if (flags & MSG_SEND)
				so->so_temp = NULL;
#endif
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     space,
		     error);

	return (error);
}
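
/*
 * Illustrative sketch (editorial): as the comment above sosend() warns,
 * callers must check for short counts when EINTR/ERESTART come back.
 * A hypothetical in-kernel writer that insists on sending everything
 * might loop like this:
 *
 *	int error;
 *
 *	do {
 *		error = sosend(so, NULL, uio, NULL, NULL, 0);
 *	} while ((error == EINTR || error == ERESTART) &&
 *		 uio->uio_resid > 0);
 *
 * uio->uio_resid records what remains, so the retry resumes where the
 * interrupted send stopped.
 */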

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp, *ml;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;
	struct kextcb *kp;
	volatile struct mbuf *free_list;
	volatile int delayed_copy_len;
	int can_delay;
	int need_event;
	struct proc *p = current_proc();

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		     so,
		     uio->uio_resid,
		     so->so_rcv.sb_cc,
		     so->so_rcv.sb_lowat,
		     so->so_rcv.sb_hiwat);

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soreceive) {
			error = (*kp->e_soif->sf_soreceive)(so, psa, &uio,
							    mp0, controlp,
							    flagsp, kp);
			if (error) {
				KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
				return((error == EJUSTRETURN) ? 0 : error);
			}
		}
		kp = kp->e_next;
	}

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument.  Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL) {
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
			return (ENOBUFS);
		}
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
#ifdef __APPLE__
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 * EWOULDBLOCK: out-of-band data not received yet;
				 * EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
#endif
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	free_list = (struct mbuf *)0;
	delayed_copy_len = 0;
restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {

		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		if (socket_debug)
			printf("Waiting for socket data\n");

		error = sbwait(&so->so_rcv);
		if (socket_debug)
			printf("SORECEIVE - sbwait returned %d\n", error);
		splx(s);
		if (error) {
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		goto restart;
	}
dontblock:
#ifndef __APPLE__
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#else /* __APPLE__ */
	/*
	 * 2207985
	 * This should be uio->uio_procp; however, some callers of this
	 * function use auto variables with stack garbage, and fail to
	 * fill out the uio structure properly.
	 */
	if (p)
		p->p_stats->p_ru.ru_msgrcv++;
#endif /* __APPLE__ */
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa) {
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
			if ((*psa == 0) && (flags & MSG_NEEDSA)) {
				error = EWOULDBLOCK;
				goto release;
			}
		}
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;

	if (!(flags & MSG_PEEK) && uio->uio_resid > sorecvmincopy)
		can_delay = 1;
	else
		can_delay = 0;

	need_event = 0;

	while (m && (uio->uio_resid - delayed_copy_len) > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifndef __APPLE__
/*
 * This assertion needs rework.  The trouble is AppleTalk uses many
 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
 * For now just remove the assertion...  CSM 9/98
 */
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
				("receive 3"));
#else
		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			flags |= MSG_OOB;
		}
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid - delayed_copy_len;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			if (can_delay && len == m->m_len) {
				/*
				 * only delay the copy if we're consuming the
				 * mbuf and we're NOT in MSG_PEEK mode
				 * and we have enough data to make it worthwhile
				 * to drop and retake the funnel... can_delay
				 * reflects the state of the 2 latter constraints
				 * moff should always be zero in these cases
				 */
				delayed_copy_len += len;
			} else {
				splx(s);

				if (delayed_copy_len) {
					error = sodelayed_copy(uio, &free_list, &delayed_copy_len);

					if (error) {
						s = splnet();
						goto release;
					}
					if (m != so->so_rcv.sb_mb) {
						/*
						 * can only get here if MSG_PEEK is not set
						 * therefore, m should point at the head of the rcv queue...
						 * if it doesn't, it means something drastically changed
						 * while we were out from behind the funnel in sodelayed_copy...
						 * perhaps a RST on the stream... in any event, the stream has
						 * been interrupted... it's probably best just to return
						 * whatever data we've moved and let the caller sort it out...
						 */
						break;
					}
				}
				error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);

				s = splnet();
				if (error)
					goto release;
			}
		} else
			uio->uio_resid -= len;

		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);

				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					m->m_nextpkt = 0;
					if (free_list == NULL)
						free_list = m;
					else
						ml->m_next = m;
					ml = m;
					so->so_rcv.sb_mb = m = m->m_next;
					ml->m_next = 0;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until after
					 * any delayed copy processing has finished
					 */
					need_event = 1;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio->uio_resid - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				goto release;

			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			if (sbwait(&so->so_rcv)) {
				error = 0;
				goto release;
			}
			/*
			 * have to wait until after we get back from the sbwait to do the copy because
			 * we will drop the funnel if we have enough data that has been delayed... by dropping
			 * the funnel we open up a window allowing the netisr thread to process the incoming packets
			 * and to change the state of this socket... we're issuing the sbwait because
			 * the socket is empty and we're expecting the netisr thread to wake us up when more
			 * packets arrive... if we allow that processing to happen and then sbwait, we
			 * could stall forever with packets sitting in the socket if no further packets
			 * arrive from the remote side.
			 *
			 * we want to copy before we've collected all the data to satisfy this request to
			 * allow the copy to overlap the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {

				error = sodelayed_copy(uio, &free_list, &delayed_copy_len);

				if (error)
					goto release;
			}
			m = so->so_rcv.sb_mb;
			if (m) {
				nextrecord = m->m_nextpkt;
			}
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
#ifdef __APPLE__
		if (so->so_options & SO_DONTTRUNC)
			flags |= MSG_RCVMORE;
		else {
#endif
			flags |= MSG_TRUNC;
			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);
#ifdef __APPLE__
		}
#endif
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
#ifdef __APPLE__
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;

	if (delayed_copy_len) {
		error = sodelayed_copy(uio, &free_list, &delayed_copy_len);

		if (error)
			goto release;
	}
	if (free_list) {
		m_freem_list((struct mbuf *)free_list);
		free_list = (struct mbuf *)0;
	}
	if (need_event)
		postevent(so, 0, EV_OOB);
#endif
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	if (delayed_copy_len) {
		error = sodelayed_copy(uio, &free_list, &delayed_copy_len);
	}
	if (free_list) {
		m_freem_list((struct mbuf *)free_list);
	}
	sbunlock(&so->so_rcv);
	splx(s);

	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
		     so,
		     uio->uio_resid,
		     so->so_rcv.sb_cc,
		     0,
		     error);

	return (error);
}
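
/*
 * Illustrative sketch (assumption, not original text): the MSG_WAITALL
 * path above lets a caller collect an exact-length record from a stream
 * socket in one call, e.g. from user space:
 *
 *	struct hdr h;
 *	ssize_t n = recv(s, &h, sizeof h, MSG_WAITALL);
 *
 * n == sizeof h unless an error, EOF, signal or timeout cut the receive
 * short -- exactly the short-count case described in the comment inside
 * the loop above.
 */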

int sodelayed_copy(struct uio *uio, struct mbuf **free_list, int *resid)
{
	int error = 0;
	boolean_t dropped_funnel = FALSE;
	struct mbuf *m;

	m = *free_list;

	if (*resid >= sorecvmincopy) {
		dropped_funnel = TRUE;

		(void)thread_funnel_set(network_flock, FALSE);
	}
	while (m && error == 0) {

		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);

		m = m->m_next;
	}
	m_freem_list(*free_list);

	*free_list = (struct mbuf *)NULL;
	*resid = 0;

	if (dropped_funnel == TRUE)
		(void)thread_funnel_set(network_flock, TRUE);

	return (error);
}
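
/*
 * Design note (editorial, inferred from the code): sodelayed_copy()
 * lets soreceive() batch up consumed mbufs and pay the cost of dropping
 * and retaking the network funnel once per batch rather than once per
 * mbuf.  sorecvmincopy is the break-even threshold: below it the
 * uiomove() runs with the funnel held, at or above it the funnel is
 * released so the copy can overlap netisr packet processing on an MP
 * system.
 */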

int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;
	struct kextcb *kp;
	int ret;

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0);
	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_soshutdown) {
			ret = (*kp->e_soif->sf_soshutdown)(so, how, kp);
			if (ret) {
				KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
				return((ret == EJUSTRETURN) ? 0 : ret);
			}
		}
		kp = kp->e_next;
	}

	if (how != SHUT_WR) {
		sorflush(so);
		postevent(so, 0, EV_RCLOSED);
	}
	if (how != SHUT_RD) {
		ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
		postevent(so, 0, EV_WCLOSED);
		KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
		return(ret);
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
	return (0);
}

void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s, error;
	struct sockbuf asb;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sorflush) {
			if ((*kp->e_soif->sf_sorflush)(so, kp))
				return;
		}
		kp = kp->e_next;
	}

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
#ifdef __APPLE__
	selthreadclear(&sb->sb_sel);
#endif
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);

	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct sockopt *sopt;
	void *buf;
	size_t len;
	size_t minlen;
{
	size_t valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}
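
/*
 * Illustrative sketch (editorial): a protocol-level pr_ctloutput()
 * handler typically fetches a fixed-size option value like so:
 *
 *	int optval, error;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 *
 * A shorter user buffer yields EINVAL; a longer one is silently
 * truncated to sizeof optval, as described in the comment above.
 */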

int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	short val;
	struct kextcb *kp;

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_socontrol) {
			error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			{
				long tmp = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
				if (tmp > SHRT_MAX) {
					error = EDOM;
					goto bad;
				}
				val = tmp;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
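
			/*
			 * Worked example (editorial): with hz = 100 and
			 * tick = 10000 usec, a timeout of
			 * { tv_sec = 2, tv_usec = 500000 } becomes
			 * 2 * 100 + 500000 / 10000 = 250 ticks in
			 * sb_timeo; anything over SHRT_MAX ticks was
			 * rejected with EDOM above.
			 */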
2028
2029 case SO_NKE:
9bccf70c
A
2030 {
2031 struct so_nke nke;
1c79356b
A
2032 struct NFDescriptor *nf1, *nf2 = NULL;
2033
9bccf70c
A
2034 error = sooptcopyin(sopt, &nke,
2035 sizeof nke, sizeof nke);
1c79356b
A
2036 if (error)
2037 goto bad;
2038
2039 error = nke_insert(so, &nke);
2040 break;
2041 }
2042
9bccf70c
A
2043 case SO_NOSIGPIPE:
2044 error = sooptcopyin(sopt, &optval, sizeof optval,
2045 sizeof optval);
2046 if (error)
2047 goto bad;
2048 if (optval)
2049 so->so_flags |= SOF_NOSIGPIPE;
2050 else
2051 so->so_flags &= ~SOF_NOSIGPIPE;
2052
2053 break;
2054
55e303ae
A
2055 case SO_NOADDRERR:
2056 error = sooptcopyin(sopt, &optval, sizeof optval,
2057 sizeof optval);
2058 if (error)
2059 goto bad;
2060 if (optval)
2061 so->so_flags |= SOF_NOADDRAVAIL;
2062 else
2063 so->so_flags &= ~SOF_NOADDRAVAIL;
2064
2065 break;
2066
1c79356b
A
2067 default:
2068 error = ENOPROTOOPT;
2069 break;
2070 }
2071 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2072 (void) ((*so->so_proto->pr_ctloutput)
2073 (so, sopt));
2074 }
2075 }
2076bad:
2077 return (error);
2078}
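
/*
 * Editor's illustration (not part of the Apple source): the timeout
 * cases above convert a struct timeval into scheduler ticks and store
 * the result in a short, so tv_sec is limited to SHRT_MAX / hz and
 * anything beyond that is rejected with EDOM.  A user process drives
 * this path as sketched below.
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>

void
example_setopts(int s)
{
	struct linger l = { 1, 5 };		/* linger up to 5 seconds */
	struct timeval tv = { 2, 500000 };	/* 2.5 second recv timeout */

	/* lands in the SO_LINGER case of sosetopt() */
	(void)setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof l);
	/* lands in the SO_RCVTIMEO case; EDOM if out of range */
	(void)setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
}
#endif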

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct	sockopt *sopt;
	void	*buf;
	size_t	len;
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}
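
/*
 * Editor's illustration (not part of the Apple source): as the comment
 * above says, getsockopt() does not fail for a short buffer; the value
 * is truncated and sopt_valsize reports how much was actually copied.
 */
#if 0
#include <sys/socket.h>

void
example_truncated_get(int s)
{
	char small;			/* deliberately too small for an int */
	socklen_t len = sizeof small;

	/* succeeds; len comes back as 1, not sizeof(int) */
	(void)getsockopt(s, SOL_SOCKET, SO_TYPE, &small, &len);
}
#endif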

int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	struct mbuf *m;
	struct kextcb *kp;

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_socontrol) {
			error = (*kp->e_soif->sf_socontrol)(so, sopt, kp);
			if (error)
				return ((error == EJUSTRETURN) ? 0 : error);
		}
		kp = kp->e_next;
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)(so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

#ifdef __APPLE__
		case SO_NREAD:
		{
			int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC) {
#if 0
				kprintf("SKT CC: %d\n", so->so_rcv.sb_cc);
#endif
				while (m1) {
					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
#if 0
					kprintf("CNT: %d/%d\n",
					    m1->m_len, pkt_total);
#endif
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
#if 0
			kprintf("RTN: %d\n", optval);
#endif
			goto integer;
		}
#endif
		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		case SO_NOADDRERR:
			optval = (so->so_flags & SOF_NOADDRAVAIL);
			goto integer;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}
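
/*
 * Editor's illustration (not part of the Apple source): the Darwin-only
 * SO_NREAD case above sums the MT_DATA bytes of the first record for
 * atomic (datagram) protocols, or returns sb_cc for stream protocols,
 * so a caller can size a buffer before reading.
 */
#if 0
#include <sys/socket.h>

int
example_pending_bytes(int s)
{
	int nread = 0;
	socklen_t len = sizeof nread;

	if (getsockopt(s, SOL_SOCKET, SO_NREAD, &nread, &len) == -1)
		return (-1);
	return (nread);	/* bytes in the next datagram, or total buffered */
}
#endif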

#ifdef __APPLE__
/*
 * Network filter support
 */
/* Run the list of filters, creating extension control blocks */
int
sfilter_init(register struct socket *so)
{	struct kextcb *kp, **kpp;
	struct protosw *prp;
	struct NFDescriptor *nfp;

	prp = so->so_proto;
	nfp = prp->pr_sfilter.tqh_first;	/* non-null */
	kpp = &so->so_ext;
	kp = NULL;
	while (nfp)
	{	MALLOC(kp, struct kextcb *, sizeof(*kp),
		       M_TEMP, M_WAITOK);
		if (kp == NULL)
			return (ENOBUFS);	/* so_free will clean up */
		*kpp = kp;
		kpp = &kp->e_next;
		kp->e_next = NULL;
		kp->e_fcb = NULL;
		kp->e_nfd = nfp;
		kp->e_soif = nfp->nf_soif;
		kp->e_sout = nfp->nf_soutil;
		/*
		 * Ignore return value for create;
		 * everyone gets a chance at startup.
		 */
		if (kp->e_soif && kp->e_soif->sf_socreate)
			(*kp->e_soif->sf_socreate)(so, prp, kp);
		nfp = nfp->nf_next.tqe_next;
	}
	return (0);
}

/*
 * Run the list of filters, freeing extension control blocks.
 * Assumes the soif/soutil blocks have been handled.
 */
int
sfilter_term(struct socket *so)
{	struct kextcb *kp, *kp1;

	kp = so->so_ext;
	while (kp)
	{	kp1 = kp->e_next;
		/*
		 * Ignore return code on termination; everyone must
		 * get terminated.
		 */
		if (kp->e_soif && kp->e_soif->sf_sofree)
			kp->e_soif->sf_sofree(so, kp);
		FREE(kp, M_TEMP);
		kp = kp1;
	}
	return (0);
}
#endif /* __APPLE__ */
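
/*
 * Editor's illustration (not part of the Apple source): sosetopt() and
 * sogetopt() both walk the kextcb chain built by sfilter_init() and give
 * each filter's sf_socontrol a first look at the option.  Returning
 * EJUSTRETURN swallows the option (reported to the caller as success);
 * any other non-zero value aborts with that error.  A filter hook might
 * look like this hedged sketch; the struct layouts are assumed from the
 * usage above, and "MYFILT_OPT" is hypothetical.
 */
#if 0
static int
myfilt_socontrol(struct socket *so, struct sockopt *sopt, struct kextcb *kp)
{
	if (sopt->sopt_level == SOL_SOCKET &&
	    sopt->sopt_name == MYFILT_OPT) {	/* hypothetical option */
		/* handle it entirely here; the caller sees success */
		return (EJUSTRETURN);
	}
	return (0);	/* pass to the next filter, then to so[sg]etopt() */
}
#endif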

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}
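
/*
 * Editor's illustration (not part of the Apple source): soopt_getm()
 * sizes the chain up front -- a cluster (MCLBYTES) per mbuf while more
 * than MLEN bytes remain, a plain mbuf otherwise -- and the usual
 * pattern pairs it with soopt_mcopyin() below to hand option data to
 * the older mbuf-based ctloutput routines.
 */
#if 0
static int
example_sopt_to_mbuf(struct sockopt *sopt, struct mbuf **mp)
{
	int error;

	error = soopt_getm(sopt, mp);		/* allocate the chain */
	if (error != 0)
		return (error);
	error = soopt_mcopyin(sopt, *mp);	/* fill it; frees on error */
	return (error);
}
#endif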

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
				       m->m_len);
			if (error != 0) {
				m_freem(m0);
				return (error);
			}
		} else
			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
		m = m->m_next;
	}
	if (m != NULL)	/* ip6_sooptmcopyin() should have allocated enough */
		panic("soopt_mcopyin");
	return 0;
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
					m->m_len);
			if (error != 0) {
				m_freem(m0);
				return (error);
			}
		} else
			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* user-land did not supply a large enough soopt buffer */
		m_freem(m0);
		return (EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;
	struct kextcb *kp;

	kp = sotokextcb(so);
	while (kp) {
		if (kp->e_soif && kp->e_soif->sf_sohasoutofband) {
			if ((*kp->e_soif->sf_sohasoutofband)(so, kp))
				return;
		}
		kp = kp->e_next;
	}
	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
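
/*
 * Editor's illustration (not part of the Apple source): sohasoutofband()
 * signals the owning process or process group with SIGURG, so a process
 * must claim ownership of the socket (the so_pgid checked above) to hear
 * about out-of-band data.
 */
#if 0
#include <sys/socket.h>
#include <fcntl.h>
#include <unistd.h>

void
example_claim_oob(int s)
{
	(void)fcntl(s, F_SETOWN, getpid());	/* sets so_pgid */
	/* ... install a SIGURG handler, then recv(s, buf, n, MSG_OOB) ... */
}
#endif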

int
sopoll(struct socket *so, int events, struct ucred *cred, void * wql)
{
	struct proc *p = current_proc();
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	splx(s);
	return (revents);
}
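
/*
 * Editor's illustration (not part of the Apple source): poll() on a
 * socket lands here; POLLPRI/POLLRDBAND report the out-of-band mark
 * tested above.
 */
#if 0
#include <poll.h>

int
example_wait_readable(int s)
{
	struct pollfd pfd;

	pfd.fd = s;
	pfd.events = POLLIN | POLLPRI;
	if (poll(&pfd, 1, 1000) <= 0)	/* one-second timeout */
		return (0);
	return (pfd.revents & (POLLIN | POLLPRI));
}
#endif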


int
soo_kqfilter(struct file *fp, struct knote *kn, struct proc *p)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	if (sb->sb_sel.si_flags & SI_INITED)
		return (1);

	s = splnet();
	if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
		sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}
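
/*
 * Editor's illustration (not part of the Apple source): EVFILT_READ on a
 * connected socket attaches filt_soread() below, and NOTE_LOWAT (honored
 * there via kn_sfflags/kn_sdata) overrides the socket's receive low-water
 * mark for this knote only.
 */
#if 0
#include <sys/event.h>
#include <sys/time.h>

int
example_kqueue_read(int s)
{
	struct kevent kev;
	int kq = kqueue();

	if (kq == -1)
		return (-1);
	/* fire only once at least 128 bytes are buffered */
	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (-1);
	return (kevent(kq, NULL, 0, &kev, 1, NULL));	/* wait */
}
#endif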

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	if (so->so_rcv.sb_flags & SB_KNOTE &&
	    !(so->so_rcv.sb_sel.si_flags & SI_INITED))
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
			so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	if (so->so_snd.sb_flags & SB_KNOTE &&
	    !(so->so_snd.sb_sel.si_flags & SI_INITED))
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
			so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (!TAILQ_EMPTY(&so->so_comp));
}

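/*
 * Editor's illustration (not part of the Apple source): for a listening
 * socket, soo_kqfilter() selects filt_solisten() above, so an EVFILT_READ
 * event fires when the completed-connection queue is non-empty and
 * kn_data (returned in kevent's data field) is the backlog length.
 */
#if 0
#include <sys/event.h>
#include <sys/socket.h>

void
example_accept_ready(int kq, int listen_fd)
{
	struct kevent kev;

	if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1 &&
	    kev.filter == EVFILT_READ) {
		/* kev.data connections are ready; accept them all */
		while (kev.data-- > 0)
			(void)accept(listen_fd, NULL, NULL);
	}
}
#endif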