]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/tcp_usrreq.c
xnu-517.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_usrreq.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
43866e37 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
43866e37
A
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37
A
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
1c79356b
A
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/*
26 * Copyright (c) 1982, 1986, 1988, 1993
27 * The Regents of the University of California. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * 2. Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in the
36 * documentation and/or other materials provided with the distribution.
37 * 3. All advertising materials mentioning features or use of this software
38 * must display the following acknowledgement:
39 * This product includes software developed by the University of
40 * California, Berkeley and its contributors.
41 * 4. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
9bccf70c 58 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
1c79356b
A
59 */
60
1c79356b
A
61
62#include <sys/param.h>
63#include <sys/systm.h>
64#include <sys/kernel.h>
65#include <sys/sysctl.h>
66#include <sys/mbuf.h>
67#if INET6
68#include <sys/domain.h>
69#endif /* INET6 */
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/protosw.h>
73
74#include <net/if.h>
75#include <net/route.h>
76
77#include <netinet/in.h>
78#include <netinet/in_systm.h>
9bccf70c
A
79#if INET6
80#include <netinet/ip6.h>
81#endif
1c79356b 82#include <netinet/in_pcb.h>
9bccf70c
A
83#if INET6
84#include <netinet6/in6_pcb.h>
85#endif
1c79356b
A
86#include <netinet/in_var.h>
87#include <netinet/ip_var.h>
88#if INET6
1c79356b
A
89#include <netinet6/ip6_var.h>
90#endif
91#include <netinet/tcp.h>
92#include <netinet/tcp_fsm.h>
93#include <netinet/tcp_seq.h>
94#include <netinet/tcp_timer.h>
95#include <netinet/tcp_var.h>
96#include <netinet/tcpip.h>
97#if TCPDEBUG
98#include <netinet/tcp_debug.h>
99#endif
100
101#if IPSEC
102#include <netinet6/ipsec.h>
103#endif /*IPSEC*/
104
105/*
106 * TCP protocol interface to socket abstraction.
107 */
108extern char *tcpstates[]; /* XXX ??? */
109
110static int tcp_attach __P((struct socket *, struct proc *));
111static int tcp_connect __P((struct tcpcb *, struct sockaddr *,
112 struct proc *));
113#if INET6
114static int tcp6_connect __P((struct tcpcb *, struct sockaddr *,
115 struct proc *));
116#endif /* INET6 */
9bccf70c
A
117static struct tcpcb *
118 tcp_disconnect __P((struct tcpcb *));
1c79356b
A
119static struct tcpcb *
120 tcp_usrclosed __P((struct tcpcb *));
121
/*
 * Tracing hooks used by the pru_*() handlers.  They rely on the caller
 * having locals named `tp' and `so' in scope:
 *
 *   TCPDEBUG0      declares `ostate', the TCP state saved before the request.
 *   TCPDEBUG1()    records tp->t_state (or 0 when tp is null) in `ostate'.
 *   TCPDEBUG2(req) calls tcp_trace() for request `req' when tp is non-null
 *                  and SO_DEBUG is set on the socket.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it expands to a single
 * statement and cannot capture a following `else'.  When TCPDEBUG is off
 * all three expand to nothing.
 */
#if TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do {						\
	if (tp && (so->so_options & SO_DEBUG))				\
		tcp_trace(TA_USER, ostate, tp, 0, 0, req);		\
} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
132
133/*
134 * TCP attaches to socket via pru_attach(), reserving space,
135 * and an internet control block.
136 */
137static int
138tcp_usr_attach(struct socket *so, int proto, struct proc *p)
139{
140 int s = splnet();
141 int error;
142 struct inpcb *inp = sotoinpcb(so);
143 struct tcpcb *tp = 0;
144 TCPDEBUG0;
145
146 TCPDEBUG1();
147 if (inp) {
148 error = EISCONN;
149 goto out;
150 }
151
152 error = tcp_attach(so, p);
153 if (error)
154 goto out;
155
156 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
157 so->so_linger = TCP_LINGERTIME * hz;
158 tp = sototcpcb(so);
159out:
160 TCPDEBUG2(PRU_ATTACH);
161 splx(s);
162 return error;
163}
164
165/*
166 * pru_detach() detaches the TCP protocol from the socket.
167 * If the protocol state is non-embryonic, then can't
168 * do this directly: have to initiate a pru_disconnect(),
169 * which may finish later; embryonic TCB's can just
170 * be discarded here.
171 */
172static int
173tcp_usr_detach(struct socket *so)
174{
175 int s = splnet();
176 int error = 0;
177 struct inpcb *inp = sotoinpcb(so);
178 struct tcpcb *tp;
179 TCPDEBUG0;
180
181 if (inp == 0) {
182 splx(s);
183 return EINVAL; /* XXX */
184 }
185 tp = intotcpcb(inp);
186 /* In case we got disconnected from the peer */
187 if (tp == 0)
188 goto out;
189 TCPDEBUG1();
190 tp = tcp_disconnect(tp);
191out:
192 TCPDEBUG2(PRU_DETACH);
193 splx(s);
194 return error;
195}
196
/*
 * Shared prologue/epilogue for most pru_*() handlers.  Both macros expect
 * the enclosing function to declare `s' (saved spl), `error', `inp', `tp'
 * and `so'.
 *
 * COMMON_START(): declares the trace state, returns EINVAL (after splx)
 * when there is no inpcb, otherwise loads `tp' and snapshots its state.
 */
#define	COMMON_START()							\
	TCPDEBUG0;							\
	do {								\
		if (inp == 0) {						\
			splx(s);					\
			return EINVAL;					\
		}							\
		tp = intotcpcb(inp);					\
		TCPDEBUG1();						\
	} while (0)

/*
 * COMMON_END(req): defines the `out' label, traces the request, restores
 * the spl and returns `error'.  The trailing `goto out' is unreachable;
 * presumably it exists so the label is always referenced even in handlers
 * that never jump to it.  The caller supplies the final semicolon.
 */
#define	COMMON_END(req)							\
out:									\
	TCPDEBUG2(req);							\
	splx(s);							\
	return error;							\
	goto out
208
209
210/*
211 * Give the socket an address.
212 */
213static int
214tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
215{
216 int s = splnet();
217 int error = 0;
218 struct inpcb *inp = sotoinpcb(so);
219 struct tcpcb *tp;
220 struct sockaddr_in *sinp;
221
222 COMMON_START();
223
224 /*
225 * Must check for multicast addresses and disallow binding
226 * to them.
227 */
228 sinp = (struct sockaddr_in *)nam;
229 if (sinp->sin_family == AF_INET &&
230 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
231 error = EAFNOSUPPORT;
232 goto out;
233 }
234 error = in_pcbbind(inp, nam, p);
235 if (error)
236 goto out;
237 COMMON_END(PRU_BIND);
238
239}
240
#if INET6
/*
 * pru_bind() handler for IPv6 sockets.
 *
 * AF_INET6 multicast addresses are refused with EAFNOSUPPORT.  The pcb
 * is first marked IPv6-only; then, unless IN6P_IPV6_V6ONLY is set:
 *   - an unspecified address also enables IPv4 on the pcb;
 *   - a v4-mapped address is converted to a sockaddr_in, the pcb is
 *     switched to IPv4-only and the bind goes through in_pcbbind().
 * All other cases bind through in6_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	COMMON_START();

	sin6p = (struct sockaddr_in6 *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	inp->inp_vflag = (inp->inp_vflag & ~INP_IPV4) | INP_IPV6;

	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
			inp->inp_vflag |= INP_IPV4;
		} else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
			struct sockaddr_in sin;

			in6_sin6_2_sin(&sin, sin6p);
			inp->inp_vflag = (inp->inp_vflag | INP_IPV4) & ~INP_IPV6;
			error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
			goto out;
		}
	}
	error = in6_pcbbind(inp, nam, p);
	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
284
285/*
286 * Prepare to accept connections.
287 */
288static int
289tcp_usr_listen(struct socket *so, struct proc *p)
290{
291 int s = splnet();
292 int error = 0;
293 struct inpcb *inp = sotoinpcb(so);
294 struct tcpcb *tp;
295
296 COMMON_START();
297 if (inp->inp_lport == 0)
298 error = in_pcbbind(inp, (struct sockaddr *)0, p);
299 if (error == 0)
300 tp->t_state = TCPS_LISTEN;
301 COMMON_END(PRU_LISTEN);
302}
303
#if INET6
/*
 * pru_listen() handler for IPv6 sockets.
 *
 * When no local port is bound, the pcb's INP_IPV4 flag is set to mirror
 * the IN6P_IPV6_V6ONLY option (cleared when v6-only, set otherwise) and a
 * port is obtained through in6_pcbbind().  The tcpcb enters TCPS_LISTEN
 * only when no error occurred.
 */
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	if (inp->inp_lport == 0) {
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
			inp->inp_vflag &= ~INP_IPV4;
		else
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
	}
	if (!error)
		tp->t_state = TCPS_LISTEN;
	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
325
326/*
327 * Initiate connection to peer.
328 * Create a template for use in transmissions on this connection.
329 * Enter SYN_SENT state, and mark socket as connecting.
330 * Start keep-alive timer, and seed output sequence space.
331 * Send initial segment on connection.
332 */
333static int
334tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
335{
336 int s = splnet();
337 int error = 0;
338 struct inpcb *inp = sotoinpcb(so);
339 struct tcpcb *tp;
340 struct sockaddr_in *sinp;
341
342 COMMON_START();
343
344 /*
345 * Must disallow TCP ``connections'' to multicast addresses.
346 */
347 sinp = (struct sockaddr_in *)nam;
348 if (sinp->sin_family == AF_INET
349 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
350 error = EAFNOSUPPORT;
351 goto out;
352 }
353
9bccf70c
A
354#ifndef __APPLE__
355 prison_remote_ip(p, 0, &sinp->sin_addr.s_addr);
356#endif
357
1c79356b
A
358 if ((error = tcp_connect(tp, nam, p)) != 0)
359 goto out;
360 error = tcp_output(tp);
361 COMMON_END(PRU_CONNECT);
362}
363
#if INET6
/*
 * pru_connect() handler for IPv6 sockets.
 *
 * AF_INET6 multicast destinations are refused with EAFNOSUPPORT.  A
 * v4-mapped destination is converted to a sockaddr_in and connected over
 * IPv4 (pcb switched to INP_IPV4 only), unless the socket has
 * IN6P_IPV6_V6ONLY set, in which case EINVAL is returned.  Any other
 * destination is connected through tcp6_connect().  On success the first
 * segment is sent with tcp_output().
 *
 * Every exit taken after COMMON_START() goes through the `out' label so
 * that the spl saved in `s' is always restored.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	COMMON_START();

	/* TCP ``connections'' to multicast addresses are not allowed. */
	sin6p = (struct sockaddr_in6 *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			/*
			 * Must leave via `out': a bare return here would
			 * skip splx(s) and leave the spl raised.
			 */
			error = EINVAL;
			goto out;
		}

		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		error = tcp_connect(tp, (struct sockaddr *)&sin, p);
		if (error == 0)
			error = tcp_output(tp);
		goto out;
	}

	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	error = tcp6_connect(tp, nam, p);
	if (error == 0)
		error = tcp_output(tp);
	COMMON_END(PRU_CONNECT);
}
#endif /* INET6 */
410
411/*
412 * Initiate disconnect from peer.
413 * If connection never passed embryonic stage, just drop;
414 * else if don't need to let data drain, then can just drop anyways,
415 * else have to begin TCP shutdown process: mark socket disconnecting,
416 * drain unread data, state switch to reflect user close, and
417 * send segment (e.g. FIN) to peer. Socket will be really disconnected
418 * when peer sends FIN and acks ours.
419 *
420 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
421 */
422static int
423tcp_usr_disconnect(struct socket *so)
424{
425 int s = splnet();
426 int error = 0;
427 struct inpcb *inp = sotoinpcb(so);
428 struct tcpcb *tp;
429
430 COMMON_START();
431 /* In case we got disconnected from the peer */
432 if (tp == 0)
433 goto out;
434 tp = tcp_disconnect(tp);
435 COMMON_END(PRU_DISCONNECT);
436}
437
438/*
439 * Accept a connection. Essentially all the work is
440 * done at higher levels; just return the address
441 * of the peer, storing through addr.
442 */
443static int
444tcp_usr_accept(struct socket *so, struct sockaddr **nam)
445{
446 int s = splnet();
447 int error = 0;
448 struct inpcb *inp = sotoinpcb(so);
9bccf70c
A
449 struct tcpcb *tp = NULL;
450 TCPDEBUG0;
1c79356b 451
9bccf70c
A
452 if (so->so_state & SS_ISDISCONNECTED) {
453 error = ECONNABORTED;
454 goto out;
455 }
456 if (inp == 0) {
457 splx(s);
458 return (EINVAL);
459 }
460 tp = intotcpcb(inp);
461 TCPDEBUG1();
1c79356b
A
462 in_setpeeraddr(so, nam);
463 COMMON_END(PRU_ACCEPT);
464}
465
#if INET6
/*
 * pru_accept() handler for IPv6 sockets.  Same as tcp_usr_accept() except
 * that the peer address is reported via in6_mapped_peeraddr().
 *
 * Returns ECONNABORTED if the socket is already marked disconnected,
 * EINVAL if it has no inpcb, and 0 otherwise.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	int s = splnet();
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) == 0) {
		if (inp == NULL) {
			splx(s);
			return (EINVAL);
		}
		tp = intotcpcb(inp);
		TCPDEBUG1();
		in6_mapped_peeraddr(so, nam);
	} else {
		error = ECONNABORTED;
	}
	COMMON_END(PRU_ACCEPT);
}
#endif /* INET6 */
1c79356b
A
490/*
491 * Mark the connection as being incapable of further output.
492 */
493static int
494tcp_usr_shutdown(struct socket *so)
495{
496 int s = splnet();
497 int error = 0;
498 struct inpcb *inp = sotoinpcb(so);
499 struct tcpcb *tp;
500
501 COMMON_START();
502 socantsendmore(so);
503 /* In case we got disconnected from the peer */
504 if (tp == 0)
505 goto out;
506 tp = tcp_usrclosed(tp);
507 if (tp)
508 error = tcp_output(tp);
509 COMMON_END(PRU_SHUTDOWN);
510}
511
512/*
513 * After a receive, possibly send window update to peer.
514 */
515static int
516tcp_usr_rcvd(struct socket *so, int flags)
517{
518 int s = splnet();
519 int error = 0;
520 struct inpcb *inp = sotoinpcb(so);
521 struct tcpcb *tp;
522
523 COMMON_START();
524 /* In case we got disconnected from the peer */
525 if (tp == 0)
526 goto out;
527 tcp_output(tp);
528 COMMON_END(PRU_RCVD);
529}
530
531/*
532 * Do a send by putting data in output queue and updating urgent
9bccf70c
A
533 * marker if URG set. Possibly send more data. Unlike the other
534 * pru_*() routines, the mbuf chains are our responsibility. We
535 * must either enqueue them or free them. The other pru_* routines
536 * generally are caller-frees.
1c79356b
A
537 */
538static int
9bccf70c 539tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
1c79356b
A
540 struct sockaddr *nam, struct mbuf *control, struct proc *p)
541{
542 int s = splnet();
543 int error = 0;
544 struct inpcb *inp = sotoinpcb(so);
545 struct tcpcb *tp;
546#if INET6
547 int isipv6;
9bccf70c
A
548#endif
549 TCPDEBUG0;
1c79356b 550
9bccf70c
A
551 if (inp == NULL) {
552 /*
553 * OOPS! we lost a race, the TCP session got reset after
554 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
555 * network interrupt in the non-splnet() section of sosend().
556 */
1c79356b
A
557 if (m)
558 m_freem(m);
9bccf70c
A
559 if (control)
560 m_freem(control);
561 error = ECONNRESET; /* XXX EPIPE? */
562 tp = NULL;
563 TCPDEBUG1();
1c79356b
A
564 goto out;
565 }
1c79356b
A
566#if INET6
567 isipv6 = nam && nam->sa_family == AF_INET6;
568#endif /* INET6 */
9bccf70c
A
569 tp = intotcpcb(inp);
570 TCPDEBUG1();
571 if (control) {
572 /* TCP doesn't do control messages (rights, creds, etc) */
573 if (control->m_len) {
574 m_freem(control);
575 if (m)
576 m_freem(m);
577 error = EINVAL;
578 goto out;
579 }
580 m_freem(control); /* empty control, just free it */
581 }
1c79356b
A
582 if(!(flags & PRUS_OOB)) {
583 sbappend(&so->so_snd, m);
584 if (nam && tp->t_state < TCPS_SYN_SENT) {
585 /*
586 * Do implied connect if not yet connected,
587 * initialize window to default value, and
588 * initialize maxseg/maxopd using peer's cached
589 * MSS.
590 */
591#if INET6
592 if (isipv6)
593 error = tcp6_connect(tp, nam, p);
594 else
595#endif /* INET6 */
596 error = tcp_connect(tp, nam, p);
597 if (error)
598 goto out;
599 tp->snd_wnd = TTCP_CLIENT_SND_WND;
9bccf70c 600 tcp_mss(tp, -1);
1c79356b
A
601 }
602
603 if (flags & PRUS_EOF) {
604 /*
605 * Close the send side of the connection after
606 * the data is sent.
607 */
608 socantsendmore(so);
609 tp = tcp_usrclosed(tp);
610 }
611 if (tp != NULL) {
612 if (flags & PRUS_MORETOCOME)
613 tp->t_flags |= TF_MORETOCOME;
614 error = tcp_output(tp);
615 if (flags & PRUS_MORETOCOME)
616 tp->t_flags &= ~TF_MORETOCOME;
617 }
618 } else {
619 if (sbspace(&so->so_snd) < -512) {
620 m_freem(m);
621 error = ENOBUFS;
622 goto out;
623 }
624 /*
625 * According to RFC961 (Assigned Protocols),
626 * the urgent pointer points to the last octet
627 * of urgent data. We continue, however,
628 * to consider it to indicate the first octet
629 * of data past the urgent section.
630 * Otherwise, snd_up should be one lower.
631 */
632 sbappend(&so->so_snd, m);
633 if (nam && tp->t_state < TCPS_SYN_SENT) {
634 /*
635 * Do implied connect if not yet connected,
636 * initialize window to default value, and
637 * initialize maxseg/maxopd using peer's cached
638 * MSS.
639 */
640#if INET6
641 if (isipv6)
642 error = tcp6_connect(tp, nam, p);
643 else
644#endif /* INET6 */
645 error = tcp_connect(tp, nam, p);
646 if (error)
647 goto out;
648 tp->snd_wnd = TTCP_CLIENT_SND_WND;
9bccf70c 649 tcp_mss(tp, -1);
1c79356b
A
650 }
651 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
652 tp->t_force = 1;
653 error = tcp_output(tp);
654 tp->t_force = 0;
655 }
656 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
657 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
658}
659
660/*
661 * Abort the TCP.
662 */
663static int
664tcp_usr_abort(struct socket *so)
665{
666 int s = splnet();
667 int error = 0;
668 struct inpcb *inp = sotoinpcb(so);
669 struct tcpcb *tp;
670
671 COMMON_START();
672 /* In case we got disconnected from the peer */
673 if (tp == 0)
674 goto out;
675 tp = tcp_drop(tp, ECONNABORTED);
676 COMMON_END(PRU_ABORT);
677}
678
679/*
680 * Receive out-of-band data.
681 */
682static int
683tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
684{
685 int s = splnet();
686 int error = 0;
687 struct inpcb *inp = sotoinpcb(so);
688 struct tcpcb *tp;
689
690 COMMON_START();
691 if ((so->so_oobmark == 0 &&
692 (so->so_state & SS_RCVATMARK) == 0) ||
693 so->so_options & SO_OOBINLINE ||
694 tp->t_oobflags & TCPOOB_HADDATA) {
695 error = EINVAL;
696 goto out;
697 }
698 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
699 error = EWOULDBLOCK;
700 goto out;
701 }
702 m->m_len = 1;
703 *mtod(m, caddr_t) = tp->t_iobc;
704 if ((flags & MSG_PEEK) == 0)
705 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
706 COMMON_END(PRU_RCVOOB);
707}
708
709/* xxx - should be const */
710struct pr_usrreqs tcp_usrreqs = {
711 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
712 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
713 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
714 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
715 in_setsockaddr, sosend, soreceive, sopoll
716};
717
#if INET6
/*
 * User-request dispatch table for TCP over IPv6.  Positional initializer:
 * the entry order must match the member order of struct pr_usrreqs
 * (declared elsewhere).  Handlers without an IPv6-specific variant are
 * shared with tcp_usrreqs.
 */
struct pr_usrreqs tcp6_usrreqs = {
	tcp_usr_abort,
	tcp6_usr_accept,
	tcp_usr_attach,
	tcp6_usr_bind,
	tcp6_usr_connect,
	pru_connect2_notsupp,
	in6_control,
	tcp_usr_detach,
	tcp_usr_disconnect,
	tcp6_usr_listen,
	in6_mapped_peeraddr,
	tcp_usr_rcvd,
	tcp_usr_rcvoob,
	tcp_usr_send,
	pru_sense_null,
	tcp_usr_shutdown,
	in6_mapped_sockaddr,
	sosend,
	soreceive,
	sopoll
};
#endif /* INET6 */
727
728/*
729 * Common subroutine to open a TCP connection to remote host specified
730 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
731 * port number if needed. Call in_pcbladdr to do the routing and to choose
732 * a local host address (interface). If there is an existing incarnation
733 * of the same connection in TIME-WAIT state and if the remote host was
734 * sending CC options and if the connection duration was < MSL, then
735 * truncate the previous TIME-WAIT state and proceed.
736 * Initialize connection parameters and enter SYN-SENT state.
737 */
738static int
739tcp_connect(tp, nam, p)
740 register struct tcpcb *tp;
741 struct sockaddr *nam;
742 struct proc *p;
743{
744 struct inpcb *inp = tp->t_inpcb, *oinp;
745 struct socket *so = inp->inp_socket;
746 struct tcpcb *otp;
747 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
748 struct sockaddr_in *ifaddr;
749 struct rmxp_tao *taop;
750 struct rmxp_tao tao_noncached;
751 int error;
752
753 if (inp->inp_lport == 0) {
754 error = in_pcbbind(inp, (struct sockaddr *)0, p);
755 if (error)
756 return error;
757 }
758
759 /*
760 * Cannot simply call in_pcbconnect, because there might be an
761 * earlier incarnation of this same connection still in
762 * TIME_WAIT state, creating an ADDRINUSE error.
763 */
764 error = in_pcbladdr(inp, nam, &ifaddr);
765 if (error)
766 return error;
767 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
768 sin->sin_addr, sin->sin_port,
769 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
770 : ifaddr->sin_addr,
771 inp->inp_lport, 0, NULL);
772 if (oinp) {
773 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
774 otp->t_state == TCPS_TIME_WAIT &&
9bccf70c 775 otp->t_starttime < tcp_msl &&
1c79356b
A
776 (otp->t_flags & TF_RCVD_CC))
777 otp = tcp_close(otp);
778 else
779 return EADDRINUSE;
780 }
0b4e3aa0
A
781 if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr->sin_addr.s_addr :
782 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
783 inp->inp_lport == sin->sin_port)
784 return EINVAL;
1c79356b
A
785 if (inp->inp_laddr.s_addr == INADDR_ANY)
786 inp->inp_laddr = ifaddr->sin_addr;
787 inp->inp_faddr = sin->sin_addr;
788 inp->inp_fport = sin->sin_port;
789 in_pcbrehash(inp);
790
1c79356b
A
791 /* Compute window scaling to request. */
792 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
793 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
794 tp->request_r_scale++;
795
796 soisconnecting(so);
797 tcpstat.tcps_connattempt++;
798 tp->t_state = TCPS_SYN_SENT;
799 tp->t_timer[TCPT_KEEP] = tcp_keepinit;
9bccf70c 800 tp->iss = tcp_new_isn(tp);
1c79356b
A
801 tcp_sendseqinit(tp);
802
803 /*
804 * Generate a CC value for this connection and
805 * check whether CC or CCnew should be used.
806 */
807 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
808 taop = &tao_noncached;
809 bzero(taop, sizeof(*taop));
810 }
811
812 tp->cc_send = CC_INC(tcp_ccgen);
813 if (taop->tao_ccsent != 0 &&
814 CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
815 taop->tao_ccsent = tp->cc_send;
816 } else {
817 taop->tao_ccsent = 0;
818 tp->t_flags |= TF_SENDCCNEW;
819 }
820
821 return 0;
822}
823
#if INET6
/*
 * IPv6 counterpart of tcp_connect(): open a TCP connection to the remote
 * host given by the sockaddr_in6 in `nam'.
 *
 * Steps:
 *  - in6_pcbbind() assigns a local port if none is bound;
 *  - in6_pcbladdr() picks the local address;
 *  - if an earlier incarnation of the same connection exists, it is
 *    closed only when it is in TIME_WAIT, its t_starttime is below
 *    tcp_msl and the peer was sending CC options; otherwise EADDRINUSE;
 *  - the pcb addresses (and flow info, when the caller supplied one) are
 *    filled in and the pcb is rehashed;
 *  - connection parameters are initialized and SYN_SENT is entered.
 *
 * Returns 0 on success or an errno value.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
{
	struct inpcb *inp = tp->t_inpcb;
	struct inpcb *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct in6_addr *addr6;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error;

	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			return error;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	error = in6_pcbladdr(inp, nam, &addr6);
	if (error)
		return error;
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
	    &sin6->sin6_addr, sin6->sin6_port,
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
		? addr6
		: &inp->in6p_laddr,
	    inp->inp_lport, 0, NULL);
	if (oinp) {
		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    otp->t_starttime < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC))
			otp = tcp_close(otp);
		else
			return EADDRINUSE;
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		inp->in6p_laddr = *addr6;
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/*
	 * The masked flow info is an integer, so it is tested against 0
	 * rather than the null pointer constant.
	 */
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
		inp->in6p_flowinfo = sin6->sin6_flowinfo;
	in_pcbrehash(inp);

	/* Compute window scaling to request. */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
		tp->request_r_scale++;

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	tp->t_timer[TCPT_KEEP] = tcp_keepinit;
	tp->iss = tcp_new_isn(tp);
	tcp_sendseqinit(tp);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 */
	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
		/* no cache entry: work on a zeroed scratch copy */
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent != 0 &&
	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
		taop->tao_ccsent = tp->cc_send;
	} else {
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	}

	return 0;
}
#endif /* INET6 */
910
911/*
912 * The new sockopt interface makes it possible for us to block in the
913 * copyin/out step (if we take a page fault). Taking a page fault at
914 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
915 * use TSM, there probably isn't any need for this function to run at
916 * splnet() any more. This needs more examination.)
917 */
918int
919tcp_ctloutput(so, sopt)
920 struct socket *so;
921 struct sockopt *sopt;
922{
923 int error, opt, optval, s;
924 struct inpcb *inp;
925 struct tcpcb *tp;
926
927 error = 0;
928 s = splnet(); /* XXX */
929 inp = sotoinpcb(so);
930 if (inp == NULL) {
931 splx(s);
932 return (ECONNRESET);
933 }
934 if (sopt->sopt_level != IPPROTO_TCP) {
935#if INET6
936 if (INP_CHECK_SOCKAF(so, AF_INET6))
937 error = ip6_ctloutput(so, sopt);
938 else
939#endif /* INET6 */
940 error = ip_ctloutput(so, sopt);
941 splx(s);
942 return (error);
943 }
944 tp = intotcpcb(inp);
945 if (tp == NULL) {
946 splx(s);
947 return (ECONNRESET);
948 }
949
950 switch (sopt->sopt_dir) {
951 case SOPT_SET:
952 switch (sopt->sopt_name) {
953 case TCP_NODELAY:
954 case TCP_NOOPT:
955 case TCP_NOPUSH:
956 error = sooptcopyin(sopt, &optval, sizeof optval,
957 sizeof optval);
958 if (error)
959 break;
960
961 switch (sopt->sopt_name) {
962 case TCP_NODELAY:
963 opt = TF_NODELAY;
964 break;
965 case TCP_NOOPT:
966 opt = TF_NOOPT;
967 break;
968 case TCP_NOPUSH:
969 opt = TF_NOPUSH;
970 break;
971 default:
972 opt = 0; /* dead code to fool gcc */
973 break;
974 }
975
976 if (optval)
977 tp->t_flags |= opt;
978 else
979 tp->t_flags &= ~opt;
980 break;
981
982 case TCP_MAXSEG:
983 error = sooptcopyin(sopt, &optval, sizeof optval,
984 sizeof optval);
985 if (error)
986 break;
987
988 if (optval > 0 && optval <= tp->t_maxseg)
989 tp->t_maxseg = optval;
990 else
991 error = EINVAL;
992 break;
993
55e303ae
A
994 case TCP_KEEPALIVE:
995 error = sooptcopyin(sopt, &optval, sizeof optval,
996 sizeof optval);
997 if (error)
998 break;
999 if (optval < 0)
1000 error = EINVAL;
1001 else
1002 tp->t_keepidle = optval * PR_SLOWHZ;
1003 break;
1004
1c79356b
A
1005 default:
1006 error = ENOPROTOOPT;
1007 break;
1008 }
1009 break;
1010
1011 case SOPT_GET:
1012 switch (sopt->sopt_name) {
1013 case TCP_NODELAY:
1014 optval = tp->t_flags & TF_NODELAY;
1015 break;
1016 case TCP_MAXSEG:
1017 optval = tp->t_maxseg;
1018 break;
55e303ae
A
1019 case TCP_KEEPALIVE:
1020 optval = tp->t_keepidle / PR_SLOWHZ;
1021 break;
1c79356b
A
1022 case TCP_NOOPT:
1023 optval = tp->t_flags & TF_NOOPT;
1024 break;
1025 case TCP_NOPUSH:
1026 optval = tp->t_flags & TF_NOPUSH;
1027 break;
1028 default:
1029 error = ENOPROTOOPT;
1030 break;
1031 }
1032 if (error == 0)
1033 error = sooptcopyout(sopt, &optval, sizeof optval);
1034 break;
1035 }
1036 splx(s);
1037 return (error);
1038}
1039
1040/*
1041 * tcp_sendspace and tcp_recvspace are the default send and receive window
1042 * sizes, respectively. These are obsolescent (this information should
1043 * be set by the route).
1044 */
1045u_long tcp_sendspace = 1024*16;
9bccf70c
A
1046SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
1047 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
1c79356b 1048u_long tcp_recvspace = 1024*16;
9bccf70c
A
1049SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1050 &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
1c79356b 1051
55e303ae
A
1052__private_extern__ int tcp_sockthreshold = 256;
1053SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW,
1054 &tcp_sockthreshold , 0, "TCP Socket size increased if less than threshold");
1055
1056#define TCP_INCREASED_SPACE 65535 /* Automatically increase tcp send/rcv space to this value */
1c79356b
A
1057/*
1058 * Attach TCP protocol to socket, allocating
1059 * internet protocol control block, tcp control block,
1060 * bufer space, and entering LISTEN state if to accept connections.
1061 */
1062static int
1063tcp_attach(so, p)
1064 struct socket *so;
1065 struct proc *p;
1066{
1067 register struct tcpcb *tp;
1068 struct inpcb *inp;
1069 int error;
1070#if INET6
9bccf70c
A
1071 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != NULL;
1072#endif
1c79356b 1073
1c79356b
A
1074 error = in_pcballoc(so, &tcbinfo, p);
1075 if (error)
1076 return (error);
55e303ae 1077
1c79356b 1078 inp = sotoinpcb(so);
55e303ae
A
1079
1080 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1081 /*
1082 * The goal is to let clients have large send/rcv default windows (TCP_INCREASED_SPACE)
1083 * while not hogging mbuf space for servers. This is done by watching a threshold
1084 * of tcpcbs in use and bumping the default send and rcvspace only if under that threshold.
1085 * The theory being that busy servers have a lot more active tcpcbs and don't want the potential
1086 * memory penalty of having much larger sockbuffs. The sysctl allows to fine tune that threshold value. */
1087
1088 if (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold)
1089 error = soreserve(so, MAX(TCP_INCREASED_SPACE, tcp_sendspace), MAX(TCP_INCREASED_SPACE,tcp_recvspace));
1090 else
1091 error = soreserve(so, tcp_sendspace, tcp_recvspace);
1092 if (error)
1093 return (error);
1094 }
1095
1c79356b
A
1096#if INET6
1097 if (isipv6) {
1098 inp->inp_vflag |= INP_IPV6;
1099 inp->in6p_hops = -1; /* use kernel default */
1100 }
1101 else
1102#endif /* INET6 */
1103 inp->inp_vflag |= INP_IPV4;
1104 tp = tcp_newtcpcb(inp);
1105 if (tp == 0) {
1106 int nofd = so->so_state & SS_NOFDREF; /* XXX */
1107
1108 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
1109#if INET6
1110 if (isipv6)
1111 in6_pcbdetach(inp);
1112 else
1113#endif /* INET6 */
1114 in_pcbdetach(inp);
1115 so->so_state |= nofd;
1116 return (ENOBUFS);
1117 }
1118 tp->t_state = TCPS_CLOSED;
1119 return (0);
1120}
1121
1122/*
1123 * Initiate (or continue) disconnect.
1124 * If embryonic state, just send reset (once).
1125 * If in ``let data drain'' option and linger null, just drop.
1126 * Otherwise (hard), mark socket disconnecting and drop
1127 * current input data; switch states based on user close, and
1128 * send segment to peer (with FIN).
1129 */
1130static struct tcpcb *
1131tcp_disconnect(tp)
1132 register struct tcpcb *tp;
1133{
1134 struct socket *so = tp->t_inpcb->inp_socket;
1135
1136 if (tp->t_state < TCPS_ESTABLISHED)
1137 tp = tcp_close(tp);
1138 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1139 tp = tcp_drop(tp, 0);
1140 else {
1141 soisdisconnecting(so);
1142 sbflush(&so->so_rcv);
1143 tp = tcp_usrclosed(tp);
1144 if (tp)
1145 (void) tcp_output(tp);
1146 }
1147 return (tp);
1148}
1149
1150/*
1151 * User issued close, and wish to trail through shutdown states:
1152 * if never received SYN, just forget it. If got a SYN from peer,
1153 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1154 * If already got a FIN from peer, then almost done; go to LAST_ACK
1155 * state. In all other cases, have already sent FIN to peer (e.g.
1156 * after PRU_SHUTDOWN), and just have to play tedious game waiting
1157 * for peer to send FIN or not respond to keep-alives, etc.
1158 * We can let the user exit from the close as soon as the FIN is acked.
1159 */
1160static struct tcpcb *
1161tcp_usrclosed(tp)
1162 register struct tcpcb *tp;
1163{
1164
1165 switch (tp->t_state) {
1166
1167 case TCPS_CLOSED:
1168 case TCPS_LISTEN:
1169 tp->t_state = TCPS_CLOSED;
1170 tp = tcp_close(tp);
1171 break;
1172
1173 case TCPS_SYN_SENT:
1174 case TCPS_SYN_RECEIVED:
1175 tp->t_flags |= TF_NEEDFIN;
1176 break;
1177
1178 case TCPS_ESTABLISHED:
1179 tp->t_state = TCPS_FIN_WAIT_1;
1180 break;
1181
1182 case TCPS_CLOSE_WAIT:
1183 tp->t_state = TCPS_LAST_ACK;
1184 break;
1185 }
1186 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1187 soisdisconnected(tp->t_inpcb->inp_socket);
1188 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
1189 if (tp->t_state == TCPS_FIN_WAIT_2)
1190 tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1191 }
1192 return (tp);
1193}
1194