]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/tcp_usrreq.c
19405c584e28b18a91630d49a31c99a9faa1e55c
[apple/xnu.git] / bsd / netinet / tcp_usrreq.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
62 */
63
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/kernel.h>
68 #include <sys/sysctl.h>
69 #include <sys/mbuf.h>
70 #if INET6
71 #include <sys/domain.h>
72 #endif /* INET6 */
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
76
77 #include <net/if.h>
78 #include <net/route.h>
79 #include <net/ntstat.h>
80
81 #include <netinet/in.h>
82 #include <netinet/in_systm.h>
83 #if INET6
84 #include <netinet/ip6.h>
85 #endif
86 #include <netinet/in_pcb.h>
87 #if INET6
88 #include <netinet6/in6_pcb.h>
89 #endif
90 #include <netinet/in_var.h>
91 #include <netinet/ip_var.h>
92 #if INET6
93 #include <netinet6/ip6_var.h>
94 #endif
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_fsm.h>
97 #include <netinet/tcp_seq.h>
98 #include <netinet/tcp_timer.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet/tcpip.h>
101 #if TCPDEBUG
102 #include <netinet/tcp_debug.h>
103 #endif
104
105 #if IPSEC
106 #include <netinet6/ipsec.h>
107 #endif /*IPSEC*/
108
/*
 * Non-static entry points implemented in this file.  tcp_fill_info() copies
 * per-connection state out of a tcpcb into a struct tcp_info.
 * NOTE(review): tcp_fill_info_for_info_tuple() is not defined in the visible
 * part of the file; presumably it does the same for a connection looked up
 * by address/port tuple -- confirm against its definition.
 */
109 void tcp_fill_info(struct tcpcb *, struct tcp_info *);
110 errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
111
/* Handler installed for the "info" node under _net_inet_tcp by the SYSCTL_PROC below. */
112 int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *);
113
114 /*
115 * TCP protocol interface to socket abstraction.
116 */
117 extern char *tcpstates[]; /* XXX ??? */
118
/*
 * File-local helpers used by the pru_* handlers.  They are declared here
 * because the handlers that call them appear before their definitions.
 */
119 static int tcp_attach(struct socket *, struct proc *);
120 static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
121 #if INET6
122 static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
123 #endif /* INET6 */
124 static struct tcpcb *
125 tcp_disconnect(struct tcpcb *);
126 static struct tcpcb *
127 tcp_usrclosed(struct tcpcb *);
128
/*
 * Software-checksum statistics, exported read-only (CTLFLAG_RD) under
 * _net_inet_tcp: packet and byte counts for the receive and transmit
 * directions.
 * NOTE(review): nothing in the visible part of this file updates these
 * static counters -- confirm where (or whether) they are written.
 */
129 static u_int32_t tcps_in_sw_cksum;
130 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
131 &tcps_in_sw_cksum, 0,
132 "Number of received packets checksummed in software");
133
134 static u_int64_t tcps_in_sw_cksum_bytes;
135 SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
136 &tcps_in_sw_cksum_bytes,
137 "Amount of received data checksummed in software");
138
139 static u_int32_t tcps_out_sw_cksum;
140 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, out_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
141 &tcps_out_sw_cksum, 0,
142 "Number of transmitted packets checksummed in software");
143
144 static u_int64_t tcps_out_sw_cksum_bytes;
145 SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
146 &tcps_out_sw_cksum_bytes,
147 "Amount of transmitted data checksummed in software");
148
/* Defined in another translation unit. */
149 extern uint32_t tcp_autorcvbuf_max;
150
/* Defined elsewhere; called from tcp_usr_rcvd() on the socket's receive buffer. */
151 extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
152
/*
 * Tracing hooks for the pru_* handlers.  They rely on locals named `tp' and
 * `so' being in scope at the point of use.
 *
 *   TCPDEBUG0      declares `ostate', initialised to 0.
 *   TCPDEBUG1()    stores tp->t_state in `ostate', or 0 when tp is NULL.
 *   TCPDEBUG2(req) calls tcp_trace(TA_USER, ...) when tp is non-NULL and the
 *                  socket has SO_DEBUG set.
 *
 * When TCPDEBUG is not enabled all three expand to nothing.
 */
153 #if TCPDEBUG
154 #define TCPDEBUG0 int ostate = 0
155 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0
156 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
157 tcp_trace(TA_USER, ostate, tp, 0, 0, req)
158 #else
159 #define TCPDEBUG0
160 #define TCPDEBUG1()
161 #define TCPDEBUG2(req)
162 #endif
163
/*
 * Registers tcp_sysctl_info() as the handler for the "info" node under
 * _net_inet_tcp.  The node is read/write (CTLFLAG_RW) and carries
 * CTLFLAG_ANYBODY.
 * NOTE(review): CTLFLAG_ANYBODY presumably lets unprivileged callers write to
 * the node -- confirm the handler validates its input accordingly.
 */
164 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
165 0 , 0, tcp_sysctl_info, "S", "TCP info per tuple");
166
167 /*
168 * TCP attaches to socket via pru_attach(), reserving space,
169 * and an internet control block.
170 *
171 * Returns: 0 Success
172 * EISCONN
173 * tcp_attach:ENOBUFS
174 * tcp_attach:ENOMEM
175 * tcp_attach:??? [IPSEC specific]
176 */
177 static int
178 tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p)
179 {
180 int error;
181 struct inpcb *inp = sotoinpcb(so);
182 struct tcpcb *tp = 0;
183 TCPDEBUG0;
184
185 TCPDEBUG1();
186 if (inp) {
187 error = EISCONN;
188 goto out;
189 }
190
191 error = tcp_attach(so, p);
192 if (error)
193 goto out;
194
195 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
196 so->so_linger = TCP_LINGERTIME * hz;
197 tp = sototcpcb(so);
198 out:
199 TCPDEBUG2(PRU_ATTACH);
200 return error;
201 }
202
203 /*
204 * pru_detach() detaches the TCP protocol from the socket.
205 * If the protocol state is non-embryonic, then can't
206 * do this directly: have to initiate a pru_disconnect(),
207 * which may finish later; embryonic TCB's can just
208 * be discarded here.
209 */
210 static int
211 tcp_usr_detach(struct socket *so)
212 {
213 int error = 0;
214 struct inpcb *inp = sotoinpcb(so);
215 struct tcpcb *tp;
216 TCPDEBUG0;
217
218 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
219 return EINVAL; /* XXX */
220 }
221 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
222 tp = intotcpcb(inp);
223 /* In case we got disconnected from the peer */
224 if (tp == 0)
225 goto out;
226 TCPDEBUG1();
227
228 calculate_tcp_clock();
229
230 tp = tcp_disconnect(tp);
231 out:
232 TCPDEBUG2(PRU_DETACH);
233 return error;
234 }
235
/*
 * COMMON_START(): shared prologue of most pru_* handlers.  The enclosing
 * function must declare locals `inp' and `tp' (and `so' for the trace
 * hooks).  It:
 *   - expands TCPDEBUG0 (a declaration when TCPDEBUG is enabled);
 *   - returns EINVAL from the enclosing function when inp is NULL or in
 *     INPCB_STATE_DEAD;
 *   - otherwise sets tp = intotcpcb(inp), runs TCPDEBUG1() and calls
 *     calculate_tcp_clock().
 * It does not check tp for NULL; handlers that need to do that themselves.
 */
236 #define COMMON_START() TCPDEBUG0; \
237 do { \
238 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \
239 return EINVAL; \
240 } \
241 tp = intotcpcb(inp); \
242 TCPDEBUG1(); \
243 calculate_tcp_clock(); \
244 } while(0)
245
/*
 * COMMON_END(req): shared epilogue.  Defines the `out:' label that handlers
 * jump to, runs TCPDEBUG2(req) and returns the local `error'.  The trailing
 * `goto out' can never execute (it follows the return); it absorbs the
 * caller's terminating semicolon.
 * NOTE(review): presumably it also keeps `out' referenced in handlers that
 * have no goto of their own -- confirm.
 */
246 #define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
247
248
249 /*
250 * Give the socket an address.
251 *
252 * Returns: 0 Success
253 * EINVAL Invalid argument [COMMON_START]
254 * EAFNOSUPPORT Address family not supported
255 * in_pcbbind:EADDRNOTAVAIL Address not available.
256 * in_pcbbind:EINVAL Invalid argument
257 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
258 * in_pcbbind:EACCES Permission denied
259 * in_pcbbind:EADDRINUSE Address in use
260 * in_pcbbind:EAGAIN Resource unavailable, try again
261 * in_pcbbind:EPERM Operation not permitted
262 */
263 static int
264 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
265 {
266 int error = 0;
267 struct inpcb *inp = sotoinpcb(so);
268 struct tcpcb *tp;
269 struct sockaddr_in *sinp;
270
271 COMMON_START();
272
273 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
274 error = EAFNOSUPPORT;
275 goto out;
276 }
277
278 /*
279 * Must check for multicast addresses and disallow binding
280 * to them.
281 */
282 sinp = (struct sockaddr_in *)(void *)nam;
283 if (sinp->sin_family == AF_INET &&
284 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
285 error = EAFNOSUPPORT;
286 goto out;
287 }
288 error = in_pcbbind(inp, nam, p);
289 if (error)
290 goto out;
291 COMMON_END(PRU_BIND);
292
293 }
294
#if INET6
/*
 * pru_bind() handler for IPv6 sockets.
 *
 * An address family other than AF_INET6 (0 is let through) or an IPv6
 * multicast address yields EAFNOSUPPORT.  The PCB is first marked as
 * IPv6-only in inp_vflag.  Unless IN6P_IPV6_V6ONLY is set, the unspecified
 * address additionally enables IPv4, and a v4-mapped address turns the PCB
 * into an IPv4 one and is bound through in_pcbbind().  All other addresses
 * are bound through in6_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;
	int v4_allowed;

	COMMON_START();

	sin6p = (struct sockaddr_in6 *)(void *)nam;
	if ((nam->sa_family != AF_INET6 && nam->sa_family != 0) ||
	    (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))) {
		error = EAFNOSUPPORT;
		goto out;
	}

	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;

	v4_allowed = ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0);
	if (v4_allowed && IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
		/* Wildcard bind on a dual-stack socket serves IPv4 as well. */
		inp->inp_vflag |= INP_IPV4;
	} else if (v4_allowed && IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		/* v4-mapped address: bind as a plain IPv4 endpoint. */
		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
		goto out;
	}

	error = in6_pcbbind(inp, nam, p);
	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
342
343 /*
344 * Prepare to accept connections.
345 *
346 * Returns: 0 Success
347 * EINVAL [COMMON_START]
348 * in_pcbbind:EADDRNOTAVAIL Address not available.
349 * in_pcbbind:EINVAL Invalid argument
350 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
351 * in_pcbbind:EACCES Permission denied
352 * in_pcbbind:EADDRINUSE Address in use
353 * in_pcbbind:EAGAIN Resource unavailable, try again
354 * in_pcbbind:EPERM Operation not permitted
355 */
356 static int
357 tcp_usr_listen(struct socket *so, struct proc *p)
358 {
359 int error = 0;
360 struct inpcb *inp = sotoinpcb(so);
361 struct tcpcb *tp;
362
363 COMMON_START();
364 if (inp->inp_lport == 0)
365 error = in_pcbbind(inp, (struct sockaddr *)0, p);
366 if (error == 0)
367 tp->t_state = TCPS_LISTEN;
368 COMMON_END(PRU_LISTEN);
369 }
370
#if INET6
/*
 * pru_listen() handler for IPv6 sockets.
 *
 * If the PCB has no local port yet, INP_IPV4 is cleared from inp_vflag and
 * set again unless IN6P_IPV6_V6ONLY is in effect; a port is then assigned
 * by calling in6_pcbbind() with a null address.  Provided that succeeded
 * (or was not needed) the connection state becomes TCPS_LISTEN.
 */
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	COMMON_START();
	if (inp->inp_lport == 0) {
		inp->inp_vflag &= ~INP_IPV4;
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
		if (error != 0)
			goto out;
	}
	tp->t_state = TCPS_LISTEN;
	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
391
392 /*
393 * Initiate connection to peer.
394 * Create a template for use in transmissions on this connection.
395 * Enter SYN_SENT state, and mark socket as connecting.
396 * Start keep-alive timer, and seed output sequence space.
397 * Send initial segment on connection.
398 */
399 static int
400 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
401 {
402 int error = 0;
403 struct inpcb *inp = sotoinpcb(so);
404 struct tcpcb *tp;
405 struct sockaddr_in *sinp;
406
407 TCPDEBUG0;
408 if (inp == 0)
409 return EINVAL;
410 else if (inp->inp_state == INPCB_STATE_DEAD) {
411 if (so->so_error) {
412 error = so->so_error;
413 so->so_error = 0;
414 return error;
415 } else
416 return EINVAL;
417 }
418 tp = intotcpcb(inp);
419 TCPDEBUG1();
420
421 calculate_tcp_clock();
422
423 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
424 error = EAFNOSUPPORT;
425 goto out;
426 }
427 /*
428 * Must disallow TCP ``connections'' to multicast addresses.
429 */
430 sinp = (struct sockaddr_in *)(void *)nam;
431 if (sinp->sin_family == AF_INET
432 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
433 error = EAFNOSUPPORT;
434 goto out;
435 }
436
437
438 if ((error = tcp_connect(tp, nam, p)) != 0)
439 goto out;
440 error = tcp_output(tp);
441 COMMON_END(PRU_CONNECT);
442 }
443
#if INET6
/*
 * pru_connect() handler for IPv6 sockets: start an active open.
 *
 * A socket without a PCB fails with EINVAL.  A socket whose PCB is dead
 * reports the pending so->so_error (clearing it), or EINVAL when there is
 * none -- the same contract as tcp_usr_connect().  Address families other
 * than AF_INET6 (0 is let through) and multicast destinations fail with
 * EAFNOSUPPORT.
 *
 * A v4-mapped destination is converted to a sockaddr_in and connected
 * through tcp_connect() with the PCB switched to IPv4, unless the socket is
 * IN6P_IPV6_V6ONLY, in which case EINVAL is returned.  Any other
 * destination goes through tcp6_connect().  On success tcp_output() sends
 * the initial segment and its result is returned.
 *
 * Every failure after the PCB checks leaves through the common `out' exit
 * so that the TCPDEBUG2 trace hook sees it.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	TCPDEBUG0;
	if (inp == NULL) {
		return (EINVAL);
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		/* Hand back (and clear) the error that killed the PCB, if any. */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			return (error);
		}
		return (EINVAL);
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();

	calculate_tcp_clock();

	if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out;
	}

	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	sin6p = (struct sockaddr_in6 *)(void *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}

		in6_sin6_2_sin(&sin, sin6p);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0)
			goto out;
		error = tcp_output(tp);
		goto out;
	}
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	if ((error = tcp6_connect(tp, nam, p)) != 0)
		goto out;
	error = tcp_output(tp);
	COMMON_END(PRU_CONNECT);
}
#endif /* INET6 */
494
495 /*
496 * Initiate disconnect from peer.
497 * If connection never passed embryonic stage, just drop;
498 * else if don't need to let data drain, then can just drop anyways,
499 * else have to begin TCP shutdown process: mark socket disconnecting,
500 * drain unread data, state switch to reflect user close, and
501 * send segment (e.g. FIN) to peer. Socket will be really disconnected
502 * when peer sends FIN and acks ours.
503 *
504 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
505 */
506 static int
507 tcp_usr_disconnect(struct socket *so)
508 {
509 int error = 0;
510 struct inpcb *inp = sotoinpcb(so);
511 struct tcpcb *tp;
512
513 lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
514 COMMON_START();
515 /* In case we got disconnected from the peer */
516 if (tp == 0)
517 goto out;
518 tp = tcp_disconnect(tp);
519 COMMON_END(PRU_DISCONNECT);
520 }
521
522 /*
523 * Accept a connection. Essentially all the work is
524 * done at higher levels; just return the address
525 * of the peer, storing through addr.
526 */
527 static int
528 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
529 {
530 int error = 0;
531 struct inpcb *inp = sotoinpcb(so);
532 struct tcpcb *tp = NULL;
533 TCPDEBUG0;
534
535 in_setpeeraddr(so, nam);
536
537 if (so->so_state & SS_ISDISCONNECTED) {
538 error = ECONNABORTED;
539 goto out;
540 }
541 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
542 return (EINVAL);
543 }
544 tp = intotcpcb(inp);
545 TCPDEBUG1();
546
547 calculate_tcp_clock();
548
549 COMMON_END(PRU_ACCEPT);
550 }
551
#if INET6
/*
 * pru_accept() handler for IPv6 sockets.  Reports the peer's address
 * through `nam' using in6_mapped_peeraddr(), whose return value is not
 * examined.
 *
 * Unlike the IPv4 handler, the address is fetched only after the state
 * checks pass: a socket marked SS_ISDISCONNECTED yields ECONNABORTED and a
 * missing or dead PCB yields EINVAL, both without touching `nam'.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) != 0) {
		error = ECONNABORTED;
	} else {
		if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD)
			return (EINVAL);
		tp = intotcpcb(inp);
		TCPDEBUG1();

		calculate_tcp_clock();

		in6_mapped_peeraddr(so, nam);
	}
	COMMON_END(PRU_ACCEPT);
}
#endif /* INET6 */
577
578 /*
579 * Mark the connection as being incapable of further output.
580 *
581 * Returns: 0 Success
582 * EINVAL [COMMON_START]
583 * tcp_output:EADDRNOTAVAIL
584 * tcp_output:ENOBUFS
585 * tcp_output:EMSGSIZE
586 * tcp_output:EHOSTUNREACH
587 * tcp_output:ENETUNREACH
588 * tcp_output:ENETDOWN
589 * tcp_output:ENOMEM
590 * tcp_output:EACCES
591 * tcp_output:EMSGSIZE
592 * tcp_output:ENOBUFS
593 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
594 */
595 static int
596 tcp_usr_shutdown(struct socket *so)
597 {
598 int error = 0;
599 struct inpcb *inp = sotoinpcb(so);
600 struct tcpcb *tp;
601
602 COMMON_START();
603 socantsendmore(so);
604 /* In case we got disconnected from the peer */
605 if (tp == 0)
606 goto out;
607 tp = tcp_usrclosed(tp);
608 if (tp)
609 error = tcp_output(tp);
610 COMMON_END(PRU_SHUTDOWN);
611 }
612
613 /*
614 * After a receive, possibly send window update to peer.
615 */
616 static int
617 tcp_usr_rcvd(struct socket *so, __unused int flags)
618 {
619 int error = 0;
620 struct inpcb *inp = sotoinpcb(so);
621 struct tcpcb *tp;
622
623 COMMON_START();
624 /* In case we got disconnected from the peer */
625 if (tp == 0)
626 goto out;
627 tcp_sbrcv_trim(tp, &so->so_rcv);
628
629 tcp_output(tp);
630 COMMON_END(PRU_RCVD);
631 }
632
633 /*
634 * Do a send by putting data in output queue and updating urgent
635 * marker if URG set. Possibly send more data. Unlike the other
636 * pru_*() routines, the mbuf chains are our responsibility. We
637 * must either enqueue them or free them. The other pru_* routines
638 * generally are caller-frees.
639 *
640 * Returns: 0 Success
641 * ECONNRESET
642 * EINVAL
643 * ENOBUFS
644 * tcp_connect:EADDRINUSE Address in use
645 * tcp_connect:EADDRNOTAVAIL Address not available.
646 * tcp_connect:EINVAL Invalid argument
647 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef]
648 * tcp_connect:EACCES Permission denied
649 * tcp_connect:EAGAIN Resource unavailable, try again
650 * tcp_connect:EPERM Operation not permitted
651 * tcp_output:EADDRNOTAVAIL
652 * tcp_output:ENOBUFS
653 * tcp_output:EMSGSIZE
654 * tcp_output:EHOSTUNREACH
655 * tcp_output:ENETUNREACH
656 * tcp_output:ENETDOWN
657 * tcp_output:ENOMEM
658 * tcp_output:EACCES
659 * tcp_output:EMSGSIZE
660 * tcp_output:ENOBUFS
661 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
662 * tcp6_connect:??? [IPV6 only]
663 */
664 static int
665 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
666 struct sockaddr *nam, struct mbuf *control, struct proc *p)
667 {
668 int error = 0;
669 struct inpcb *inp = sotoinpcb(so);
670 struct tcpcb *tp;
671 #if INET6
672 int isipv6;
673 #endif
674 TCPDEBUG0;
675
676 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
677 /*
678 * OOPS! we lost a race, the TCP session got reset after
679 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
680 * network interrupt in the non-splnet() section of sosend().
681 */
682 if (m)
683 m_freem(m);
684 if (control)
685 m_freem(control);
686 error = ECONNRESET; /* XXX EPIPE? */
687 tp = NULL;
688 TCPDEBUG1();
689 goto out;
690 }
691 #if INET6
692 isipv6 = nam && nam->sa_family == AF_INET6;
693 #endif /* INET6 */
694 tp = intotcpcb(inp);
695 TCPDEBUG1();
696
697 calculate_tcp_clock();
698
699 if (control) {
700 /* TCP doesn't do control messages (rights, creds, etc) */
701 if (control->m_len) {
702 m_freem(control);
703 if (m)
704 m_freem(m);
705 error = EINVAL;
706 goto out;
707 }
708 m_freem(control); /* empty control, just free it */
709 }
710 if(!(flags & PRUS_OOB)) {
711 sbappendstream(&so->so_snd, m);
712 if (nam && tp->t_state < TCPS_SYN_SENT) {
713 /*
714 * Do implied connect if not yet connected,
715 * initialize window to default value, and
716 * initialize maxseg/maxopd using peer's cached
717 * MSS.
718 */
719 #if INET6
720 if (isipv6)
721 error = tcp6_connect(tp, nam, p);
722 else
723 #endif /* INET6 */
724 error = tcp_connect(tp, nam, p);
725 if (error)
726 goto out;
727 tp->snd_wnd = TTCP_CLIENT_SND_WND;
728 tcp_mss(tp, -1, IFSCOPE_NONE);
729 }
730
731 if (flags & PRUS_EOF) {
732 /*
733 * Close the send side of the connection after
734 * the data is sent.
735 */
736 socantsendmore(so);
737 tp = tcp_usrclosed(tp);
738 }
739 if (tp != NULL) {
740 if (flags & PRUS_MORETOCOME)
741 tp->t_flags |= TF_MORETOCOME;
742 error = tcp_output(tp);
743 if (flags & PRUS_MORETOCOME)
744 tp->t_flags &= ~TF_MORETOCOME;
745 }
746 } else {
747 if (sbspace(&so->so_snd) == 0) {
748 /* if no space is left in sockbuf,
749 * do not try to squeeze in OOB traffic */
750 m_freem(m);
751 error = ENOBUFS;
752 goto out;
753 }
754 /*
755 * According to RFC961 (Assigned Protocols),
756 * the urgent pointer points to the last octet
757 * of urgent data. We continue, however,
758 * to consider it to indicate the first octet
759 * of data past the urgent section.
760 * Otherwise, snd_up should be one lower.
761 */
762 sbappendstream(&so->so_snd, m);
763 if (nam && tp->t_state < TCPS_SYN_SENT) {
764 /*
765 * Do implied connect if not yet connected,
766 * initialize window to default value, and
767 * initialize maxseg/maxopd using peer's cached
768 * MSS.
769 */
770 #if INET6
771 if (isipv6)
772 error = tcp6_connect(tp, nam, p);
773 else
774 #endif /* INET6 */
775 error = tcp_connect(tp, nam, p);
776 if (error)
777 goto out;
778 tp->snd_wnd = TTCP_CLIENT_SND_WND;
779 tcp_mss(tp, -1, IFSCOPE_NONE);
780 }
781 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
782 tp->t_force = 1;
783 error = tcp_output(tp);
784 tp->t_force = 0;
785 }
786 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
787 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
788 }
789
790 /*
791 * Abort the TCP.
792 */
793 static int
794 tcp_usr_abort(struct socket *so)
795 {
796 int error = 0;
797 struct inpcb *inp = sotoinpcb(so);
798 struct tcpcb *tp;
799
800 COMMON_START();
801 /* In case we got disconnected from the peer */
802 if (tp == 0)
803 goto out;
804 tp = tcp_drop(tp, ECONNABORTED);
805 so->so_usecount--;
806 COMMON_END(PRU_ABORT);
807 }
808
809 /*
810 * Receive out-of-band data.
811 *
812 * Returns: 0 Success
813 * EINVAL [COMMON_START]
814 * EINVAL
815 * EWOULDBLOCK
816 */
817 static int
818 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
819 {
820 int error = 0;
821 struct inpcb *inp = sotoinpcb(so);
822 struct tcpcb *tp;
823
824 COMMON_START();
825 if ((so->so_oobmark == 0 &&
826 (so->so_state & SS_RCVATMARK) == 0) ||
827 so->so_options & SO_OOBINLINE ||
828 tp->t_oobflags & TCPOOB_HADDATA) {
829 error = EINVAL;
830 goto out;
831 }
832 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
833 error = EWOULDBLOCK;
834 goto out;
835 }
836 m->m_len = 1;
837 *mtod(m, caddr_t) = tp->t_iobc;
838 if ((flags & MSG_PEEK) == 0)
839 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
840 COMMON_END(PRU_RCVOOB);
841 }
842
/*
 * User-request dispatch table for TCP over IPv4.  The initializer is
 * positional, so the order of the entries must match the member order of
 * struct pr_usrreqs, which is declared outside this file.
 * NOTE(review): judging by the handler names the slots are, in order:
 * abort, accept, attach, bind, connect, connect2, control, detach,
 * disconnect, listen, peeraddr, rcvd, rcvoob, send, sense, shutdown,
 * sockaddr, sosend, soreceive, sopoll -- confirm against the struct
 * declaration before reordering anything.
 */
843 /* xxx - should be const */
844 struct pr_usrreqs tcp_usrreqs = {
845 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
846 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
847 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
848 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
849 in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
850 };
851
/*
 * User-request dispatch table for TCP over IPv6.  Same positional layout as
 * tcp_usrreqs; it differs only in the accept, bind, connect and listen
 * handlers (tcp6_usr_*), in6_control, in6_mapped_peeraddr and
 * in6_mapped_sockaddr.  All remaining entries are shared with the IPv4
 * table.
 */
852 #if INET6
853 struct pr_usrreqs tcp6_usrreqs = {
854 tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind,
855 tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach,
856 tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd,
857 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
858 in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp
859 };
860 #endif /* INET6 */
861
862 /*
863 * Common subroutine to open a TCP connection to remote host specified
864 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
865 * port number if needed. Call in_pcbladdr to do the routing and to choose
866 * a local host address (interface). If there is an existing incarnation
867 * of the same connection in TIME-WAIT state and if the remote host was
868 * sending CC options and if the connection duration was < MSL, then
869 * truncate the previous TIME-WAIT state and proceed.
870 * Initialize connection parameters and enter SYN-SENT state.
871 *
872 * Returns: 0 Success
873 * EADDRINUSE
874 * EINVAL
875 * in_pcbbind:EADDRNOTAVAIL Address not available.
876 * in_pcbbind:EINVAL Invalid argument
877 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
878 * in_pcbbind:EACCES Permission denied
879 * in_pcbbind:EADDRINUSE Address in use
880 * in_pcbbind:EAGAIN Resource unavailable, try again
881 * in_pcbbind:EPERM Operation not permitted
882 * in_pcbladdr:EINVAL Invalid argument
883 * in_pcbladdr:EAFNOSUPPORT Address family not supported
884 * in_pcbladdr:EADDRNOTAVAIL Address not available
885 */
886 static int
887 tcp_connect(tp, nam, p)
888 register struct tcpcb *tp;
889 struct sockaddr *nam;
890 struct proc *p;
891 {
892 struct inpcb *inp = tp->t_inpcb, *oinp;
893 struct socket *so = inp->inp_socket;
894 struct tcpcb *otp;
895 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
896 struct sockaddr_in ifaddr;
897 struct rmxp_tao *taop;
898 struct rmxp_tao tao_noncached;
899 int error;
900 struct ifnet *outif = NULL;
901
902 if (inp->inp_lport == 0) {
903 error = in_pcbbind(inp, (struct sockaddr *)0, p);
904 if (error)
905 return error;
906 }
907
908 /*
909 * Cannot simply call in_pcbconnect, because there might be an
910 * earlier incarnation of this same connection still in
911 * TIME_WAIT state, creating an ADDRINUSE error.
912 */
913 error = in_pcbladdr(inp, nam, &ifaddr, &outif);
914 if (error)
915 return error;
916
917 tcp_unlock(inp->inp_socket, 0, 0);
918 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
919 sin->sin_addr, sin->sin_port,
920 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
921 : ifaddr.sin_addr,
922 inp->inp_lport, 0, NULL);
923
924 tcp_lock(inp->inp_socket, 0, 0);
925 if (oinp) {
926 if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */
927 tcp_lock(oinp->inp_socket, 1, 0);
928 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
929 if (oinp != inp)
930 tcp_unlock(oinp->inp_socket, 1, 0);
931 goto skip_oinp;
932 }
933
934 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
935 otp->t_state == TCPS_TIME_WAIT &&
936 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
937 (otp->t_flags & TF_RCVD_CC))
938 otp = tcp_close(otp);
939 else {
940 printf("tcp_connect: inp=%p err=EADDRINUSE\n", inp);
941 if (oinp != inp)
942 tcp_unlock(oinp->inp_socket, 1, 0);
943 return EADDRINUSE;
944 }
945 if (oinp != inp)
946 tcp_unlock(oinp->inp_socket, 1, 0);
947 }
948 skip_oinp:
949 if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr.sin_addr.s_addr :
950 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
951 inp->inp_lport == sin->sin_port)
952 return EINVAL;
953 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
954 /*lock inversion issue, mostly with udp multicast packets */
955 socket_unlock(inp->inp_socket, 0);
956 lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
957 socket_lock(inp->inp_socket, 0);
958 }
959 if (inp->inp_laddr.s_addr == INADDR_ANY) {
960 inp->inp_laddr = ifaddr.sin_addr;
961 inp->inp_last_outifp = outif;
962 }
963 inp->inp_faddr = sin->sin_addr;
964 inp->inp_fport = sin->sin_port;
965 in_pcbrehash(inp);
966 lck_rw_done(inp->inp_pcbinfo->mtx);
967
968 if (inp->inp_flowhash == 0)
969 inp->inp_flowhash = inp_calc_flowhash(inp);
970
971 tcp_set_max_rwinscale(tp, so);
972
973 soisconnecting(so);
974 tcpstat.tcps_connattempt++;
975 tp->t_state = TCPS_SYN_SENT;
976 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
977 tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
978 tp->iss = tcp_new_isn(tp);
979 tcp_sendseqinit(tp);
980 if (nstat_collect)
981 nstat_route_connect_attempt(inp->inp_route.ro_rt);
982
983 /*
984 * Generate a CC value for this connection and
985 * check whether CC or CCnew should be used.
986 */
987 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
988 taop = &tao_noncached;
989 bzero(taop, sizeof(*taop));
990 }
991
992 tp->cc_send = CC_INC(tcp_ccgen);
993 if (taop->tao_ccsent != 0 &&
994 CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
995 taop->tao_ccsent = tp->cc_send;
996 } else {
997 taop->tao_ccsent = 0;
998 tp->t_flags |= TF_SENDCCNEW;
999 }
1000
1001 return 0;
1002 }
1003
#if INET6
/*
 * IPv6 counterpart of tcp_connect(): open a TCP connection to the remote
 * host specified by the struct sockaddr_in6 that `nam' points to.
 *
 * Calls in6_pcbbind to assign a local port if needed and in6_pcbladdr to
 * choose the local address.  An existing incarnation of the same
 * connection is closed if it is in TIME-WAIT, received CC options and is
 * younger than MSL; any other match fails with EADDRINUSE.  The PCB is then
 * filled in and rehashed, and the connection enters SYN-SENT.
 *
 * The socket lock of `tp' is dropped and retaken around the PCB hash lookup,
 * and possibly again while acquiring the pcbinfo lock.
 *
 * Returns 0, EADDRINUSE, or an error from in6_pcbbind()/in6_pcbladdr().  A
 * non-NULL interface returned by in6_pcbladdr() is released on every exit
 * path.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
	struct in6_addr addr6;
	struct rmxp_tao *taop;
	struct rmxp_tao tao_noncached;
	int error = 0;
	struct ifnet *outif = NULL;

	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
		if (error)
			goto done;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 *
	 * in6_pcbladdr() might return an ifp with its reference held
	 * even in the error case, so make sure that it's released
	 * whenever it's non-NULL.
	 */
	error = in6_pcbladdr(inp, nam, &addr6, &outif);
	if (error)
		goto done;
	/* Our own socket lock is released for the duration of the lookup. */
	tcp_unlock(inp->inp_socket, 0, 0);
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
	    &sin6->sin6_addr, sin6->sin6_port,
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
		? &addr6
		: &inp->in6p_laddr,
	    inp->inp_lport, 0, NULL);
	tcp_lock(inp->inp_socket, 0, 0);
	if (oinp) {
		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC)) {
			otp = tcp_close(otp);
		} else {
			error = EADDRINUSE;
			goto done;
		}
	}
	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
		/*lock inversion issue, mostly with udp multicast packets */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		inp->in6p_laddr = addr6;
		inp->in6p_last_outifp = outif;	/* no reference needed */
	}
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/* A flow label is copied only when the caller supplied one. */
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
		inp->in6p_flowinfo = sin6->sin6_flowinfo;
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->mtx);

	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);

	tcp_set_max_rwinscale(tp, so);

	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
	    tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
	tp->iss = tcp_new_isn(tp);
	tcp_sendseqinit(tp);
	if (nstat_collect)
		nstat_route_connect_attempt(inp->inp_route.ro_rt);

	/*
	 * Generate a CC value for this connection and
	 * check whether CC or CCnew should be used.
	 */
	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
		/* No cache entry: work on a zeroed scratch copy instead. */
		taop = &tao_noncached;
		bzero(taop, sizeof(*taop));
	}

	tp->cc_send = CC_INC(tcp_ccgen);
	if (taop->tao_ccsent != 0 &&
	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
		taop->tao_ccsent = tp->cc_send;
	} else {
		taop->tao_ccsent = 0;
		tp->t_flags |= TF_SENDCCNEW;
	}

done:
	if (outif != NULL)
		ifnet_release(outif);

	return (error);
}
#endif /* INET6 */
1115
1116 /*
1117 * Export TCP internal state information via a struct tcp_info
1118 */
1119 __private_extern__ void
1120 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1121 {
1122 struct inpcb *inp = tp->t_inpcb;
1123
1124 bzero(ti, sizeof(*ti));
1125
1126 ti->tcpi_state = tp->t_state;
1127
1128 if (tp->t_state > TCPS_LISTEN) {
1129 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1130 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1131 if (tp->t_flags & TF_SACK_PERMIT)
1132 ti->tcpi_options |= TCPI_OPT_SACK;
1133 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1134 ti->tcpi_options |= TCPI_OPT_WSCALE;
1135 ti->tcpi_snd_wscale = tp->snd_scale;
1136 ti->tcpi_rcv_wscale = tp->rcv_scale;
1137 }
1138
1139 /* Are we in retranmission episode */
1140 if (tp->snd_max != tp->snd_nxt)
1141 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
1142 else
1143 ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY;
1144
1145 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
1146 ti->tcpi_snd_mss = tp->t_maxseg;
1147 ti->tcpi_rcv_mss = tp->t_maxseg;
1148
1149 ti->tcpi_rttcur = tp->t_rttcur;
1150 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
1151 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
1152
1153 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1154 ti->tcpi_snd_cwnd = tp->snd_cwnd;
1155 ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc;
1156
1157 ti->tcpi_rcv_space = tp->rcv_wnd;
1158
1159 ti->tcpi_snd_wnd = tp->snd_wnd;
1160 ti->tcpi_snd_nxt = tp->snd_nxt;
1161 ti->tcpi_rcv_nxt = tp->rcv_nxt;
1162
1163 /* convert bytes/msec to bits/sec */
1164 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
1165 tp->t_bwmeas != NULL) {
1166 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000);
1167 }
1168
1169 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1170 tp->t_inpcb->inp_last_outifp->if_index;
1171
1172 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
1173 ti->tcpi_txbytes = inp->inp_stat->txbytes;
1174 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
1175 ti->tcpi_txunacked = tp->snd_max - tp->snd_una;
1176
1177 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
1178 ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
1179 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
1180 }
1181 }
1182
1183 __private_extern__ errno_t
1184 tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
1185 {
1186 struct inpcbinfo *pcbinfo = NULL;
1187 struct inpcb *inp = NULL;
1188 struct socket *so;
1189 struct tcpcb *tp;
1190
1191 if (itpl->itpl_proto == IPPROTO_TCP)
1192 pcbinfo = &tcbinfo;
1193 else
1194 return EINVAL;
1195
1196 if (itpl->itpl_local_sa.sa_family == AF_INET &&
1197 itpl->itpl_remote_sa.sa_family == AF_INET) {
1198 inp = in_pcblookup_hash(pcbinfo,
1199 itpl->itpl_remote_sin.sin_addr,
1200 itpl->itpl_remote_sin.sin_port,
1201 itpl->itpl_local_sin.sin_addr,
1202 itpl->itpl_local_sin.sin_port,
1203 0, NULL);
1204 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
1205 itpl->itpl_remote_sa.sa_family == AF_INET6) {
1206 struct in6_addr ina6_local;
1207 struct in6_addr ina6_remote;
1208
1209 ina6_local = itpl->itpl_local_sin6.sin6_addr;
1210 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && itpl->itpl_local_sin6.sin6_scope_id)
1211 ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id);
1212
1213 ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
1214 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && itpl->itpl_remote_sin6.sin6_scope_id)
1215 ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id);
1216
1217 inp = in6_pcblookup_hash(pcbinfo,
1218 &ina6_remote,
1219 itpl->itpl_remote_sin6.sin6_port,
1220 &ina6_local,
1221 itpl->itpl_local_sin6.sin6_port,
1222 0, NULL);
1223 } else
1224 return EINVAL;
1225 if (inp == NULL || (so = inp->inp_socket) == NULL)
1226 return ENOENT;
1227
1228 socket_lock(so, 0);
1229 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1230 socket_unlock(so, 0);
1231 return ENOENT;
1232 }
1233 tp = intotcpcb(inp);
1234
1235 tcp_fill_info(tp, ti);
1236 socket_unlock(so, 0);
1237
1238 return 0;
1239 }
1240
1241
1242 __private_extern__ int
1243 tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1244 {
1245 int error;
1246 struct tcp_info ti;
1247 struct info_tuple itpl;
1248
1249 if (req->newptr == USER_ADDR_NULL) {
1250 return EINVAL;
1251 }
1252 if (req->newlen < sizeof(struct info_tuple)) {
1253 return EINVAL;
1254 }
1255 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple));
1256 if (error != 0) {
1257 return error;
1258 }
1259 error = tcp_fill_info_for_info_tuple(&itpl, &ti);
1260 if (error != 0) {
1261 return error;
1262 }
1263 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info));
1264 if (error != 0) {
1265 return error;
1266 }
1267
1268 return 0;
1269 }
1270
1271 static int
1272 tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid)
1273 {
1274 int error = EHOSTUNREACH;
1275 *out_pid = -1;
1276 if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN;
1277
1278 struct inpcb *inp = (struct inpcb*)so->so_pcb;
1279 uint16_t lport = inp->inp_lport;
1280 uint16_t fport = inp->inp_fport;
1281 struct inpcb *finp = NULL;
1282
1283 if (inp->inp_vflag & INP_IPV6) {
1284 struct in6_addr laddr6 = inp->in6p_laddr;
1285 struct in6_addr faddr6 = inp->in6p_faddr;
1286 socket_unlock(so, 0);
1287 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL);
1288 socket_lock(so, 0);
1289 } else if (inp->inp_vflag & INP_IPV4) {
1290 struct in_addr laddr4 = inp->inp_laddr;
1291 struct in_addr faddr4 = inp->inp_faddr;
1292 socket_unlock(so, 0);
1293 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL);
1294 socket_lock(so, 0);
1295 }
1296
1297 if (finp) {
1298 *out_pid = finp->inp_socket->last_pid;
1299 error = 0;
1300 in_pcb_checkstate(finp, WNT_RELEASE, 0);
1301 }
1302
1303 return error;
1304 }
1305
1306 /*
1307 * The new sockopt interface makes it possible for us to block in the
1308 * copyin/out step (if we take a page fault). Taking a page fault at
1309 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
1310 * use TSM, there probably isn't any need for this function to run at
1311 * splnet() any more. This needs more examination.)
1312 */
1313 int
1314 tcp_ctloutput(so, sopt)
1315 struct socket *so;
1316 struct sockopt *sopt;
1317 {
1318 int error, opt, optval;
1319 struct inpcb *inp;
1320 struct tcpcb *tp;
1321
1322 error = 0;
1323 inp = sotoinpcb(so);
1324 if (inp == NULL) {
1325 return (ECONNRESET);
1326 }
1327 /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
1328 if (sopt->sopt_level != IPPROTO_TCP &&
1329 !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH)) {
1330 #if INET6
1331 if (INP_CHECK_SOCKAF(so, AF_INET6))
1332 error = ip6_ctloutput(so, sopt);
1333 else
1334 #endif /* INET6 */
1335 error = ip_ctloutput(so, sopt);
1336 return (error);
1337 }
1338 tp = intotcpcb(inp);
1339 if (tp == NULL) {
1340 return (ECONNRESET);
1341 }
1342
1343 calculate_tcp_clock();
1344
1345 switch (sopt->sopt_dir) {
1346 case SOPT_SET:
1347 switch (sopt->sopt_name) {
1348 case TCP_NODELAY:
1349 case TCP_NOOPT:
1350 case TCP_NOPUSH:
1351 error = sooptcopyin(sopt, &optval, sizeof optval,
1352 sizeof optval);
1353 if (error)
1354 break;
1355
1356 switch (sopt->sopt_name) {
1357 case TCP_NODELAY:
1358 opt = TF_NODELAY;
1359 break;
1360 case TCP_NOOPT:
1361 opt = TF_NOOPT;
1362 break;
1363 case TCP_NOPUSH:
1364 opt = TF_NOPUSH;
1365 break;
1366 default:
1367 opt = 0; /* dead code to fool gcc */
1368 break;
1369 }
1370
1371 if (optval)
1372 tp->t_flags |= opt;
1373 else
1374 tp->t_flags &= ~opt;
1375 break;
1376 case TCP_RXT_FINDROP:
1377 error = sooptcopyin(sopt, &optval, sizeof optval,
1378 sizeof optval);
1379 if (error)
1380 break;
1381 opt = TF_RXTFINDROP;
1382 if (optval)
1383 tp->t_flagsext |= opt;
1384 else
1385 tp->t_flagsext &= ~opt;
1386 break;
1387 case TCP_MEASURE_SND_BW:
1388 error = sooptcopyin(sopt, &optval, sizeof optval,
1389 sizeof optval);
1390 if (error)
1391 break;
1392 opt = TF_MEASURESNDBW;
1393 if (optval) {
1394 if (tp->t_bwmeas == NULL) {
1395 tp->t_bwmeas = tcp_bwmeas_alloc(tp);
1396 if (tp->t_bwmeas == NULL) {
1397 error = ENOMEM;
1398 break;
1399 }
1400 }
1401 tp->t_flagsext |= opt;
1402 } else {
1403 tp->t_flagsext &= ~opt;
1404 /* Reset snd bw measurement state */
1405 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
1406 if (tp->t_bwmeas != NULL) {
1407 tcp_bwmeas_free(tp);
1408 }
1409 }
1410 break;
1411 case TCP_MEASURE_BW_BURST: {
1412 struct tcp_measure_bw_burst in;
1413 uint32_t minpkts, maxpkts;
1414 bzero(&in, sizeof(in));
1415
1416 error = sooptcopyin(sopt, &in, sizeof(in),
1417 sizeof(in));
1418 if (error)
1419 break;
1420 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
1421 tp->t_bwmeas == NULL) {
1422 error = EINVAL;
1423 break;
1424 }
1425 minpkts = (in.min_burst_size != 0) ? in.min_burst_size :
1426 tp->t_bwmeas->bw_minsizepkts;
1427 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size :
1428 tp->t_bwmeas->bw_maxsizepkts;
1429 if (minpkts > maxpkts) {
1430 error = EINVAL;
1431 break;
1432 }
1433 tp->t_bwmeas->bw_minsizepkts = minpkts;
1434 tp->t_bwmeas->bw_maxsizepkts = maxpkts;
1435 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg);
1436 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg);
1437 break;
1438 }
1439 case TCP_MAXSEG:
1440 error = sooptcopyin(sopt, &optval, sizeof optval,
1441 sizeof optval);
1442 if (error)
1443 break;
1444
1445 if (optval > 0 && optval <= tp->t_maxseg &&
1446 optval + 40 >= tcp_minmss)
1447 tp->t_maxseg = optval;
1448 else
1449 error = EINVAL;
1450 break;
1451
1452 case TCP_KEEPALIVE:
1453 error = sooptcopyin(sopt, &optval, sizeof optval,
1454 sizeof optval);
1455 if (error)
1456 break;
1457 if (optval < 0)
1458 error = EINVAL;
1459 else {
1460 tp->t_keepidle = optval * TCP_RETRANSHZ;
1461 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
1462 TCP_KEEPIDLE(tp)); /* reset the timer to new value */
1463 tcp_check_timer_state(tp);
1464 }
1465 break;
1466
1467 case TCP_CONNECTIONTIMEOUT:
1468 error = sooptcopyin(sopt, &optval, sizeof optval,
1469 sizeof optval);
1470 if (error)
1471 break;
1472 if (optval < 0)
1473 error = EINVAL;
1474 else
1475 tp->t_keepinit = optval * TCP_RETRANSHZ;
1476 break;
1477
1478 case PERSIST_TIMEOUT:
1479 error = sooptcopyin(sopt, &optval, sizeof optval,
1480 sizeof optval);
1481 if (error)
1482 break;
1483 if (optval < 0)
1484 error = EINVAL;
1485 else
1486 tp->t_persist_timeout = optval * TCP_RETRANSHZ;
1487 break;
1488 case TCP_RXT_CONNDROPTIME:
1489 error = sooptcopyin(sopt, &optval, sizeof(optval),
1490 sizeof(optval));
1491 if (error)
1492 break;
1493 if (optval < 0)
1494 error = EINVAL;
1495 else
1496 tp->rxt_conndroptime = optval * TCP_RETRANSHZ;
1497 break;
1498 case TCP_NOTSENT_LOWAT:
1499 error = sooptcopyin(sopt, &optval, sizeof(optval),
1500 sizeof(optval));
1501 if (error)
1502 break;
1503 if (optval < 0) {
1504 error = EINVAL;
1505 break;
1506 } else {
1507 if (optval == 0) {
1508 so->so_flags &= ~(SOF_NOTSENT_LOWAT);
1509 tp->t_notsent_lowat = 0;
1510 } else {
1511 so->so_flags |= SOF_NOTSENT_LOWAT;
1512 tp->t_notsent_lowat = optval;
1513 }
1514 }
1515 break;
1516
1517 case SO_FLUSH:
1518 if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
1519 sizeof (optval))) != 0)
1520 break;
1521
1522 error = inp_flush(inp, optval);
1523 break;
1524
1525 default:
1526 error = ENOPROTOOPT;
1527 break;
1528 }
1529 break;
1530
1531 case SOPT_GET:
1532 switch (sopt->sopt_name) {
1533 case TCP_NODELAY:
1534 optval = tp->t_flags & TF_NODELAY;
1535 break;
1536 case TCP_MAXSEG:
1537 optval = tp->t_maxseg;
1538 break;
1539 case TCP_KEEPALIVE:
1540 optval = tp->t_keepidle / TCP_RETRANSHZ;
1541 break;
1542 case TCP_NOOPT:
1543 optval = tp->t_flags & TF_NOOPT;
1544 break;
1545 case TCP_NOPUSH:
1546 optval = tp->t_flags & TF_NOPUSH;
1547 break;
1548 case TCP_CONNECTIONTIMEOUT:
1549 optval = tp->t_keepinit / TCP_RETRANSHZ;
1550 break;
1551 case PERSIST_TIMEOUT:
1552 optval = tp->t_persist_timeout / TCP_RETRANSHZ;
1553 break;
1554 case TCP_RXT_CONNDROPTIME:
1555 optval = tp->rxt_conndroptime / TCP_RETRANSHZ;
1556 break;
1557 case TCP_RXT_FINDROP:
1558 optval = tp->t_flagsext & TF_RXTFINDROP;
1559 break;
1560 case TCP_MEASURE_SND_BW:
1561 optval = tp->t_flagsext & TF_MEASURESNDBW;
1562 break;
1563 case TCP_INFO: {
1564 struct tcp_info ti;
1565
1566 tcp_fill_info(tp, &ti);
1567 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
1568 goto done;
1569 /* NOT REACHED */
1570 }
1571 case TCP_MEASURE_BW_BURST: {
1572 struct tcp_measure_bw_burst out;
1573 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
1574 tp->t_bwmeas == NULL) {
1575 error = EINVAL;
1576 break;
1577 }
1578 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts;
1579 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts;
1580 error = sooptcopyout(sopt, &out, sizeof(out));
1581 goto done;
1582 }
1583 case TCP_NOTSENT_LOWAT:
1584 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
1585 optval = tp->t_notsent_lowat;
1586 } else {
1587 optval = 0;
1588 }
1589 break;
1590 case TCP_PEER_PID: {
1591 pid_t pid;
1592 error = tcp_lookup_peer_pid_locked(so, &pid);
1593 if (error == 0)
1594 error = sooptcopyout(sopt, &pid, sizeof(pid));
1595 goto done;
1596 }
1597 default:
1598 error = ENOPROTOOPT;
1599 break;
1600 }
1601 if (error == 0)
1602 error = sooptcopyout(sopt, &optval, sizeof optval);
1603 break;
1604 }
1605 done:
1606 return (error);
1607 }
1608
1609 /*
1610 * tcp_sendspace and tcp_recvspace are the default send and receive window
1611 * sizes, respectively. These are obsolescent (this information should
1612 * be set by the route).
1613 */
1614 u_int32_t tcp_sendspace = 1448*256;
1615 u_int32_t tcp_recvspace = 1448*384;
1616
1617 /* During attach, the size of socket buffer allocated is limited to
1618 * sb_max in sbreserve. Disallow setting the tcp send and recv space
1619 * to be more than sb_max because that will cause tcp_attach to fail
1620 * (see radar 5713060)
1621 */
1622 static int
1623 sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
1624 __unused int arg2, struct sysctl_req *req) {
1625 u_int32_t new_value = 0, *space_p = NULL;
1626 int changed = 0, error = 0;
1627 u_quad_t sb_effective_max = (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES;
1628
1629 switch (oidp->oid_number) {
1630 case TCPCTL_SENDSPACE:
1631 space_p = &tcp_sendspace;
1632 break;
1633 case TCPCTL_RECVSPACE:
1634 space_p = &tcp_recvspace;
1635 break;
1636 default:
1637 return EINVAL;
1638 }
1639 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t),
1640 &new_value, &changed);
1641 if (changed) {
1642 if (new_value > 0 && new_value <= sb_effective_max) {
1643 *space_p = new_value;
1644 } else {
1645 error = ERANGE;
1646 }
1647 }
1648 return error;
1649 }
1650
1651 SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1652 &tcp_sendspace , 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size");
1653 SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1654 &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
1655
1656
1657 /*
1658 * Attach TCP protocol to socket, allocating
1659 * internet protocol control block, tcp control block,
1660 * bufer space, and entering LISTEN state if to accept connections.
1661 *
1662 * Returns: 0 Success
1663 * in_pcballoc:ENOBUFS
1664 * in_pcballoc:ENOMEM
1665 * in_pcballoc:??? [IPSEC specific]
1666 * soreserve:ENOBUFS
1667 */
1668 static int
1669 tcp_attach(so, p)
1670 struct socket *so;
1671 struct proc *p;
1672 {
1673 register struct tcpcb *tp;
1674 struct inpcb *inp;
1675 int error;
1676 #if INET6
1677 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
1678 #endif
1679
1680 error = in_pcballoc(so, &tcbinfo, p);
1681 if (error)
1682 return (error);
1683
1684 inp = sotoinpcb(so);
1685
1686 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1687 error = soreserve(so, tcp_sendspace, tcp_recvspace);
1688 if (error)
1689 return (error);
1690 }
1691 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0)
1692 so->so_rcv.sb_flags |= SB_AUTOSIZE;
1693 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0)
1694 so->so_snd.sb_flags |= SB_AUTOSIZE;
1695
1696 #if INET6
1697 if (isipv6) {
1698 inp->inp_vflag |= INP_IPV6;
1699 inp->in6p_hops = -1; /* use kernel default */
1700 }
1701 else
1702 #endif /* INET6 */
1703 inp->inp_vflag |= INP_IPV4;
1704 tp = tcp_newtcpcb(inp);
1705 if (tp == 0) {
1706 int nofd = so->so_state & SS_NOFDREF; /* XXX */
1707
1708 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
1709 #if INET6
1710 if (isipv6)
1711 in6_pcbdetach(inp);
1712 else
1713 #endif /* INET6 */
1714 in_pcbdetach(inp);
1715 so->so_state |= nofd;
1716 return (ENOBUFS);
1717 }
1718 if (nstat_collect) {
1719 nstat_tcp_new_pcb(inp);
1720 }
1721 tp->t_state = TCPS_CLOSED;
1722 return (0);
1723 }
1724
1725 /*
1726 * Initiate (or continue) disconnect.
1727 * If embryonic state, just send reset (once).
1728 * If in ``let data drain'' option and linger null, just drop.
1729 * Otherwise (hard), mark socket disconnecting and drop
1730 * current input data; switch states based on user close, and
1731 * send segment to peer (with FIN).
1732 */
1733 static struct tcpcb *
1734 tcp_disconnect(tp)
1735 register struct tcpcb *tp;
1736 {
1737 struct socket *so = tp->t_inpcb->inp_socket;
1738
1739 if (tp->t_state < TCPS_ESTABLISHED)
1740 tp = tcp_close(tp);
1741 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1742 tp = tcp_drop(tp, 0);
1743 else {
1744 soisdisconnecting(so);
1745 sbflush(&so->so_rcv);
1746 tp = tcp_usrclosed(tp);
1747 if (tp)
1748 (void) tcp_output(tp);
1749 }
1750 return (tp);
1751 }
1752
1753 /*
1754 * User issued close, and wish to trail through shutdown states:
1755 * if never received SYN, just forget it. If got a SYN from peer,
1756 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1757 * If already got a FIN from peer, then almost done; go to LAST_ACK
1758 * state. In all other cases, have already sent FIN to peer (e.g.
1759 * after PRU_SHUTDOWN), and just have to play tedious game waiting
1760 * for peer to send FIN or not respond to keep-alives, etc.
1761 * We can let the user exit from the close as soon as the FIN is acked.
1762 */
1763 static struct tcpcb *
1764 tcp_usrclosed(tp)
1765 register struct tcpcb *tp;
1766 {
1767
1768 switch (tp->t_state) {
1769
1770 case TCPS_CLOSED:
1771 case TCPS_LISTEN:
1772 tp->t_state = TCPS_CLOSED;
1773 tp = tcp_close(tp);
1774 break;
1775
1776 case TCPS_SYN_SENT:
1777 case TCPS_SYN_RECEIVED:
1778 tp->t_flags |= TF_NEEDFIN;
1779 break;
1780
1781 case TCPS_ESTABLISHED:
1782 tp->t_state = TCPS_FIN_WAIT_1;
1783 break;
1784
1785 case TCPS_CLOSE_WAIT:
1786 tp->t_state = TCPS_LAST_ACK;
1787 break;
1788 }
1789 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1790 soisdisconnected(tp->t_inpcb->inp_socket);
1791 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
1792 if (tp->t_state == TCPS_FIN_WAIT_2)
1793 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, tcp_maxidle);
1794 }
1795 return (tp);
1796 }
1797
1798 void
1799 tcp_in_cksum_stats(u_int32_t len)
1800 {
1801 tcps_in_sw_cksum++;
1802 tcps_in_sw_cksum_bytes += len;
1803 }
1804
1805 void
1806 tcp_out_cksum_stats(u_int32_t len)
1807 {
1808 tcps_out_sw_cksum++;
1809 tcps_out_sw_cksum_bytes += len;
1810 }