]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/tcp_var.h
xnu-1504.9.37.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_var.h
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1993, 1994, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.8 2001/08/22 00:59:13 silby Exp $
62 */
63
64 #ifndef _NETINET_TCP_VAR_H_
65 #define _NETINET_TCP_VAR_H_
66 #include <sys/appleapiopts.h>
67 #include <sys/queue.h>
68 #include <netinet/in_pcb.h>
69 #include <netinet/tcp_timer.h>
70
71 #if defined(__LP64__) /* LP64: pointer-typed fields are declared as u_int32_t instead */
72 #define _TCPCB_PTR(x) u_int32_t
73 #define _TCPCB_LIST_HEAD(name, type) \
74 struct name { \
75 u_int32_t lh_first; \
76 };
77 #else /* !__LP64__: use the real pointer type and the <sys/queue.h> LIST_HEAD */
78 #define _TCPCB_PTR(x) x
79 #define _TCPCB_LIST_HEAD(name, type) LIST_HEAD(name, type)
80 #endif /* __LP64__ */
81
82 #define TCP_RETRANSHZ 10 /* tcp retrans timer (100ms) per hz */
83
84 #ifdef KERNEL_PRIVATE
85 #define N_TIME_WAIT_SLOTS 128 /* must be power of 2 */
86
87 /*
88 * Kernel variables for tcp.
89 */
90
91 /* TCP segment queue entry; entries are chained into a struct tsegqe_head list */
92 struct tseg_qent {
93 LIST_ENTRY(tseg_qent) tqe_q; /* list linkage */
94 int tqe_len; /* TCP segment data length */
95 struct tcphdr *tqe_th; /* a pointer to tcp header */
96 struct mbuf *tqe_m; /* mbuf that contains the packet */
97 };
98 LIST_HEAD(tsegqe_head, tseg_qent);
99 extern int tcp_reass_maxseg;
100 extern int tcp_reass_qsize;
101 #ifdef MALLOC_DECLARE
102 MALLOC_DECLARE(M_TSEGQ);
103 #endif
104
105 struct sackblk { /* one SACK block: a range of sequence numbers */
106 tcp_seq start; /* start seq no. of sack block */
107 tcp_seq end; /* end seq no. */
108 };
109
110 struct sackhole { /* one hole on the sender's SACK scoreboard list */
111 tcp_seq start; /* start seq no. of hole */
112 tcp_seq end; /* end seq no. */
113 tcp_seq rxmit; /* next seq. no in hole to be retransmitted */
114 TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */
115 };
116
117 struct sackhint { /* embedded in the kernel struct tcpcb as the "SACK scoreboard hint" */
118 struct sackhole *nexthole; /* NOTE(review): presumably the next hole to retransmit from -- confirm */
119 int sack_bytes_rexmit; /* NOTE(review): presumably bytes retransmitted out of holes -- confirm */
120 };
121
122 struct tcptemp { /* header template: room for an IP header followed by a TCP header */
123 u_char tt_ipgen[40]; /* must be as large as the largest IP header, currently IPv6 */
124 struct tcphdr tt_t; /* TCP header */
125 };
126
127 #define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
128
129 /*
130 * Tcp control block, one per tcp; fields:
131 * Organized for 16 byte cacheline efficiency.
132 */
133 struct tcpcb {
134 struct tsegqe_head t_segq;
135 int t_dupacks; /* consecutive dup acks recd */
136 struct tcptemp *unused; /* unused now: was t_template */
137
138 int t_timer[TCPT_NTIMERS]; /* tcp timers */
139
140 struct inpcb *t_inpcb; /* back pointer to internet pcb */
141 int t_state; /* state of this connection */
142 u_int t_flags;
143 #define TF_ACKNOW 0x00001 /* ack peer immediately */
144 #define TF_DELACK 0x00002 /* ack, but try to delay it */
145 #define TF_NODELAY 0x00004 /* don't delay packets to coalesce */
146 #define TF_NOOPT 0x00008 /* don't use tcp options */
147 #define TF_SENTFIN 0x00010 /* have sent FIN */
148 #define TF_REQ_SCALE 0x00020 /* have/will request window scaling */
149 #define TF_RCVD_SCALE 0x00040 /* other side has requested scaling */
150 #define TF_REQ_TSTMP 0x00080 /* have/will request timestamps */
151 #define TF_RCVD_TSTMP 0x00100 /* a timestamp was received in SYN */
152 #define TF_SACK_PERMIT 0x00200 /* other side said I could SACK */
153 #define TF_NEEDSYN 0x00400 /* send SYN (implicit state) */
154 #define TF_NEEDFIN 0x00800 /* send FIN (implicit state) */
155 #define TF_NOPUSH 0x01000 /* don't push */
156 #define TF_REQ_CC 0x02000 /* have/will request CC */
157 #define TF_RCVD_CC 0x04000 /* a CC was received in SYN */
158 #define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */
159 #define TF_MORETOCOME 0x10000 /* More data to be appended to sock */
160 #define TF_LQ_OVERFLOW 0x20000 /* UNUSED listen queue overflow */
161 #define TF_RXWIN0SENT 0x40000 /* sent a receiver win 0 in response */
162 #define TF_SLOWLINK 0x80000 /* route is on a modem speed link */
163
164
165 #define TF_LASTIDLE 0x100000 /* connection was previously idle */
166 #define TF_FASTRECOVERY 0x200000 /* in NewReno Fast Recovery */
167 #define TF_WASFRECOVERY 0x400000 /* was in NewReno Fast Recovery */
168 #define TF_SIGNATURE 0x800000 /* require MD5 digests (RFC2385) */
169 #define TF_MAXSEGSNT 0x1000000 /* last segment sent was a full segment */
170 #define TF_SENDINPROG 0x2000000 /* send is in progress */
171 #define TF_PMTUD 0x4000000 /* Perform Path MTU Discovery for this connection */
172 #define TF_CLOSING 0x8000000 /* pending tcp close */
173 #define TF_TSO 0x10000000 /* TCP Segment Offloading is enabled on this connection */
174 #define TF_BLACKHOLE 0x20000000 /* Path MTU Discovery Black Hole detection */
175
176 int t_force; /* 1 if forcing out a byte */
177
178 tcp_seq snd_una; /* send unacknowledged */
179 tcp_seq snd_max; /* highest sequence number sent;
180 * used to recognize retransmits
181 */
182 tcp_seq snd_nxt; /* send next */
183 tcp_seq snd_up; /* send urgent pointer */
184
185 tcp_seq snd_wl1; /* window update seg seq number */
186 tcp_seq snd_wl2; /* window update seg ack number */
187 tcp_seq iss; /* initial send sequence number */
188 tcp_seq irs; /* initial receive sequence number */
189
190 tcp_seq rcv_nxt; /* receive next */
191 tcp_seq rcv_adv; /* advertised window */
192 u_int32_t rcv_wnd; /* receive window */
193 tcp_seq rcv_up; /* receive urgent pointer */
194
195 u_int32_t snd_wnd; /* send window */
196 u_int32_t snd_cwnd; /* congestion-controlled window */
197 u_int32_t snd_bwnd; /* bandwidth-controlled window */
198 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for
199 * slow start exponential to
200 * linear switch
201 */
202 u_int32_t snd_bandwidth; /* calculated bandwidth or 0 */
203 tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
204
205 u_int t_maxopd; /* mss plus options */
206
207 u_int32_t t_rcvtime; /* inactivity time */
208 u_int32_t t_starttime; /* time connection was established */
209 int t_rtttime; /* round trip time */
210 tcp_seq t_rtseq; /* sequence number being timed */
211
212 int t_bw_rtttime; /* used for bandwidth calculation */
213 tcp_seq t_bw_rtseq; /* used for bandwidth calculation */
214
215 int t_rxtcur; /* current retransmit value (ticks) */
216 u_int t_maxseg; /* maximum segment size */
217 int t_srtt; /* smoothed round-trip time */
218 int t_rttvar; /* variance in round-trip time */
219
220 int t_rxtshift; /* log(2) of rexmt exp. backoff */
221 u_int t_rttmin; /* minimum rtt allowed */
222 u_int t_rttbest; /* best rtt we've seen */
223 u_int32_t t_rttupdated; /* number of times rtt sampled */
224 u_int32_t max_sndwnd; /* largest window peer has offered */
225
226 int t_softerror; /* possible error not yet reported */
227 /* out-of-band data */
228 char t_oobflags; /* have some */
229 char t_iobc; /* input character */
230 #define TCPOOB_HAVEDATA 0x01
231 #define TCPOOB_HADDATA 0x02
232 /* RFC 1323 variables */
233 u_char snd_scale; /* window scaling for send window */
234 u_char rcv_scale; /* window scaling for recv window */
235 u_char request_r_scale; /* pending window scaling */
236 u_char requested_s_scale;
237 u_int32_t ts_recent; /* timestamp echo data */
238
239 u_int32_t ts_recent_age; /* when last updated */
240 tcp_seq last_ack_sent;
241 /* RFC 1644 variables */
242 tcp_cc cc_send; /* send connection count */
243 tcp_cc cc_recv; /* receive connection count */
244 /* RFC 3465 variables */
245 u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */
246 /* experimental */
247 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */
248 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */
249 u_int32_t t_badrxtwin; /* window for retransmit recovery */
250
251 int t_keepidle; /* keepalive idle timer (override global if > 0) */
252 int t_lastchain; /* amount of packets chained last time around */
253 int t_unacksegs; /* received but unacked segments: used for delaying acks */
254
255
256 /* 3529618 MSS overload prevention */
257 u_int32_t rcv_reset;
258 u_int32_t rcv_pps;
259 u_int32_t rcv_byps;
260 u_int32_t rcv_maxbyps;
261 tcp_seq snd_high; /* for use in NewReno Fast Recovery */
262 tcp_seq snd_high_prev; /* snd_high prior to retransmit */
263
264 tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
265 u_char snd_limited; /* segments limited transmitted */
266 /* anti DoS counters */
267 u_int32_t rcv_second; /* start of interval second */
268 /* SACK related state */
269 int sack_enable; /* enable SACK for this connection */
270 int snd_numholes; /* number of holes seen by sender */
271
272 TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
273 /* SACK scoreboard (sorted) */
274 tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/
275 int rcv_numsacks; /* # distinct sack blks present */
276 struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
277 tcp_seq sack_newdata; /* New data xmitted in this recovery
278 episode starts at this seq number */
279 struct sackhint sackhint; /* SACK scoreboard hint */
280 int t_rttlow; /* smallest observed RTT */
281 u_long ecn_flags;
282 #define TE_SETUPSENT 0x01 /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
283 #define TE_SETUPRECEIVED 0x02 /* Indicate we have received ECN-SETUP SYN or SYN-ACK */
284 #define TE_SENDIPECT 0x04 /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
285 #define TE_SENDCWR 0x08 /* Indicate that the next non-retransmit should have the TCP CWR flag set */
286 #define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */
287
288 #if TRAFFIC_MGT
289 u_int32_t tot_recv_snapshot; /* snapshot of global total pkts received */
290 u_int32_t bg_recv_snapshot; /* snapshot of global background pkts received */
291 #endif /* TRAFFIC_MGT */
292 u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */
293 struct mbuf *t_pktlist_head; /* First packet in transmit chain */
294 struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */
295
296 int t_keepinit; /* connection timeout, i.e. idle time in SYN_SENT or SYN_RECV state */
297 u_int32_t tso_max_segment_size; /* TCP Segment Offloading maximum segment unit for NIC */
298 u_int t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */
299 };
300
/*
 * Fast-recovery helpers: test, set and clear TF_FASTRECOVERY in tp->t_flags.
 * `tp` is a pointer to a TCP control block and is evaluated exactly once.
 * The argument and each expansion are fully parenthesized so that an
 * expression argument (e.g. `p + 1`) and use inside a larger expression
 * both behave as expected.
 */
#define IN_FASTRECOVERY(tp) ((tp)->t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(tp) ((tp)->t_flags |= TF_FASTRECOVERY)
#define EXIT_FASTRECOVERY(tp) ((tp)->t_flags &= ~TF_FASTRECOVERY)
304
305
306 /*
307 * Structure to hold TCP options that are only used during segment
308 * processing (in tcp_input), but not held in the tcpcb.
309 * It's basically used to reduce the number of parameters
310 * to tcp_dooptions.
311 */
312 struct tcpopt {
313 u_int32_t to_flags; /* which options are present */
314 #define TOF_TS 0x0001 /* timestamp */
315 #define TOF_MSS 0x0010
316 #define TOF_SCALE 0x0020
317 #define TOF_SIGNATURE 0x0040 /* signature option present */
318 #define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */
319 #define TOF_SACK 0x0100 /* Peer sent SACK option */
320 u_int32_t to_tsval;
321 u_int32_t to_tsecr;
322 u_int16_t to_mss;
323 u_int8_t to_requested_s_scale;
324 u_int8_t to_nsacks; /* number of SACK blocks */
325 u_char *to_sacks; /* pointer to the first SACK block */
326 };
327
328 /*
329 * The TAO cache entry which is stored in the protocol family specific
330 * portion of the route metrics.
331 */
332 struct rmxp_tao {
333 tcp_cc tao_cc; /* latest CC in valid SYN */
334 tcp_cc tao_ccsent; /* latest CC sent to peer */
335 u_short tao_mssopt; /* peer's cached MSS */
336 #ifdef notyet
337 u_short tao_flags; /* cache status flags */
338 #define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */
339 #define TAOF_OK 0x0002 /* peer does understand rfc1644 */
340 #define TAOF_UNDEF 0 /* we don't know yet */
341 #endif /* notyet */
342 };
343 #define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler)
344
345 #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
346 #define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
347
348 /*
349 * The smoothed round-trip time and estimated variance
350 * are stored as fixed point numbers scaled by the values below.
351 * With these scales, srtt has 5 bits to the right of the binary point
352 * (TCP_RTT_SHIFT) and rttvar has 4 bits (TCP_RTTVAR_SHIFT).
353 * NOTE(review): earlier text said 3 and 2 bits, with the scales doubling as
354 * smoothing gains ("ALPHA" of 0.875 for srtt, 0.75 for rttvar); those gains
355 * match 3 and 2 bits, not these scales -- confirm against tcp_input.c.
356 */
357 #define TCP_RTT_SCALE 32 /* multiplier for srtt; 5 bits frac. */
358 #define TCP_RTT_SHIFT 5 /* shift for srtt; 5 bits frac. */
359 #define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 4 bits */
360 #define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */
361 #define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
362
363 /*
364 * The initial retransmission should happen at rtt + 4 * rttvar.
365 * Because of the way we do the smoothing, srtt and rttvar
366 * will each average +1/2 tick of bias. When we compute
367 * the retransmit timer, we want 1/2 tick of rounding and
368 * 1 extra tick because of +-1/2 tick uncertainty in the
369 * firing of the timer. The bias will give us exactly the
370 * 1.5 tick we need. But, because the bias is
371 * statistical, we have to test that we don't drop below
372 * the minimum feasible timer (which is 2 ticks).
373 * This version of the macro is adapted from a paper by Lawrence
374 * Brakmo and Larry Peterson which outlines a problem caused
375 * by insufficient precision in the original implementation,
376 * which results in inappropriately large RTO values for very
377 * fast networks.
378 */
379 #define TCP_REXMTVAL(tp) \
380 max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
381 + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) /* NOTE: tp is evaluated three times; max() must be in scope at the point of use */
382
383 /*
384 * Jaguar compatible TCP control block, for xtcpcb
385 * Does not have the old fields
386 */
387 struct otcpcb {
388 #else
389 struct tseg_qent;
390 _TCPCB_LIST_HEAD(tsegqe_head, tseg_qent);
391
392 struct tcpcb {
393 #endif /* KERNEL_PRIVATE */
394 #if defined(KERNEL_PRIVATE)
395 u_int32_t t_segq;
396 #else
397 struct tsegqe_head t_segq;
398 #endif /* KERNEL_PRIVATE */
399 int t_dupacks; /* consecutive dup acks recd */
400 u_int32_t unused; /* unused now: was t_template */
401
402 int t_timer[TCPT_NTIMERS]; /* tcp timers */
403
404 _TCPCB_PTR(struct inpcb *) t_inpcb; /* back pointer to internet pcb */
405 int t_state; /* state of this connection */
406 u_int t_flags;
407 #define TF_ACKNOW 0x00001 /* ack peer immediately */
408 #define TF_DELACK 0x00002 /* ack, but try to delay it */
409 #define TF_NODELAY 0x00004 /* don't delay packets to coalesce */
410 #define TF_NOOPT 0x00008 /* don't use tcp options */
411 #define TF_SENTFIN 0x00010 /* have sent FIN */
412 #define TF_REQ_SCALE 0x00020 /* have/will request window scaling */
413 #define TF_RCVD_SCALE 0x00040 /* other side has requested scaling */
414 #define TF_REQ_TSTMP 0x00080 /* have/will request timestamps */
415 #define TF_RCVD_TSTMP 0x00100 /* a timestamp was received in SYN */
416 #define TF_SACK_PERMIT 0x00200 /* other side said I could SACK */
417 #define TF_NEEDSYN 0x00400 /* send SYN (implicit state) */
418 #define TF_NEEDFIN 0x00800 /* send FIN (implicit state) */
419 #define TF_NOPUSH 0x01000 /* don't push */
420 #define TF_REQ_CC 0x02000 /* have/will request CC */
421 #define TF_RCVD_CC 0x04000 /* a CC was received in SYN */
422 #define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */
423 #define TF_MORETOCOME 0x10000 /* More data to be appended to sock */
424 #define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */
425 #define TF_RXWIN0SENT 0x40000 /* sent a receiver win 0 in response */
426 #define TF_SLOWLINK 0x80000 /* route is on a modem speed link */
427
428 int t_force; /* 1 if forcing out a byte */
429
430 tcp_seq snd_una; /* send unacknowledged */
431 tcp_seq snd_max; /* highest sequence number sent;
432 * used to recognize retransmits
433 */
434 tcp_seq snd_nxt; /* send next */
435 tcp_seq snd_up; /* send urgent pointer */
436
437 tcp_seq snd_wl1; /* window update seg seq number */
438 tcp_seq snd_wl2; /* window update seg ack number */
439 tcp_seq iss; /* initial send sequence number */
440 tcp_seq irs; /* initial receive sequence number */
441
442 tcp_seq rcv_nxt; /* receive next */
443 tcp_seq rcv_adv; /* advertised window */
444 u_int32_t rcv_wnd; /* receive window */
445 tcp_seq rcv_up; /* receive urgent pointer */
446
447 u_int32_t snd_wnd; /* send window */
448 u_int32_t snd_cwnd; /* congestion-controlled window */
449 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for
450 * slow start exponential to
451 * linear switch
452 */
453 u_int t_maxopd; /* mss plus options */
454
455 u_int32_t t_rcvtime; /* inactivity time */
456 u_int32_t t_starttime; /* time connection was established */
457 int t_rtttime; /* round trip time */
458 tcp_seq t_rtseq; /* sequence number being timed */
459
460 int t_rxtcur; /* current retransmit value (ticks) */
461 u_int t_maxseg; /* maximum segment size */
462 int t_srtt; /* smoothed round-trip time */
463 int t_rttvar; /* variance in round-trip time */
464
465 int t_rxtshift; /* log(2) of rexmt exp. backoff */
466 u_int t_rttmin; /* minimum rtt allowed */
467 u_int32_t t_rttupdated; /* number of times rtt sampled */
468 u_int32_t max_sndwnd; /* largest window peer has offered */
469
470 int t_softerror; /* possible error not yet reported */
471 /* out-of-band data */
472 char t_oobflags; /* have some */
473 char t_iobc; /* input character */
474 #define TCPOOB_HAVEDATA 0x01
475 #define TCPOOB_HADDATA 0x02
476 /* RFC 1323 variables */
477 u_char snd_scale; /* window scaling for send window */
478 u_char rcv_scale; /* window scaling for recv window */
479 u_char request_r_scale; /* pending window scaling */
480 u_char requested_s_scale;
481 u_int32_t ts_recent; /* timestamp echo data */
482
483 u_int32_t ts_recent_age; /* when last updated */
484 tcp_seq last_ack_sent;
485 /* RFC 1644 variables */
486 tcp_cc cc_send; /* send connection count */
487 tcp_cc cc_recv; /* receive connection count */
488 tcp_seq snd_recover; /* for use in fast recovery */
489 /* experimental */
490 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */
491 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */
492 u_int32_t t_badrxtwin; /* window for retransmit recovery */
493 };
494
495
496 /*
497 * TCP statistics.
498 * Many of these should be kept per connection,
499 * but that's inconvenient at the moment.
500 */
501 struct tcpstat {
502 u_int32_t tcps_connattempt; /* connections initiated */
503 u_int32_t tcps_accepts; /* connections accepted */
504 u_int32_t tcps_connects; /* connections established */
505 u_int32_t tcps_drops; /* connections dropped */
506 u_int32_t tcps_conndrops; /* embryonic connections dropped */
507 u_int32_t tcps_closed; /* conn. closed (includes drops) */
508 u_int32_t tcps_segstimed; /* segs where we tried to get rtt */
509 u_int32_t tcps_rttupdated; /* times we succeeded */
510 u_int32_t tcps_delack; /* delayed acks sent */
511 u_int32_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
512 u_int32_t tcps_rexmttimeo; /* retransmit timeouts */
513 u_int32_t tcps_persisttimeo; /* persist timeouts */
514 u_int32_t tcps_keeptimeo; /* keepalive timeouts */
515 u_int32_t tcps_keepprobe; /* keepalive probes sent */
516 u_int32_t tcps_keepdrops; /* connections dropped in keepalive */
517
518 u_int32_t tcps_sndtotal; /* total packets sent */
519 u_int32_t tcps_sndpack; /* data packets sent */
520 u_int32_t tcps_sndbyte; /* data bytes sent */
521 u_int32_t tcps_sndrexmitpack; /* data packets retransmitted */
522 u_int32_t tcps_sndrexmitbyte; /* data bytes retransmitted */
523 u_int32_t tcps_sndacks; /* ack-only packets sent */
524 u_int32_t tcps_sndprobe; /* window probes sent */
525 u_int32_t tcps_sndurg; /* packets sent with URG only */
526 u_int32_t tcps_sndwinup; /* window update-only packets sent */
527 u_int32_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
528
529 u_int32_t tcps_rcvtotal; /* total packets received */
530 u_int32_t tcps_rcvpack; /* packets received in sequence */
531 u_int32_t tcps_rcvbyte; /* bytes received in sequence */
532 u_int32_t tcps_rcvbadsum; /* packets received with cksum errs */
533 u_int32_t tcps_rcvbadoff; /* packets received with bad offset */
534 u_int32_t tcps_rcvmemdrop; /* packets dropped for lack of memory */
535 u_int32_t tcps_rcvshort; /* packets received too short */
536 u_int32_t tcps_rcvduppack; /* duplicate-only packets received */
537 u_int32_t tcps_rcvdupbyte; /* duplicate-only bytes received */
538 u_int32_t tcps_rcvpartduppack; /* packets with some duplicate data */
539 u_int32_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
540 u_int32_t tcps_rcvoopack; /* out-of-order packets received */
541 u_int32_t tcps_rcvoobyte; /* out-of-order bytes received */
542 u_int32_t tcps_rcvpackafterwin; /* packets with data after window */
543 u_int32_t tcps_rcvbyteafterwin; /* bytes rcvd after window */
544 u_int32_t tcps_rcvafterclose; /* packets rcvd after "close" */
545 u_int32_t tcps_rcvwinprobe; /* rcvd window probe packets */
546 u_int32_t tcps_rcvdupack; /* rcvd duplicate acks */
547 u_int32_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */
548 u_int32_t tcps_rcvackpack; /* rcvd ack packets */
549 u_int32_t tcps_rcvackbyte; /* bytes acked by rcvd acks */
550 u_int32_t tcps_rcvwinupd; /* rcvd window update packets */
551 u_int32_t tcps_pawsdrop; /* segments dropped due to PAWS */
552 u_int32_t tcps_predack; /* times hdr predict ok for acks */
553 u_int32_t tcps_preddat; /* times hdr predict ok for data pkts */
554 u_int32_t tcps_pcbcachemiss;
555 u_int32_t tcps_cachedrtt; /* times cached RTT in route updated */
556 u_int32_t tcps_cachedrttvar; /* times cached rttvar updated */
557 u_int32_t tcps_cachedssthresh; /* times cached ssthresh updated */
558 u_int32_t tcps_usedrtt; /* times RTT initialized from route */
559 u_int32_t tcps_usedrttvar; /* times RTTVAR initialized from rt */
560 u_int32_t tcps_usedssthresh; /* times ssthresh initialized from rt*/
561 u_int32_t tcps_persistdrop; /* timeout in persist state */
562 u_int32_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */
563 u_int32_t tcps_mturesent; /* resends due to MTU discovery */
564 u_int32_t tcps_listendrop; /* listen queue overflows */
565
566 /* new stats from FreeBSD 5.4 sync up */
567 u_int32_t tcps_minmssdrops; /* average minmss too low drops */
568 u_int32_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */
569 u_int32_t tcps_badrst; /* ignored RSTs in the window */
570
571 u_int32_t tcps_sc_added; /* entry added to syncache */
572 u_int32_t tcps_sc_retransmitted; /* syncache entry was retransmitted */
573 u_int32_t tcps_sc_dupsyn; /* duplicate SYN packet */
574 u_int32_t tcps_sc_dropped; /* could not reply to packet */
575 u_int32_t tcps_sc_completed; /* successful extraction of entry */
576 u_int32_t tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */
577 u_int32_t tcps_sc_cacheoverflow; /* syncache cache limit hit */
578 u_int32_t tcps_sc_reset; /* RST removed entry from syncache */
579 u_int32_t tcps_sc_stale; /* timed out or listen socket gone */
580 u_int32_t tcps_sc_aborted; /* syncache entry aborted */
581 u_int32_t tcps_sc_badack; /* removed due to bad ACK */
582 u_int32_t tcps_sc_unreach; /* ICMP unreachable received */
583 u_int32_t tcps_sc_zonefail; /* zalloc() failed */
584 u_int32_t tcps_sc_sendcookie; /* SYN cookie sent */
585 u_int32_t tcps_sc_recvcookie; /* SYN cookie received */
586
587 u_int32_t tcps_hc_added; /* entry added to hostcache */
588 u_int32_t tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */
589
590 /* SACK related stats */
591 u_int32_t tcps_sack_recovery_episode; /* SACK recovery episodes */
592 u_int32_t tcps_sack_rexmits; /* SACK rexmit segments */
593 u_int32_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
594 u_int32_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */
595 u_int32_t tcps_sack_send_blocks; /* SACK blocks (options) sent */
596 u_int32_t tcps_sack_sboverflow; /* SACK sendblock overflow */
597
598 #if TRAFFIC_MGT
599 u_int32_t tcps_bg_rcvtotal; /* total background packets received */
600 #endif /* TRAFFIC_MGT */
601 };
602
603 #pragma pack(4)
604
605 /*
606 * TCB structure exported to user-land via sysctl(3).
607 * NOTE(review): this used to say "declare only if in_pcb.h and sys/socketvar.h
608 * have been included", but no such guard surrounds the declaration here.
609 */
610
611 struct xtcpcb {
612 u_int32_t xt_len;
613 #ifdef KERNEL_PRIVATE
614 struct inpcb_compat xt_inp;
615 #else
616 struct inpcb xt_inp;
617 #endif
618 #ifdef KERNEL_PRIVATE
619 struct otcpcb xt_tp; /* kernel builds embed the compat control block declared above */
620 #else
621 struct tcpcb xt_tp;
622 #endif
623 struct xsocket xt_socket;
624 u_quad_t xt_alignment_hack;
625 };
626
627 #if !CONFIG_EMBEDDED
628
629 struct xtcpcb64 {
630 u_int32_t xt_len;
631 struct xinpcb64 xt_inpcb;
632
633 u_int64_t t_segq;
634 int t_dupacks; /* consecutive dup acks recd */
635
636 int t_timer[TCPT_NTIMERS]; /* tcp timers */
637
638 int t_state; /* state of this connection */
639 u_int t_flags;
640
641 int t_force; /* 1 if forcing out a byte */
642
643 tcp_seq snd_una; /* send unacknowledged */
644 tcp_seq snd_max; /* highest sequence number sent;
645 * used to recognize retransmits
646 */
647 tcp_seq snd_nxt; /* send next */
648 tcp_seq snd_up; /* send urgent pointer */
649
650 tcp_seq snd_wl1; /* window update seg seq number */
651 tcp_seq snd_wl2; /* window update seg ack number */
652 tcp_seq iss; /* initial send sequence number */
653 tcp_seq irs; /* initial receive sequence number */
654
655 tcp_seq rcv_nxt; /* receive next */
656 tcp_seq rcv_adv; /* advertised window */
657 u_int32_t rcv_wnd; /* receive window */
658 tcp_seq rcv_up; /* receive urgent pointer */
659
660 u_int32_t snd_wnd; /* send window */
661 u_int32_t snd_cwnd; /* congestion-controlled window */
662 u_int32_t snd_ssthresh; /* snd_cwnd size threshold for
663 * slow start exponential to
664 * linear switch
665 */
666 u_int t_maxopd; /* mss plus options */
667
668 u_int32_t t_rcvtime; /* inactivity time */
669 u_int32_t t_starttime; /* time connection was established */
670 int t_rtttime; /* round trip time */
671 tcp_seq t_rtseq; /* sequence number being timed */
672
673 int t_rxtcur; /* current retransmit value (ticks) */
674 u_int t_maxseg; /* maximum segment size */
675 int t_srtt; /* smoothed round-trip time */
676 int t_rttvar; /* variance in round-trip time */
677
678 int t_rxtshift; /* log(2) of rexmt exp. backoff */
679 u_int t_rttmin; /* minimum rtt allowed */
680 u_int32_t t_rttupdated; /* number of times rtt sampled */
681 u_int32_t max_sndwnd; /* largest window peer has offered */
682
683 int t_softerror; /* possible error not yet reported */
684 /* out-of-band data */
685 char t_oobflags; /* have some */
686 char t_iobc; /* input character */
687 /* RFC 1323 variables */
688 u_char snd_scale; /* window scaling for send window */
689 u_char rcv_scale; /* window scaling for recv window */
690 u_char request_r_scale; /* pending window scaling */
691 u_char requested_s_scale;
692 u_int32_t ts_recent; /* timestamp echo data */
693
694 u_int32_t ts_recent_age; /* when last updated */
695 tcp_seq last_ack_sent;
696 /* RFC 1644 variables */
697 tcp_cc cc_send; /* send connection count */
698 tcp_cc cc_recv; /* receive connection count */
699 tcp_seq snd_recover; /* for use in fast recovery */
700 /* experimental */
701 u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */
702 u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */
703 u_int32_t t_badrxtwin; /* window for retransmit recovery */
704
705 u_quad_t xt_alignment_hack;
706 };
707
708 #endif /* !CONFIG_EMBEDDED */
709
710 #pragma pack()
711
712 /*
713 * Names for TCP sysctl objects
714 */
715 #define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
716 #define TCPCTL_DO_RFC1644 2 /* use RFC-1644 extensions */
717 #define TCPCTL_MSSDFLT 3 /* MSS default */
718 #define TCPCTL_STATS 4 /* statistics (read-only) */
719 #define TCPCTL_RTTDFLT 5 /* default RTT estimate */
720 #define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */
721 #define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
722 #define TCPCTL_SENDSPACE 8 /* send buffer space */
723 #define TCPCTL_RECVSPACE 9 /* receive buffer space */
724 #define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */
725 #define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */
726 #define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */
727 #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */
728 #define TCPCTL_MAXID 14 /* one more than the highest id above */
729
730 #ifdef KERNEL_PRIVATE
731 #define TCP_PKTLIST_CLEAR(tp) { \
732 (tp)->t_pktlist_head = (tp)->t_pktlist_tail = NULL; \
733 (tp)->t_lastchain = (tp)->t_pktlist_sentlen = 0; \
734 } /* empties the transmit chain bookkeeping; NOTE(review): expands to a bare { } block, so "TCP_PKTLIST_CLEAR(tp);" directly before an else does not parse */
735
736 #define TCPCTL_NAMES { \
737 { 0, 0 }, \
738 { "rfc1323", CTLTYPE_INT }, \
739 { "rfc1644", CTLTYPE_INT }, \
740 { "mssdflt", CTLTYPE_INT }, \
741 { "stats", CTLTYPE_STRUCT }, \
742 { "rttdflt", CTLTYPE_INT }, \
743 { "keepidle", CTLTYPE_INT }, \
744 { "keepintvl", CTLTYPE_INT }, \
745 { "sendspace", CTLTYPE_INT }, \
746 { "recvspace", CTLTYPE_INT }, \
747 { "keepinit", CTLTYPE_INT }, \
748 { "pcblist", CTLTYPE_STRUCT }, \
749 { "delacktime", CTLTYPE_INT }, \
750 { "v6mssdflt", CTLTYPE_INT }, \
751 }
752
753 #ifdef SYSCTL_DECL
754 SYSCTL_DECL(_net_inet_tcp);
755 #endif /* SYSCTL_DECL */
756
757 extern struct inpcbhead tcb; /* head of queue of active tcpcb's */
758 extern struct inpcbinfo tcbinfo;
759 extern struct tcpstat tcpstat; /* tcp statistics */
760 extern int tcp_mssdflt; /* XXX */
761 extern int tcp_minmss;
762 extern int tcp_minmssoverload;
763 extern int tcp_do_newreno;
764 extern int ss_fltsz;
765 extern int ss_fltsz_local;
766 #ifdef __APPLE__
767 extern u_int32_t tcp_now; /* for RFC 1323 timestamps */
768 extern int tcp_delack_enabled;
769 #endif /* __APPLE__ */
770
771 extern int tcp_do_sack; /* SACK enabled/disabled */
772
773 #if CONFIG_IFEF_NOWINDOWSCALE
774 extern int tcp_obey_ifef_nowindowscale;
775 #endif
776
777 void tcp_canceltimers(struct tcpcb *);
778 struct tcpcb *
779 tcp_close(struct tcpcb *);
780 void tcp_ctlinput(int, struct sockaddr *, void *);
781 int tcp_ctloutput(struct socket *, struct sockopt *);
782 struct tcpcb *
783 tcp_drop(struct tcpcb *, int);
784 void tcp_drain(void);
785 void tcp_fasttimo(void *);
786 struct rmxp_tao *
787 tcp_gettaocache(struct inpcb *);
788 void tcp_init(void) __attribute__((section("__TEXT, initcode")));
789 void tcp_input(struct mbuf *, int);
790 void tcp_mss(struct tcpcb *, int, unsigned int);
791 int tcp_mssopt(struct tcpcb *);
792 void tcp_drop_syn_sent(struct inpcb *, int);
793 void tcp_mtudisc(struct inpcb *, int);
794 struct tcpcb *
795 tcp_newtcpcb(struct inpcb *);
796 int tcp_output(struct tcpcb *);
797 void tcp_respond(struct tcpcb *, void *,
798 struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int,
799 unsigned int);
800 struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int);
801 void tcp_setpersist(struct tcpcb *);
802 void tcp_slowtimo(void);
803 struct tcptemp *
804 tcp_maketemplate(struct tcpcb *);
805 void tcp_fillheaders(struct tcpcb *, void *, void *);
806 struct tcpcb *
807 tcp_timers(struct tcpcb *, int);
808 void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int);
809
810 void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
811 void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
812 void tcp_clean_sackreport(struct tcpcb *tp);
813 void tcp_sack_adjust(struct tcpcb *tp);
814 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
815 void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
816 void tcp_free_sackholes(struct tcpcb *tp);
817 int32_t tcp_sbspace(struct tcpcb *tp);
818 void tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp);
819
820
821 int tcp_lock (struct socket *, int, void *);
822 int tcp_unlock (struct socket *, int, void *);
823 #ifdef _KERN_LOCKS_H_
824 lck_mtx_t * tcp_getlock (struct socket *, int);
825 #else
826 void * tcp_getlock (struct socket *, int);
827 #endif
828
829
830 extern struct pr_usrreqs tcp_usrreqs;
831 extern u_int32_t tcp_sendspace;
832 extern u_int32_t tcp_recvspace;
833 tcp_seq tcp_new_isn(struct tcpcb *);
834
835 #endif /* KERNEL_PRIVATE */
836
837 #endif /* _NETINET_TCP_VAR_H_ */