X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4bd07ac2140668789aa3ee8ec4dde4a3e0a3bba5..5c9f46613a83ebfc29a5b1f099448259e96a98f0:/bsd/netinet/tcp_var.h diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index 26f5b49d0..4fde35c90 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -75,7 +75,7 @@ #define _TCPCB_LIST_HEAD(name, type) \ struct name { \ u_int32_t lh_first; \ -}; +} #else #define _TCPCB_PTR(x) x #define _TCPCB_LIST_HEAD(name, type) LIST_HEAD(name, type) @@ -83,7 +83,7 @@ struct name { \ #ifdef KERNEL_PRIVATE -#define TCP_RETRANSHZ 1000 /* granularity of TCP timestamps, 1ms */ +#define TCP_RETRANSHZ 1000 /* granularity of TCP timestamps, 1ms */ /* Minimum time quantum within which the timers are coalesced */ #define TCP_TIMER_10MS_QUANTUM (TCP_RETRANSHZ/100) /* every 10ms */ #define TCP_TIMER_100MS_QUANTUM (TCP_RETRANSHZ/10) /* every 100ms */ @@ -93,18 +93,12 @@ struct name { \ #define N_TIME_WAIT_SLOTS 128 /* must be power of 2 */ -/* Base RTT is stored for N_MIN_RTT_HISTORY slots. This is used to - * estimate expected minimum RTT for delay based congestion control - * algorithms. - */ -#define N_RTT_BASE 5 - -/* Always allow at least 4 packets worth of recv window when adjusting +/* Always allow at least 16 packets worth of recv window when adjusting * recv window using inter-packet arrival jitter. */ -#define MIN_IAJ_WIN 4 +#define MIN_IAJ_WIN 16 -/* A variation in delay of this many milliseconds is tolerable. This limit has to +/* A variation in delay of this many milliseconds is tolerable. This limit has to * be low but greater than zero. We also use standard deviation on jitter to adjust * this limit for different link and connection types. */ @@ -120,12 +114,12 @@ struct name { \ */ #define ACC_IAJ_HIGH_THRESH 100 -/* When accumulated IAJ reaches this value, the receiver starts to react by +/* When accumulated IAJ reaches this value, the receiver starts to react by * closing the window */ #define ACC_IAJ_REACT_LIMIT 200 -/* If the number of small packets (smaller than IAJ packet size) seen on a +/* If the number of small packets (smaller than IAJ packet size) seen on a * connection is more than this threshold, reset the size and learn it again. 
* This is needed because the sender might send smaller segments after PMTU * discovery and the receiver has to learn the new size. @@ -134,12 +128,12 @@ struct name { \ /* * Adaptive timeout is a read/write timeout specified by the application to - * get a socket event when the transport layer detects a stall in data - * transfer. The value specified is the number of probes that can be sent + * get a socket event when the transport layer detects a stall in data + * transfer. The value specified is the number of probes that can be sent * to the peer before generating an event. Since it is not specified as * a time value, the timeout will adjust based on the RTT seen on the link. - * The timeout will start only when there is an indication that the read/write - * operation is not making progress. + * The timeout will start only when there is an indication that the read/write + * operation is not making progress. * * If a write operation stalls, the probe will be retransmission of data. * If a read operation stalls, the probe will be a keep-alive packet. 
@@ -192,20 +186,28 @@ struct tcp_rxt_seg { SLIST_ENTRY(tcp_rxt_seg) rx_link; }; +struct tcp_notify_ack_marker { + tcp_seq notify_snd_una; /* Notify when snd_una crosses this seq */ + tcp_notify_ack_id_t notify_id; + SLIST_ENTRY(tcp_notify_ack_marker) notify_next; +}; + struct tcptemp { u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ struct tcphdr tt_t; }; struct bwmeas { - tcp_seq bw_start; /* start of bw measurement */ + tcp_seq bw_start; /* start of bw measurement */ uint32_t bw_ts; /* timestamp when bw measurement started */ - uint32_t bw_size; /* burst size in bytes for this bw measurement */ - uint32_t bw_minsizepkts; /* Min burst size as segments */ - uint32_t bw_maxsizepkts; /* Max burst size as segments */ + uint32_t bw_size; /* burst size in bytes for this bw measurement */ + uint32_t bw_minsizepkts; /* Min burst size as segments */ + uint32_t bw_maxsizepkts; /* Max burst size as segments */ uint32_t bw_minsize; /* Min size in bytes */ uint32_t bw_maxsize; /* Max size in bytes */ - uint32_t bw_sndbw; /* Measured send bw */ + uint32_t bw_sndbw; /* Measured send bandwidth */ + uint32_t bw_sndbw_max; /* Max measured bandwidth */ + uint32_t bw_rcvbw_max; /* Max receive bandwidth measured */ }; /* MPTCP Data sequence map entry */ @@ -214,6 +216,7 @@ struct mpt_dsn_map { uint32_t mpt_sseq; /* relative subflow # */ uint16_t mpt_len; /* length of mapping */ uint16_t mpt_csum; /* checksum value if on */ + uint8_t mpt_dfin; /* It's a DATA_FIN */ }; #define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ @@ -270,7 +273,7 @@ struct tcpcb { #define TF_NOPUSH 0x01000 /* don't push */ #define TF_REQ_CC 0x02000 /* have/will request CC */ #define TF_RCVD_CC 0x04000 /* a CC was received in SYN */ -#define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */ +#define TF_SENDCCNEW 0x08000 /* Unused */ #define TF_MORETOCOME 0x10000 /* More data to be appended to sock */ #define TF_LOCAL 0x20000 /* connection to a host on local link */ #define 
TF_RXWIN0SENT 0x40000 /* sent a receiver win 0 in response */ @@ -280,6 +283,7 @@ struct tcpcb { #define TF_WASFRECOVERY 0x400000 /* was in NewReno Fast Recovery */ #define TF_SIGNATURE 0x800000 /* require MD5 digests (RFC2385) */ #define TF_MAXSEGSNT 0x1000000 /* last segment sent was a full segment */ +#define TF_STREAMING_ON 0x2000000 /* Receiver detected streaming */ #define TF_PMTUD 0x4000000 /* Perform Path MTU Discovery for this connection */ #define TF_CLOSING 0x8000000 /* pending tcp close */ #define TF_TSO 0x10000000 /* TCP Segment Offloading is enable on this connection */ @@ -312,9 +316,9 @@ struct tcpcb { */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ - u_int t_maxopd; /* mss plus options */ - + u_int32_t t_maxopd; /* mss plus options */ u_int32_t t_rcvtime; /* time at which a packet was received */ + u_int32_t t_sndtime; /* time at which we last sent new data */ u_int32_t t_starttime; /* time connection was established */ int t_rtttime; /* tcp clock when rtt calculation was started */ tcp_seq t_rtseq; /* sequence number being timed */ @@ -356,15 +360,12 @@ struct tcpcb { /* State for limiting early retransmits when SACK is not enabled */ u_int16_t t_early_rexmt_count; /* count of early rexmts */ - u_int32_t t_early_rexmt_win; /* window for limiting early rexmts */ + u_int32_t t_early_rexmt_win; /* window for limiting early rexmts */ u_int32_t ts_recent; /* timestamp echo data */ u_int32_t ts_recent_age; /* when last updated */ tcp_seq last_ack_sent; -/* RFC 1644 variables */ - tcp_cc cc_send; /* send connection count */ - tcp_cc cc_recv; /* receive connection count */ /* RFC 3465 variables */ u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */ @@ -380,6 +381,7 @@ struct tcpcb { u_int32_t rcv_unackwin; /* to measure win for stretching acks */ u_int32_t rcv_by_unackwin; /* bytes seen during the last ack-stretching win */ u_int32_t rcv_nostrack_ts; /* timestamp when stretch ack was disabled automatically */ + u_int32_t 
rcv_nostrack_pkts; /* pkts received since stretch ack was disabled */ u_int16_t rcv_waitforss; /* wait for packets during slow-start */ /* ECN stats */ @@ -400,10 +402,10 @@ struct tcpcb { #define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */ #define TE_CEHEURI_SET 0x2000 /* We did our CE-probing at the beginning */ #define TE_CLIENT_SETUP 0x4000 /* setup from client side */ +#define TE_RCVD_SYN_RST 0x8000 /* Received RST to the first ECN enabled SYN */ u_int32_t t_ecn_recv_ce; /* Received CE from the network */ u_int32_t t_ecn_recv_cwr; /* Packets received with CWR */ - u_int8_t t_ecn_recv_ce_pkt; /* Received packet with CE-bit set (independent from last_ack_sent) */ /* state for bad retransmit recovery */ u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ @@ -412,7 +414,7 @@ struct tcpcb { int t_srtt_prev; /* srtt prior to retransmit */ int t_rttvar_prev; /* rttvar prior to retransmit */ u_int32_t t_badrexmt_time; /* bad rexmt detection time */ - + /* Packet reordering metric */ u_int16_t t_reorderwin; /* Reordering late time offset */ @@ -426,13 +428,13 @@ struct tcpcb { int rcv_numsacks; /* # distinct sack blks present */ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ struct sackhint sackhint; /* SACK scoreboard hint */ - + struct mbuf *t_pktlist_head; /* First packet in transmit chain */ struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */ u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */ u_int32_t t_keepidle; /* keepalive idle timer (override global if > 0) */ - u_int32_t t_keepinit; /* connection timeout, i.e. idle time + u_int32_t t_keepinit; /* connection timeout, i.e. 
idle time in SYN_SENT or SYN_RECV state */ u_int32_t t_keepintvl; /* interval between keepalives */ u_int32_t t_keepcnt; /* number of keepalives before close */ @@ -441,7 +443,7 @@ struct tcpcb { u_int16_t t_pmtud_lastseg_size; /* size of the last sent segment */ u_int16_t t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ u_int32_t t_pmtud_start_ts; /* Time of PMTUD blackhole detection */ - + struct { u_int32_t rxduplicatebytes; @@ -450,11 +452,12 @@ struct tcpcb { u_int8_t synrxtshift; u_int8_t unused; u_int16_t unused_pad_to_8; + u_int32_t rxmitpkts; } t_stat; - - /* Background congestion related state */ - uint32_t rtt_hist[N_RTT_BASE]; /* history of minimum RTT */ - uint32_t rtt_count; /* Number of RTT samples in recent base history */ + u_int8_t t_notify_ack_count; + u_int8_t t_ecn_recv_ce_pkt; /* Received packet with CE-bit set (independent from last_ack_sent) */ + u_int16_t t_cached_maxopd; /* default for MSS adjustment using link status report */ + uint32_t bg_ssthresh; /* Slow start threshold until delay increases */ uint32_t t_flagsext; /* Another field to accommodate more flags */ #define TF_RXTFINDROP 0x1 /* Drop conn after retransmitting FIN 3 times */ @@ -467,7 +470,6 @@ struct tcpcb { #define TF_DETECT_READSTALL 0x80 /* Used to detect a stall during read operation */ #define TF_RECV_THROTTLE 0x100 /* Input throttling active */ #define TF_NOSTRETCHACK 0x200 /* ack every other packet */ -#define TF_STREAMEOW 0x400 /* Last packet was small indicating end of write */ #define TF_NOTIMEWAIT 0x800 /* Avoid going into time-wait */ #define TF_SENT_TLPROBE 0x1000 /* Sent data in PTO */ #define TF_PKTS_REORDERED 0x2000 /* Detected reordering */ @@ -480,20 +482,21 @@ struct tcpcb { #define TF_CWND_NONVALIDATED 0x100000 /* cwnd non validated */ #define TF_PROBING 0x200000 /* Trigger probe timeout */ #define TF_FASTOPEN 0x400000 /* TCP Fastopen is enabled */ +#define TF_REASS_INPROG 0x800000 /* Reassembly is in progress */ +#define 
TF_FASTOPEN_HEUR 0x1000000 /* Make sure that heuristics never get skipped */ #if TRAFFIC_MGT /* Inter-arrival jitter related state */ - uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */ + uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */ uint16_t iaj_size; /* Size of packet for iaj measurement */ uint8_t iaj_small_pkt; /* Count of packets smaller than iaj_size */ uint8_t t_pipeack_ind; /* index for next pipeack sample */ uint16_t iaj_pktcnt; /* packet count, to avoid throttling initially */ uint16_t acc_iaj; /* Accumulated iaj */ - tcp_seq iaj_rwintop; /* recent max advertised window */ uint32_t avg_iaj; /* Mean */ uint32_t std_dev_iaj; /* Standard deviation */ #endif /* TRAFFIC_MGT */ - struct bwmeas *t_bwmeas; /* State for bandwidth measurement */ + struct bwmeas *t_bwmeas; /* State for bandwidth measurement */ uint32_t t_lropktlen; /* Bytes in a LRO frame */ tcp_seq t_idleat; /* rcv_nxt at idle time */ TAILQ_ENTRY(tcpcb) t_twentry; /* link for time wait queue */ @@ -517,13 +520,13 @@ struct tcpcb { #if MPTCP u_int32_t t_mpflags; /* flags for multipath TCP */ -#define TMPF_PREESTABLISHED 0x00000001 /* conn in pre-established state */ -#define TMPF_SENT_KEYS 0x00000002 /* indicates that keys were sent */ +#define TMPF_PREESTABLISHED 0x00000001 /* conn in pre-established state */ +#define TMPF_SND_KEYS 0x00000002 /* indicates that keys should be sent */ #define TMPF_MPTCP_TRUE 0x00000004 /* negotiated MPTCP successfully */ -#define TMPF_MPTCP_RCVD_KEY 0x00000008 /* state for 3-way handshake */ +#define TMPF_MPTCP_RCVD_KEY 0x00000008 /* state for 3-way handshake */ #define TMPF_SND_MPPRIO 0x00000010 /* send priority of subflow */ #define TMPF_SND_REM_ADDR 0x00000020 /* initiate address removal */ -#define TMPF_UNUSED 0x00000040 /* address addition acked by peer */ +#define TMPF_RCVD_DACK 0x00000040 /* received a data-ack */ #define TMPF_JOINED_FLOW 0x00000080 /* Indicates additional flow */ #define TMPF_BACKUP_PATH 
0x00000100 /* Indicates backup path */ #define TMPF_MPTCP_ACKNOW 0x00000200 /* Send Data ACK */ @@ -534,17 +537,19 @@ struct tcpcb { #define TMPF_RECVD_JOIN 0x00004000 /* Received Join */ #define TMPF_RESET 0x00008000 /* Send RST */ #define TMPF_TCP_FALLBACK 0x00010000 /* Fallback to TCP */ -#define TMPF_FASTCLOSE 0x00020000 /* Send Fastclose option */ +#define TMPF_FASTCLOSERCV 0x00020000 /* Received Fastclose option */ #define TMPF_EMBED_DSN 0x00040000 /* tp has DSN mapping */ #define TMPF_MPTCP_READY 0x00080000 /* Can send DSS options on data */ #define TMPF_INFIN_SENT 0x00100000 /* Sent infinite mapping */ #define TMPF_SND_MPFAIL 0x00200000 /* Received mapping csum failure */ -#define TMPF_FASTJOIN_SEND 0x00400000 /* Fast join early data send */ -#define TMPF_FASTJOINBY2_SEND 0x00800000 /* Fast join send after 3 WHS */ -#define TMPF_MPCAP_RETRANSMIT 0x01000000 /* Retransmission of 3rd ACK */ +#define TMPF_SND_JACK 0x00400000 /* Send a Join-ACK */ +#define TMPF_TFO_REQUEST 0x00800000 /* TFO Requested */ + +#define TMPF_MPTCP_SIGNALS (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL | TMPF_SND_KEYS | TMPF_SND_JACK) tcp_seq t_mpuna; /* unacknowledged sequence */ - void *t_mptcb; /* pointer to MPTCP TCB */ + struct mptcb *t_mptcb; /* pointer to MPTCP TCB */ + struct mptsub *t_mpsub; /* pointer to the MPTCP subflow */ struct mpt_dsn_map t_rcv_map; /* Receive mapping list */ u_int8_t t_local_aid; /* Addr Id for authentication */ u_int8_t t_rem_aid; /* Addr ID of another subflow */ @@ -556,8 +561,8 @@ struct tcpcb { #define TFO_F_COOKIE_REQ 0x04 /* Client requested a new cookie */ #define TFO_F_COOKIE_SENT 0x08 /* Client did send a cookie in the SYN */ #define TFO_F_SYN_LOSS 0x10 /* A SYN-loss triggered a fallback to regular TCP on the client-side */ -#define TFO_F_NO_RCVPROBING 0x20 /* This network is guaranteed to support TFO in the downstream direction */ -#define TFO_F_NO_SNDPROBING 0x40 /* This network is guaranteed to support TFO in the upstream direction 
*/ +#define TFO_F_NO_SNDPROBING 0x20 /* This network is guaranteed to support TFO in the upstream direction */ +#define TFO_F_HEURISTIC_DONE 0x40 /* We have already marked this network as bad */ u_int8_t t_tfo_flags; #define TFO_S_SYNDATA_RCV 0x01 /* SYN+data has been received */ #define TFO_S_COOKIEREQ_RECV 0x02 /* TFO-cookie request received */ @@ -568,6 +573,12 @@ struct tcpcb { #define TFO_S_SYN_DATA_SENT 0x40 /* SYN+data sent */ #define TFO_S_SYN_DATA_ACKED 0x80 /* SYN+data has been acknowledged in SYN/ACK */ #define TFO_S_SYN_LOSS 0x0100 /* SYN+TFO has been lost - fallback to regular TCP */ +#define TFO_S_COOKIE_WRONG 0x0200 /* Cookie we sent in the SYN was wrong */ +#define TFO_S_NO_COOKIE_RCV 0x0400 /* We asked for a cookie but didn't get one */ +#define TFO_S_HEURISTICS_DISABLE 0x0800 /* TFO-heuristics disabled it for this connection */ +#define TFO_S_SEND_BLACKHOLE 0x1000 /* TFO got blackholed in the send direction */ +#define TFO_S_RECV_BLACKHOLE 0x2000 /* TFO got blackholed in the recv direction */ +#define TFO_S_ONE_BYTE_PROXY 0x4000 /* TFO failed because of a proxy acknowledging just one byte */ u_int16_t t_tfo_stats; u_int8_t t_tfo_probes; /* TFO-probes we did send */ @@ -599,22 +610,25 @@ struct tcpcb { #define TFO_PROBE_PROBING 1 /* Sending out TCP-keepalives waiting for reply */ #define TFO_PROBE_WAIT_DATA 2 /* Received reply, waiting for data */ u_int8_t t_tfo_probe_state; - + u_int32_t t_rcvoopack; /* out-of-order packets received */ u_int32_t t_pawsdrop; /* segments dropped due to PAWS */ u_int32_t t_sack_recovery_episode; /* SACK recovery episodes */ u_int32_t t_reordered_pkts; /* packets reorderd */ u_int32_t t_dsack_sent; /* Sent DSACK notification */ u_int32_t t_dsack_recvd; /* Received a valid DSACK option */ + SLIST_HEAD(,tcp_notify_ack_marker) t_notify_ack; /* state for notifying data acknowledgements */ + u_int32_t t_recv_throttle_ts; /* TS for start of recv throttle */ + u_int32_t t_rxt_minimum_timeout; /* minimum retransmit timeout in 
ms */ }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) #define SACK_ENABLED(tp) (tp->t_flagsext & TF_SACK_ENABLE) /* - * If the connection is in a throttled state due to advisory feedback from + * If the connection is in a throttled state due to advisory feedback from * the interface output queue, reset that state. We do this in favor - * of entering recovery because the data transfer during recovery + * of entering recovery because the data transfer during recovery * should be just a trickle and it will help to improve performance. * We also do not want to back off twice in the same RTT. */ @@ -638,7 +652,7 @@ struct tcpcb { } while(0) /* - * When the number of duplicate acks received is less than + * When the number of duplicate acks received is less than * the retransmit threshold, use Limited Transmit algorithm */ extern int tcprexmtthresh; @@ -698,6 +712,15 @@ extern int tcprexmtthresh; (SEQ_LEQ((_seq_), (_tp_)->snd_max) && \ SEQ_GEQ((_seq_), ((_una_) - TCP_DSACK_MAX_SEND_WINDOW(_tp_)))) +#define TCP_RESET_REXMT_STATE(_tp_) do { /* zero t_rxtshift and t_rxtstart, then reset the MPTCP rexmit state; no trailing ';' so the caller supplies it */ \ + (_tp_)->t_rxtshift = 0; \ + (_tp_)->t_rxtstart = 0; \ + mptcp_reset_rexmit_state((_tp_)); \ +} while(0) + +#define TCP_AUTORCVBUF_MAX(_ifp_) (((_ifp_) != NULL && \ + ((_ifp_)->if_eflags & IFEF_3CA)) ? tcp_autorcvbuf_max_ca : \ + tcp_autorcvbuf_max) enum tcp_cc_event { TCP_CC_CWND_INIT, /* 0 */ @@ -770,22 +793,6 @@ struct rmxp_tao { #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) #define sototcpcb(so) (intotcpcb(sotoinpcb(so))) -/* - * The rtt measured is in milliseconds as the timestamp granularity is - * a millisecond. The smoothed round-trip time and estimated variance - * are stored as fixed point numbers scaled by the values below. - * For convenience, these scales are also used in smoothing the average - * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). - * With these scales, srtt has 5 bits to the right of the binary point, - * and thus an "ALPHA" of 0.875. 
rttvar has 4 bits to the right of the - * binary point, and is smoothed with an ALPHA of 0.75. - */ -#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ -#define TCP_RTT_SHIFT 5 /* shift for srtt; 5 bits frac. */ -#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 4 bits */ -#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */ -#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ - /* TFO-specific defines */ #define TFO_COOKIE_LEN_MIN 4 #define TFO_COOKIE_LEN_DEFAULT 8 @@ -850,7 +857,7 @@ struct tcpcb { #define TF_NOPUSH 0x01000 /* don't push */ #define TF_REQ_CC 0x02000 /* have/will request CC */ #define TF_RCVD_CC 0x04000 /* a CC was received in SYN */ -#define TF_SENDCCNEW 0x08000 /* send CCnew instead of CC in SYN */ +#define TF_SENDCCNEW 0x08000 /* Not implemented */ #define TF_MORETOCOME 0x10000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */ #define TF_RXWIN0SENT 0x40000 /* sent a receiver win 0 in response */ @@ -1141,6 +1148,60 @@ struct tcpstat { u_int32_t tcps_tfo_syn_data_acked;/* SYN+data has been acknowledged */ u_int32_t tcps_tfo_syn_loss; /* SYN+TFO has been lost and we fallback */ u_int32_t tcps_tfo_blackhole; /* TFO got blackholed by a middlebox. 
*/ + u_int32_t tcps_tfo_cookie_wrong; /* TFO-cookie we sent was wrong */ + u_int32_t tcps_tfo_no_cookie_rcv; /* We asked for a cookie but didn't get one */ + u_int32_t tcps_tfo_heuristics_disable; /* TFO got disabled due to heuristics */ + u_int32_t tcps_tfo_sndblackhole; /* TFO got blackholed in the sending direction */ + u_int32_t tcps_mss_to_default; /* Change MSS to default using link status report */ + u_int32_t tcps_mss_to_medium; /* Change MSS to medium using link status report */ + u_int32_t tcps_mss_to_low; /* Change MSS to low using link status report */ + u_int32_t tcps_ecn_fallback_droprst; /* ECN fallback caused by connection drop due to RST */ + u_int32_t tcps_ecn_fallback_droprxmt; /* ECN fallback due to drop after multiple retransmits */ + u_int32_t tcps_ecn_fallback_synrst; /* ECN fallback due to rst after syn */ + + u_int32_t tcps_mptcp_rcvmemdrop; /* MPTCP packets dropped for lack of memory */ + u_int32_t tcps_mptcp_rcvduppack; /* MPTCP duplicate-only packets received */ + u_int32_t tcps_mptcp_rcvpackafterwin; /* MPTCP packets with data after window */ + + /* TCP timer statistics */ + u_int32_t tcps_timer_drift_le_1_ms; /* Timer drift less or equal to 1 ms */ + u_int32_t tcps_timer_drift_le_10_ms; /* Timer drift less or equal to 10 ms */ + u_int32_t tcps_timer_drift_le_20_ms; /* Timer drift less or equal to 20 ms */ + u_int32_t tcps_timer_drift_le_50_ms; /* Timer drift less or equal to 50 ms */ + u_int32_t tcps_timer_drift_le_100_ms; /* Timer drift less or equal to 100 ms */ + u_int32_t tcps_timer_drift_le_200_ms; /* Timer drift less or equal to 200 ms */ + u_int32_t tcps_timer_drift_le_500_ms; /* Timer drift less or equal to 500 ms */ + u_int32_t tcps_timer_drift_le_1000_ms; /* Timer drift less or equal to 1000 ms */ + u_int32_t tcps_timer_drift_gt_1000_ms; /* Timer drift greater than 1000 ms */ + + u_int32_t tcps_mptcp_handover_attempt; /* Total number of MPTCP-attempts using handover mode */ + u_int32_t tcps_mptcp_interactive_attempt; /* Total 
number of MPTCP-attempts using interactive mode */ + u_int32_t tcps_mptcp_aggregate_attempt; /* Total number of MPTCP-attempts using aggregate mode */ + u_int32_t tcps_mptcp_fp_handover_attempt; /* Same as previous three but only for first-party apps */ + u_int32_t tcps_mptcp_fp_interactive_attempt; + u_int32_t tcps_mptcp_fp_aggregate_attempt; + u_int32_t tcps_mptcp_heuristic_fallback; /* Total number of MPTCP-connections that fell back due to heuristics */ + u_int32_t tcps_mptcp_fp_heuristic_fallback; /* Same as previous but for first-party apps */ + u_int32_t tcps_mptcp_handover_success_wifi; /* Total number of successful handover-mode connections that *started* on WiFi */ + u_int32_t tcps_mptcp_handover_success_cell; /* Total number of successful handover-mode connections that *started* on Cell */ + u_int32_t tcps_mptcp_interactive_success; /* Total number of interactive-mode connections that negotiated MPTCP */ + u_int32_t tcps_mptcp_aggregate_success; /* Same as previous but for aggregate */ + u_int32_t tcps_mptcp_fp_handover_success_wifi; /* Same as previous four, but for first-party apps */ + u_int32_t tcps_mptcp_fp_handover_success_cell; + u_int32_t tcps_mptcp_fp_interactive_success; + u_int32_t tcps_mptcp_fp_aggregate_success; + u_int32_t tcps_mptcp_handover_cell_from_wifi; /* Total number of connections that use cell in handover-mode (coming from WiFi) */ + u_int32_t tcps_mptcp_handover_wifi_from_cell; /* Total number of connections that use WiFi in handover-mode (coming from cell) */ + u_int32_t tcps_mptcp_interactive_cell_from_wifi; /* Total number of connections that use cell in interactive mode (coming from WiFi) */ + u_int64_t tcps_mptcp_handover_cell_bytes; /* Total number of bytes sent on cell in handover-mode (on new subflows, ignoring initial one) */ + u_int64_t tcps_mptcp_interactive_cell_bytes; /* Same as previous but for interactive */ + u_int64_t tcps_mptcp_aggregate_cell_bytes; + u_int64_t tcps_mptcp_handover_all_bytes; /* Total number of 
bytes sent in handover */ + u_int64_t tcps_mptcp_interactive_all_bytes; + u_int64_t tcps_mptcp_aggregate_all_bytes; + u_int32_t tcps_mptcp_back_to_wifi; /* Total number of connections that succeed to move traffic away from cell (when starting on cell) */ + u_int32_t tcps_mptcp_wifi_proxy; /* Total number of new subflows that fell back to regular TCP on cell */ + u_int32_t tcps_mptcp_cell_proxy; /* Total number of new subflows that fell back to regular TCP on WiFi */ }; @@ -1185,6 +1246,7 @@ struct xtcpcb { u_quad_t xt_alignment_hack; }; +#if !CONFIG_EMBEDDED struct xtcpcb64 { u_int32_t xt_len; @@ -1265,6 +1327,7 @@ struct xtcpcb64 { u_quad_t xt_alignment_hack; }; +#endif /* !CONFIG_EMBEDDED */ #ifdef PRIVATE @@ -1274,54 +1337,54 @@ struct xtcpcb_n { u_int64_t t_segq; int t_dupacks; /* consecutive dup acks recd */ - + int t_timer[TCPT_NTIMERS_EXT]; /* tcp timers */ - + int t_state; /* state of this connection */ u_int t_flags; - + int t_force; /* 1 if forcing out a byte */ - + tcp_seq snd_una; /* send unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; - * used to recognize retransmits - */ + * used to recognize retransmits + */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ - + tcp_seq snd_wl1; /* window update seg seq number */ tcp_seq snd_wl2; /* window update seg ack number */ tcp_seq iss; /* initial send sequence number */ tcp_seq irs; /* initial receive sequence number */ - + tcp_seq rcv_nxt; /* receive next */ tcp_seq rcv_adv; /* advertised window */ u_int32_t rcv_wnd; /* receive window */ tcp_seq rcv_up; /* receive urgent pointer */ - + u_int32_t snd_wnd; /* send window */ u_int32_t snd_cwnd; /* congestion-controlled window */ u_int32_t snd_ssthresh; /* snd_cwnd size threshold for - * for slow start exponential to - * linear switch - */ + * for slow start exponential to + * linear switch + */ u_int t_maxopd; /* mss plus options */ - + u_int32_t t_rcvtime; /* time at which a packet was received */ u_int32_t 
t_starttime; /* time connection was established */ int t_rtttime; /* round trip time */ tcp_seq t_rtseq; /* sequence number being timed */ - + int t_rxtcur; /* current retransmit value (ticks) */ u_int t_maxseg; /* maximum segment size */ int t_srtt; /* smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ - + int t_rxtshift; /* log(2) of rexmt exp. backoff */ u_int t_rttmin; /* minimum rtt allowed */ u_int32_t t_rttupdated; /* number of times rtt sampled */ u_int32_t max_sndwnd; /* largest window peer has offered */ - + int t_softerror; /* possible error not yet reported */ /* out-of-band data */ char t_oobflags; /* have some */ @@ -1332,7 +1395,7 @@ struct xtcpcb_n { u_char request_r_scale; /* pending window scaling */ u_char requested_s_scale; u_int32_t ts_recent; /* timestamp echo data */ - + u_int32_t ts_recent_age; /* when last updated */ tcp_seq last_ack_sent; /* RFC 1644 variables */ @@ -1344,6 +1407,22 @@ struct xtcpcb_n { u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ }; + /* + * The rtt measured is in milliseconds as the timestamp granularity is + * a millisecond. The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 5 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 4 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 32 /* multiplier for srtt; 5 bits frac. */ +#define TCP_RTT_SHIFT 5 /* shift for srtt; 5 bits frac. 
*/ +#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 4 bits */ +#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 4 bits */ +#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ + #endif /* PRIVATE */ #pragma pack() @@ -1403,20 +1482,19 @@ extern int tcp_minmss; #define TCP_FASTOPEN_SERVER 0x01 #define TCP_FASTOPEN_CLIENT 0x02 -extern int tcp_tfo_halfcnt; -extern int tcp_tfo_backlog; -extern int tcp_fastopen; -extern int tcp_tfo_fallback_min; -extern int ss_fltsz; -extern int ss_fltsz_local; -extern int tcp_do_rfc3390; /* Calculate ss_fltsz according to RFC 3390 */ +extern int tcp_tfo_halfcnt; +extern int tcp_tfo_backlog; +extern int tcp_fastopen; +extern int ss_fltsz; +extern int ss_fltsz_local; +extern int tcp_do_rfc3390; /* Calculate ss_fltsz according to RFC 3390 */ extern int tcp_do_rfc1323; extern int target_qdelay; -extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ +extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ extern struct timeval tcp_uptime; extern lck_spin_t *tcp_uptime_lock; -extern int tcp_delack_enabled; -extern int tcp_do_sack; /* SACK enabled/disabled */ +extern int tcp_delack_enabled; +extern int tcp_do_sack; /* SACK enabled/disabled */ extern int tcp_do_rfc3465; extern int tcp_do_rfc3465_lim2; extern int maxseg_unacked; @@ -1425,11 +1503,11 @@ extern struct zone *tcp_reass_zone; extern struct zone *tcp_rxt_seg_zone; extern int tcp_ecn_outbound; extern int tcp_ecn_inbound; - - -#if CONFIG_IFEF_NOWINDOWSCALE -extern int tcp_obey_ifef_nowindowscale; -#endif +extern u_int32_t tcp_do_autorcvbuf; +extern u_int32_t tcp_autorcvbuf_max; +extern u_int32_t tcp_autorcvbuf_max_ca; +extern u_int32_t tcp_autorcvbuf_inc_shift; +extern int tcp_recv_bg; struct protosw; struct domain; @@ -1438,13 +1516,14 @@ struct tcp_respond_args { unsigned int ifscope; unsigned int nocell:1, noexpensive:1, - awdl_unrestricted:1; + awdl_unrestricted:1, + intcoproc_allowed:1; }; void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); -void 
tcp_ctlinput(int, struct sockaddr *, void *); +void tcp_ctlinput(int, struct sockaddr *, void *, struct ifnet *); int tcp_ctloutput(struct socket *, struct sockopt *); struct tcpcb * tcp_drop(struct tcpcb *, int); @@ -1476,6 +1555,7 @@ void tcp_fillheaders(struct tcpcb *, void *, void *); struct tcpcb *tcp_timers(struct tcpcb *, int); void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int); +void tcp_fill_info(struct tcpcb *, struct tcp_info *); void tcp_sack_doack(struct tcpcb *, struct tcpopt *, struct tcphdr *, u_int32_t *); extern boolean_t tcp_sack_process_dsack(struct tcpcb *, struct tcpopt *, @@ -1495,7 +1575,8 @@ void tcp_reset_stretch_ack(struct tcpcb *tp); extern void tcp_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *); uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags); uint32_t tcp_find_anypcb_byaddr(struct ifaddr *ifa); -void tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so); +void tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so, + u_int32_t maxrcvbuf); struct bwmeas* tcp_bwmeas_alloc(struct tcpcb *tp); void tcp_bwmeas_free(struct tcpcb *tp); extern int32_t timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2); @@ -1506,7 +1587,7 @@ extern void tcp_set_recv_bg(struct socket *); extern void tcp_clear_recv_bg(struct socket *); extern boolean_t tcp_sack_byte_islost(struct tcpcb *tp); #define IS_TCP_RECV_BG(_so) \ - ((_so)->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG) + ((_so)->so_flags1 & SOF1_TRAFFIC_MGT_TCP_RECVBG) #if TRAFFIC_MGT #define CLEAR_IAJ_STATE(_tp_) (_tp_)->iaj_rcv_ts = 0 @@ -1526,7 +1607,6 @@ lck_mtx_t * tcp_getlock (struct socket *, int); void * tcp_getlock (struct socket *, int); #endif - extern struct pr_usrreqs tcp_usrreqs; extern u_int32_t tcp_sendspace; extern u_int32_t tcp_recvspace; @@ -1548,18 +1628,39 @@ extern void tcp_probe_connectivity(struct ifnet *ifp, u_int32_t enable); extern void tcp_get_connectivity_status(struct tcpcb *, struct tcp_conn_status 
*); +extern void tcp_fill_keepalive_offload_frames(struct ifnet *, + struct ifnet_keepalive_offload_frame *, u_int32_t, size_t, u_int32_t *); + extern boolean_t tfo_enabled(const struct tcpcb *tp); extern void tcp_disable_tfo(struct tcpcb *tp); extern void tcp_tfo_gen_cookie(struct inpcb *inp, u_char *out, size_t blk_size); #define TCP_FASTOPEN_KEYLEN 16 +extern int tcp_freeq(struct tcpcb *tp); +extern errno_t tcp_notify_ack_id_valid(struct tcpcb *, struct socket *, u_int32_t); +extern errno_t tcp_add_notify_ack_marker(struct tcpcb *, u_int32_t); +extern void tcp_notify_ack_free(struct tcpcb *); +extern void tcp_notify_acknowledgement(struct tcpcb *, struct socket *); +extern void tcp_get_notify_ack_count(struct tcpcb *, + struct tcp_notify_ack_complete *); +extern void tcp_get_notify_ack_ids(struct tcpcb *tp, + struct tcp_notify_ack_complete *); +extern void tcp_update_mss_locked(struct socket *, struct ifnet *); + +extern int get_tcp_inp_list(struct inpcb **, int, inp_gen_t); +extern bool tcp_notify_ack_active(struct socket *so); #if MPTCP -extern int mptcp_input_preproc(struct tcpcb *, struct mbuf *, int); -extern void mptcp_output_csum(struct tcpcb *, struct mbuf *, int32_t, unsigned, - u_int64_t, u_int32_t *); +extern int mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, + struct tcphdr *th, int drop_hdrlen); +extern uint32_t mptcp_output_csum(struct mbuf *m, uint64_t dss_val, + uint32_t sseq, uint16_t dlen); extern int mptcp_adj_mss(struct tcpcb *, boolean_t); -extern void mptcp_insert_rmap(struct tcpcb *, struct mbuf *); +extern void mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th); #endif + +__private_extern__ void tcp_update_stats_per_flow( + struct ifnet_stats_per_flow *, struct ifnet *); + #endif /* BSD_KERNEL_RPIVATE */ #endif /* _NETINET_TCP_VAR_H_ */