+
+/*
+ * Determine if we can grow the receive socket buffer to avoid sending
+ * a zero window update to the peer. We allow even socket buffers with
+ * a fixed size (set by the application) to grow if the resource
+ * constraints are met. They will also be trimmed after the application
+ * reads data.
+ */
+static void
+tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb)
+{
+ u_int32_t rcvbufinc = tp->t_maxseg << 4;
+ u_int32_t rcvbuf = sb->sb_hiwat;
+ struct socket *so = tp->t_inpcb->inp_socket;
+
+ /*
+ * If message delivery is enabled, do not count
+ * unordered bytes in receive buffer towards hiwat
+ */
+ if (so->so_flags & SOF_ENABLE_MSGS)
+ rcvbuf = rcvbuf - so->so_msg_state->msg_uno_bytes;
+
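+ /*
+ * Grow by 16 * MSS only when receive buffer autotuning is on, the
+ * buffer is allowed to grow, the link is not marked slow, extended
+ * background idle has not been requested, the free space is about
+ * to drop below the increment, and we are still under both the
+ * autotuning maximum and the ideal size target.
+ */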
+ if (tcp_do_autorcvbuf == 1 &&
+ tcp_cansbgrow(sb) &&
+ (tp->t_flags & TF_SLOWLINK) == 0 &&
+ (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
+ (rcvbuf - sb->sb_cc) < rcvbufinc &&
+ rcvbuf < tcp_autorcvbuf_max &&
+ (sb->sb_idealsize > 0 &&
+ sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
+ sbreserve(sb,
+ min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
+ }
+}
+
+int32_t
+tcp_sbspace(struct tcpcb *tp)
+{
+ struct sockbuf *sb = &tp->t_inpcb->inp_socket->so_rcv;
+ u_int32_t rcvbuf = sb->sb_hiwat;
+ int32_t space;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int32_t pending = 0;
+
+ /*
+ * If message delivery is enabled, do not count
+ * unordered bytes in receive buffer towards hiwat mark.
+ * This value is used to return a correct rwnd that does
+ * not reflect the extra unordered bytes added to the
+ * receive socket buffer.
+ */
+ if (so->so_flags & SOF_ENABLE_MSGS)
+ rcvbuf = rcvbuf - so->so_msg_state->msg_uno_bytes;
+
+ tcp_sbrcv_grow_rwin(tp, sb);
+
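+ /*
+ * The usable space is the smaller of the remaining byte space and
+ * the remaining mbuf space in the receive buffer.
+ */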
+ space = ((int32_t) imin((rcvbuf - sb->sb_cc),
+ (sb->sb_mbmax - sb->sb_mbcnt)));
+ if (space < 0)
+ space = 0;
+
+#if CONTENT_FILTER
+ /* Compensate for data being processed by content filters */
+ pending = cfil_sock_data_space(sb);
+#endif /* CONTENT_FILTER */
+ if (pending > space)
+ space = 0;
+ else
+ space -= pending;
+
+ /*
+ * Avoid increasing the window size if the current window
+ * is already very low; we could be in "persist" mode and
+ * increasing it could break some apps (see rdar://5409343)
+ */
+
+ if (space < tp->t_maxseg)
+ return space;
+
+ /* Clip window size for slower link */
+
+ if (((tp->t_flags & TF_SLOWLINK) != 0) && slowlink_wsize > 0)
+ return imin(space, slowlink_wsize);
+
+ return space;
+}
+/*
+ * Check the TCP Segmentation Offload (TSO) capability for a given
+ * connection and interface pair.
+ */
+void
+tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp)
+{
+#if INET6
+ struct inpcb *inp;
+ int isipv6;
+#endif /* INET6 */
+#if MPTCP
+ /*
+ * We can't use TSO if this tcpcb belongs to an MPTCP session.
+ */
+ if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
+ tp->t_flags &= ~TF_TSO;
+ return;
+ }
+#endif
+#if INET6
+ inp = tp->t_inpcb;
+ isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+
+ if (isipv6) {
+ if (ifp && (ifp->if_hwassist & IFNET_TSO_IPV6)) {
+ tp->t_flags |= TF_TSO;
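+ /*
+ * Bound TSO bursts by the interface's advertised TSO
+ * MTU, falling back to TCP_MAXWIN when none is set.
+ */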
+ if (ifp->if_tso_v6_mtu != 0)
+ tp->tso_max_segment_size = ifp->if_tso_v6_mtu;
+ else
+ tp->tso_max_segment_size = TCP_MAXWIN;
+ } else
+ tp->t_flags &= ~TF_TSO;
+
+ } else
+#endif /* INET6 */
+
+ {
+ if (ifp && (ifp->if_hwassist & IFNET_TSO_IPV4)) {
+ tp->t_flags |= TF_TSO;
+ if (ifp->if_tso_v4_mtu != 0)
+ tp->tso_max_segment_size = ifp->if_tso_v4_mtu;
+ else
+ tp->tso_max_segment_size = TCP_MAXWIN;
+ } else
+ tp->t_flags &= ~TF_TSO;
+ }
+}
+
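+/* Convert a struct timeval into TCP clock ticks (TCP_RETRANSHZ ticks per second) */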
+#define TIMEVAL_TO_TCPHZ(_tv_) ((_tv_).tv_sec * TCP_RETRANSHZ + (_tv_).tv_usec / TCP_RETRANSHZ_TO_USEC)
+
+/*
+ * Calculate the TCP clock. The TCP clock gets updated at the
+ * boundaries of the TCP layer. This is done at 3 places:
+ * 1. Right before processing an incoming TCP packet
+ * 2. Whenever a connection wants to access the network using tcp_usrreqs
+ * 3. When a TCP timer fires or before the TCP slow timeout
+ */
+
+void
+calculate_tcp_clock(void)
+{
+ struct timeval tv = tcp_uptime;
+ struct timeval interval = {0, TCP_RETRANSHZ_TO_USEC};
+ struct timeval now, hold_now;
+ uint32_t incr = 0;
+
+ microuptime(&now);
+
+ /*
+ * Update coarse-grained networking timestamp (in sec.); the idea
+ * is to update the counter returnable via net_uptime() when
+ * we read time.
+ */
+ net_update_uptime_secs(now.tv_sec);
+
+ timevaladd(&tv, &interval);
+ if (timevalcmp(&now, &tv, >)) {
+ /* time to update the clock */
+ lck_spin_lock(tcp_uptime_lock);
+ if (timevalcmp(&tcp_uptime, &now, >=)) {
+ /* clock got updated while waiting for the lock */
+ lck_spin_unlock(tcp_uptime_lock);
+ return;
+ }
+
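+ /*
+ * Re-sample the clock while holding the lock and compute the
+ * time elapsed since tcp_uptime was last updated.
+ */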
+ microuptime(&now);
+ hold_now = now;
+ tv = tcp_uptime;
+ timevalsub(&now, &tv);
+
+ incr = TIMEVAL_TO_TCPHZ(now);
+ if (incr > 0) {
+ tcp_uptime = hold_now;
+ tcp_now += incr;
+ }
+
+ lck_spin_unlock(tcp_uptime_lock);
+ }
+ return;
+}
+
+/*
+ * Compute the receive window scaling that we are going to request
+ * for this connection based on sb_hiwat. Try to leave some
+ * room to potentially increase the window size up to a maximum
+ * defined by the constant tcp_autorcvbuf_max.
+ */
+void
+tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so)
+{
+ u_int32_t maxsockbufsize;
+
+ if (!tcp_do_rfc1323) {
+ tp->request_r_scale = 0;
+ return;
+ }
+
+ tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale);
+ maxsockbufsize = ((so->so_rcv.sb_flags & SB_USRSIZE) != 0) ?
+ so->so_rcv.sb_hiwat : tcp_autorcvbuf_max;
+
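+ /*
+ * Bump the scale factor until the scaled maximum window covers the
+ * largest socket buffer size we may want to advertise.
+ */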
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << tp->request_r_scale) < maxsockbufsize)
+ tp->request_r_scale++;
+ tp->request_r_scale = min(tp->request_r_scale, TCP_MAX_WINSHIFT);
+}
+
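+/*
+ * Return 1 when the amount of unsent data in the send socket buffer is
+ * at or below the not-sent low-water mark, i.e. the socket should be
+ * reported as writable to the application.
+ */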
+int
+tcp_notsent_lowat_check(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = NULL;
+ int notsent = 0;
+ if (inp != NULL) {
+ tp = intotcpcb(inp);
+ }
+ if (tp == NULL) {
+ return(0);
+ }
+
+ notsent = so->so_snd.sb_cc -
+ (tp->snd_nxt - tp->snd_una);
+
+ /* When we send a FIN or SYN, notsent can be negative.
+ * In that case we also need to send a write event to the
+ * process if it is waiting. In the FIN case, it will
+ * get an error from send because cantsendmore will be set.
+ */
+ if (notsent <= tp->t_notsent_lowat) {
+ return(1);
+ }
+
+ /* When Nagle's algorithm is not disabled, it is better
+ * to keep waking up the client until there is at least
+ * one maxseg of data to write.
+ */
+ if ((tp->t_flags & TF_NODELAY) == 0 &&
+ notsent > 0 && notsent < tp->t_maxseg) {
+ return(1);
+ }
+ return(0);
+}
+
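+/*
+ * Record the retransmitted sequence range [start, end] so that later
+ * ACKs and DSACKs can be matched against it to detect spurious
+ * retransmissions.
+ */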
+void
+tcp_rxtseg_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+ struct tcp_rxt_seg *rxseg = NULL, *prev = NULL, *next = NULL;
+ u_int32_t rxcount = 0;
+
+ if (SLIST_EMPTY(&tp->t_rxt_segments))
+ tp->t_dsack_lastuna = tp->snd_una;
+ /*
+ * First check if there is already a segment covering this
+ * sequence space.
+ */
+
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (SEQ_GT(rxseg->rx_start, start))
+ break;
+ prev = rxseg;
+ }
+ next = rxseg;
+
+ /* check if prev seg is for this sequence */
+ if (prev != NULL && SEQ_LEQ(prev->rx_start, start) &&
+ SEQ_GEQ(prev->rx_end, end)) {
+ prev->rx_count++;
+ return;
+ }
+
+ /*
+ * There are a few possibilities at this point.
+ * 1. prev overlaps with the beginning of this sequence
+ * 2. next overlaps with the end of this sequence
+ * 3. there is no overlap.
+ */
+
+ if (prev != NULL && SEQ_GT(prev->rx_end, start)) {
+ if (prev->rx_start == start && SEQ_GT(end, prev->rx_end)) {
+ start = prev->rx_end + 1;
+ prev->rx_count++;
+ } else {
+ prev->rx_end = (start - 1);
+ rxcount = prev->rx_count;
+ }
+ }
+
+ if (next != NULL && SEQ_LT(next->rx_start, end)) {
+ if (SEQ_LEQ(next->rx_end, end)) {
+ end = next->rx_start - 1;
+ next->rx_count++;
+ } else {
+ next->rx_start = end + 1;
+ rxcount = next->rx_count;
+ }
+ }
+ if (!SEQ_LT(start, end))
+ return;
+
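+ /* Record the remaining retransmitted range as a new entry */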
+ rxseg = (struct tcp_rxt_seg *) zalloc(tcp_rxt_seg_zone);
+ if (rxseg == NULL) {
+ return;
+ }
+ bzero(rxseg, sizeof(*rxseg));
+ rxseg->rx_start = start;
+ rxseg->rx_end = end;
+ rxseg->rx_count = rxcount + 1;
+
+ if (prev != NULL) {
+ SLIST_INSERT_AFTER(prev, rxseg, rx_link);
+ } else {
+ SLIST_INSERT_HEAD(&tp->t_rxt_segments, rxseg, rx_link);
+ }
+ return;
+}
+
+struct tcp_rxt_seg *
+tcp_rxtseg_find(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+ struct tcp_rxt_seg *rxseg;
+ if (SLIST_EMPTY(&tp->t_rxt_segments))
+ return (NULL);
+
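+ /*
+ * The list is ordered by rx_start, so we can stop searching as
+ * soon as we pass the requested start sequence.
+ */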
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (SEQ_LEQ(rxseg->rx_start, start) &&
+ SEQ_GEQ(rxseg->rx_end, end))
+ return (rxseg);
+ if (SEQ_GT(rxseg->rx_start, start))
+ break;
+ }
+ return (NULL);
+}
+
+void
+tcp_rxtseg_clean(struct tcpcb *tp)
+{
+ struct tcp_rxt_seg *rxseg, *next;
+
+ SLIST_FOREACH_SAFE(rxseg, &tp->t_rxt_segments, rx_link, next) {
+ SLIST_REMOVE(&tp->t_rxt_segments, rxseg,
+ tcp_rxt_seg, rx_link);
+ zfree(tcp_rxt_seg_zone, rxseg);
+ }
+ tp->t_dsack_lastuna = tp->snd_max;
+}
+
+boolean_t
+tcp_rxtseg_detect_bad_rexmt(struct tcpcb *tp, tcp_seq th_ack)
+{
+ boolean_t bad_rexmt;
+ struct tcp_rxt_seg *rxseg;
+
+ if (SLIST_EMPTY(&tp->t_rxt_segments))
+ return (FALSE);
+
+ /*
+ * If not all of the segments in this window have been cumulatively
+ * acknowledged, there can still be undetected packet loss.
+ * Do not restore congestion window in that case.
+ */
+ if (SEQ_LT(th_ack, tp->snd_recover))
+ return (FALSE);
+
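+ /*
+ * The retransmissions are considered spurious only if every
+ * recorded segment was retransmitted exactly once and is marked
+ * TCP_RXT_SPURIOUS.
+ */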
+ bad_rexmt = TRUE;
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (rxseg->rx_count > 1 ||
+ !(rxseg->rx_flags & TCP_RXT_SPURIOUS)) {
+ bad_rexmt = FALSE;
+ break;
+ }
+ }
+ return (bad_rexmt);
+}
+
+boolean_t
+tcp_rxtseg_dsack_for_tlp(struct tcpcb *tp)
+{
+ boolean_t dsack_for_tlp = FALSE;
+ struct tcp_rxt_seg *rxseg;
+ if (SLIST_EMPTY(&tp->t_rxt_segments))
+ return (FALSE);
+
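+ /*
+ * Attribute the DSACK to a tail loss probe only when a single
+ * segment was retransmitted once and it carries the
+ * TCP_RXT_DSACK_FOR_TLP flag.
+ */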
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (rxseg->rx_count == 1 &&
+ SLIST_NEXT(rxseg, rx_link) == NULL &&
+ (rxseg->rx_flags & TCP_RXT_DSACK_FOR_TLP)) {
+ dsack_for_tlp = TRUE;
+ break;
+ }
+ }
+ return (dsack_for_tlp);
+}
+
+u_int32_t
+tcp_rxtseg_total_size(struct tcpcb *tp)
+{
+ struct tcp_rxt_seg *rxseg;
+ u_int32_t total_size = 0;
+
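+ /* rx_start and rx_end are inclusive, hence the + 1 */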
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ total_size += (rxseg->rx_end - rxseg->rx_start) + 1;
+ }
+ return (total_size);
+}
+
+void
+tcp_get_connectivity_status(struct tcpcb *tp,
+ struct tcp_conn_status *connstatus)
+{
+ if (tp == NULL || connstatus == NULL)
+ return;
+ bzero(connstatus, sizeof(*connstatus));
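+ /*
+ * Too many retransmission timeouts indicate that the peer is not
+ * acknowledging our writes (or the connection attempt).
+ */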
+ if (tp->t_rxtshift >= TCP_CONNECTIVITY_PROBES_MAX) {
+ if (TCPS_HAVEESTABLISHED(tp->t_state)) {
+ connstatus->write_probe_failed = 1;
+ } else {
+ connstatus->conn_probe_failed = 1;
+ }
+ }
+ if (tp->t_rtimo_probes >= TCP_CONNECTIVITY_PROBES_MAX)
+ connstatus->read_probe_failed = 1;
+ if (tp->t_inpcb != NULL && tp->t_inpcb->inp_last_outifp != NULL
+ && (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_PROBE_CONNECTIVITY))
+ connstatus->probe_activated = 1;
+ return;
+}
+
+boolean_t
+tfo_enabled(const struct tcpcb *tp)
+{
+ return !!(tp->t_flagsext & TF_FASTOPEN);
+}
+
+void
+tcp_disable_tfo(struct tcpcb *tp)
+{
+ tp->t_flagsext &= ~TF_FASTOPEN;
+}
+