+ next = rxseg;
+
+ /* Check if the previous segment already covers this sequence range */
+ if (prev != NULL && SEQ_LEQ(prev->rx_start, start) &&
+ SEQ_GEQ(prev->rx_end, end)) {
+ prev->rx_count++;
+ return;
+ }
+
+ /*
+ * There are a few possibilities at this point:
+ * 1. prev overlaps with the beginning of this sequence
+ * 2. next overlaps with the end of this sequence
+ * 3. there is no overlap.
+ */
+
+ if (prev != NULL && SEQ_GT(prev->rx_end, start)) {
+ if (prev->rx_start == start && SEQ_GT(end, prev->rx_end)) {
+ start = prev->rx_end + 1;
+ prev->rx_count++;
+ } else {
+ prev->rx_end = (start - 1);
+ rxcount = prev->rx_count;
+ }
+ }
+
+ if (next != NULL && SEQ_LT(next->rx_start, end)) {
+ if (SEQ_LEQ(next->rx_end, end)) {
+ end = next->rx_start - 1;
+ next->rx_count++;
+ } else {
+ next->rx_start = end + 1;
+ rxcount = next->rx_count;
+ }
+ }
+ if (!SEQ_LT(start, end)) {
+ return;
+ }
+
+ rxseg = (struct tcp_rxt_seg *) zalloc(tcp_rxt_seg_zone);
+ if (rxseg == NULL) {
+ return;
+ }
+ bzero(rxseg, sizeof(*rxseg));
+ rxseg->rx_start = start;
+ rxseg->rx_end = end;
+ rxseg->rx_count = rxcount + 1;
+
+ if (prev != NULL) {
+ SLIST_INSERT_AFTER(prev, rxseg, rx_link);
+ } else {
+ SLIST_INSERT_HEAD(&tp->t_rxt_segments, rxseg, rx_link);
+ }
+}
+
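+/*
+ * Return the retransmit segment record that fully covers the sequence
+ * range [start, end], or NULL if there is none. The list is ordered by
+ * starting sequence number, so the search can stop early.
+ */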
+struct tcp_rxt_seg *
+tcp_rxtseg_find(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+ struct tcp_rxt_seg *rxseg;
+ if (SLIST_EMPTY(&tp->t_rxt_segments)) {
+ return NULL;
+ }
+
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (SEQ_LEQ(rxseg->rx_start, start) &&
+ SEQ_GEQ(rxseg->rx_end, end)) {
+ return rxseg;
+ }
+ if (SEQ_GT(rxseg->rx_start, start)) {
+ break;
+ }
+ }
+ return NULL;
+}
+
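+/*
+ * Walk the retransmit segment records that fall within [start, end] and
+ * mark the ones that were retransmitted only once as spurious.
+ */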
+void
+tcp_rxtseg_set_spurious(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+ struct tcp_rxt_seg *rxseg;
+ if (SLIST_EMPTY(&tp->t_rxt_segments)) {
+ return;
+ }
+
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (SEQ_GEQ(rxseg->rx_start, start) &&
+ SEQ_LEQ(rxseg->rx_end, end)) {
+ /*
+ * If the segment was retransmitted only once, mark it as
+ * spurious.
+ */
+ if (rxseg->rx_count == 1) {
+ rxseg->rx_flags |= TCP_RXT_SPURIOUS;
+ }
+ }
+
+ if (SEQ_GEQ(rxseg->rx_start, end)) {
+ break;
+ }
+ }
+ return;
+}
+
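+/*
+ * Free all retransmit segment records for this connection and reset the
+ * DSACK baseline (t_dsack_lastuna) to snd_max.
+ */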
+void
+tcp_rxtseg_clean(struct tcpcb *tp)
+{
+ struct tcp_rxt_seg *rxseg, *next;
+
+ SLIST_FOREACH_SAFE(rxseg, &tp->t_rxt_segments, rx_link, next) {
+ SLIST_REMOVE(&tp->t_rxt_segments, rxseg,
+ tcp_rxt_seg, rx_link);
+ zfree(tcp_rxt_seg_zone, rxseg);
+ }
+ tp->t_dsack_lastuna = tp->snd_max;
+}
+
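+/*
+ * Report whether all of the retransmissions in this window were spurious.
+ * Returns TRUE only when the cumulative ACK covers snd_recover and every
+ * tracked segment is flagged TCP_RXT_SPURIOUS.
+ */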
+boolean_t
+tcp_rxtseg_detect_bad_rexmt(struct tcpcb *tp, tcp_seq th_ack)
+{
+ boolean_t bad_rexmt;
+ struct tcp_rxt_seg *rxseg;
+
+ if (SLIST_EMPTY(&tp->t_rxt_segments)) {
+ return FALSE;
+ }
+
+ /*
+ * If not all of the segments in this window have been cumulatively
+ * acknowledged, there can still be undetected packet loss.
+ * Do not restore the congestion window in that case.
+ */
+ if (SEQ_LT(th_ack, tp->snd_recover)) {
+ return FALSE;
+ }
+
+ bad_rexmt = TRUE;
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (!(rxseg->rx_flags & TCP_RXT_SPURIOUS)) {
+ bad_rexmt = FALSE;
+ break;
+ }
+ }
+ return bad_rexmt;
+}
+
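+/*
+ * Return TRUE when the last retransmit record was retransmitted exactly
+ * once and carries the TCP_RXT_DSACK_FOR_TLP flag, i.e. the retransmission
+ * was a Tail Loss Probe.
+ */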
+boolean_t
+tcp_rxtseg_dsack_for_tlp(struct tcpcb *tp)
+{
+ boolean_t dsack_for_tlp = FALSE;
+ struct tcp_rxt_seg *rxseg;
+ if (SLIST_EMPTY(&tp->t_rxt_segments)) {
+ return FALSE;
+ }
+
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ if (rxseg->rx_count == 1 &&
+ SLIST_NEXT(rxseg, rx_link) == NULL &&
+ (rxseg->rx_flags & TCP_RXT_DSACK_FOR_TLP)) {
+ dsack_for_tlp = TRUE;
+ break;
+ }
+ }
+ return dsack_for_tlp;
+}
+
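+/*
+ * Return the total number of bytes covered by all retransmit segment
+ * records on this connection.
+ */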
+u_int32_t
+tcp_rxtseg_total_size(struct tcpcb *tp)
+{
+ struct tcp_rxt_seg *rxseg;
+ u_int32_t total_size = 0;
+
+ SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+ total_size += (rxseg->rx_end - rxseg->rx_start) + 1;
+ }
+ return total_size;
+}
+
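+/*
+ * Summarize the connectivity probe state for this connection: write or
+ * connect probes are reported as failed once t_rxtshift reaches
+ * TCP_CONNECTIVITY_PROBES_MAX, read probes once t_rtimo_probes does, and
+ * probe_activated mirrors IFEF_PROBE_CONNECTIVITY on the outgoing
+ * interface.
+ */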
+void
+tcp_get_connectivity_status(struct tcpcb *tp,
+ struct tcp_conn_status *connstatus)
+{
+ if (tp == NULL || connstatus == NULL) {
+ return;
+ }
+ bzero(connstatus, sizeof(*connstatus));
+ if (tp->t_rxtshift >= TCP_CONNECTIVITY_PROBES_MAX) {
+ if (TCPS_HAVEESTABLISHED(tp->t_state)) {
+ connstatus->write_probe_failed = 1;
+ } else {
+ connstatus->conn_probe_failed = 1;
+ }
+ }
+ if (tp->t_rtimo_probes >= TCP_CONNECTIVITY_PROBES_MAX) {
+ connstatus->read_probe_failed = 1;
+ }
+ if (tp->t_inpcb != NULL && tp->t_inpcb->inp_last_outifp != NULL &&
+ (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_PROBE_CONNECTIVITY)) {
+ connstatus->probe_activated = 1;
+ }
+}
+
+boolean_t
+tfo_enabled(const struct tcpcb *tp)
+{
+ return (tp->t_flagsext & TF_FASTOPEN) ? TRUE : FALSE;
+}
+
+void
+tcp_disable_tfo(struct tcpcb *tp)
+{
+ tp->t_flagsext &= ~TF_FASTOPEN;
+}
+
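+/*
+ * Build a bare ACK segment (IP or IPv6 header plus TCP header, no data)
+ * that can be handed to the interface for keepalive offload. When
+ * is_probe is set, the sequence number is snd_una - 1 so the peer is
+ * forced to reply with an ACK.
+ */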
+static struct mbuf *
+tcp_make_keepalive_frame(struct tcpcb *tp, struct ifnet *ifp,
+ boolean_t is_probe)
+{
+ struct inpcb *inp = tp->t_inpcb;
+ struct tcphdr *th;
+ u_int8_t *data;
+ int win = 0;
+ struct mbuf *m;
+
+ /*
+ * The code assumes the IP + TCP headers fit in an mbuf packet header
+ */
+ _CASSERT(sizeof(struct ip) + sizeof(struct tcphdr) <= _MHLEN);
+ _CASSERT(sizeof(struct ip6_hdr) + sizeof(struct tcphdr) <= _MHLEN);
+
+ MGETHDR(m, M_WAIT, MT_HEADER);
+ if (m == NULL) {
+ return NULL;
+ }
+ m->m_pkthdr.pkt_proto = IPPROTO_TCP;
+
+ data = mbuf_datastart(m);
+
+ if (inp->inp_vflag & INP_IPV4) {
+ bzero(data, sizeof(struct ip) + sizeof(struct tcphdr));
+ th = (struct tcphdr *)(void *) (data + sizeof(struct ip));
+ m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
+ m->m_pkthdr.len = m->m_len;
+ } else {
+ VERIFY(inp->inp_vflag & INP_IPV6);
+
+ bzero(data, sizeof(struct ip6_hdr)
+ + sizeof(struct tcphdr));
+ th = (struct tcphdr *)(void *)(data + sizeof(struct ip6_hdr));
+ m->m_len = sizeof(struct ip6_hdr) +
+ sizeof(struct tcphdr);
+ m->m_pkthdr.len = m->m_len;
+ }
+
+ tcp_fillheaders(tp, data, th);
+
+ if (inp->inp_vflag & INP_IPV4) {
+ struct ip *ip;
+
+ ip = (__typeof__(ip))(void *)data;
+
+ ip->ip_id = rfc6864 ? 0 : ip_randomid();
+ ip->ip_off = htons(IP_DF);
+ ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
+ ip->ip_ttl = inp->inp_ip_ttl;
+ ip->ip_tos |= (inp->inp_ip_tos & ~IPTOS_ECN_MASK);
+ ip->ip_sum = in_cksum_hdr(ip);
+ } else {
+ struct ip6_hdr *ip6;
+
+ ip6 = (__typeof__(ip6))(void *)data;
+
+ ip6->ip6_plen = htons(sizeof(struct tcphdr));
+ ip6->ip6_hlim = in6_selecthlim(inp, ifp);
+ ip6->ip6_flow = ip6->ip6_flow & ~IPV6_FLOW_ECN_MASK;
+
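+ /*
+ * Clear the KAME-style embedded scope ID from the addresses
+ * before the packet goes on the wire.
+ */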
+ if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
+ ip6->ip6_src.s6_addr16[1] = 0;
+ }
+ if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
+ ip6->ip6_dst.s6_addr16[1] = 0;
+ }
+ }
+ th->th_flags = TH_ACK;
+
+ win = tcp_sbspace(tp);
+ if (win > ((int32_t)TCP_MAXWIN << tp->rcv_scale)) {
+ win = (int32_t)TCP_MAXWIN << tp->rcv_scale;
+ }
+ th->th_win = htons((u_short) (win >> tp->rcv_scale));
+
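+ /*
+ * A probe uses a sequence number one below snd_una so that the
+ * peer treats it as old data and answers with an ACK.
+ */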
+ if (is_probe) {
+ th->th_seq = htonl(tp->snd_una - 1);
+ } else {
+ th->th_seq = htonl(tp->snd_una);
+ }
+ th->th_ack = htonl(tp->rcv_nxt);
+
+ /* Force the TCP checksum to be recomputed to its final value */
+ th->th_sum = 0;
+ if (inp->inp_vflag & INP_IPV4) {
+ th->th_sum = inet_cksum(m, IPPROTO_TCP,
+ sizeof(struct ip), sizeof(struct tcphdr));
+ } else {
+ th->th_sum = inet6_cksum(m, IPPROTO_TCP,
+ sizeof(struct ip6_hdr), sizeof(struct tcphdr));
+ }
+
+ return m;
+}
+
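+/*
+ * For every established TCP connection on ifp that requested keepalive
+ * offload, fill one entry of frames_array with the pre-built probe frame,
+ * the expected reply frame and the timing parameters the hardware needs.
+ * *used_frames_count is advanced past the entries that were written.
+ */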
+void
+tcp_fill_keepalive_offload_frames(ifnet_t ifp,
+ struct ifnet_keepalive_offload_frame *frames_array,
+ u_int32_t frames_array_count, size_t frame_data_offset,
+ u_int32_t *used_frames_count)
+{
+ struct inpcb *inp;
+ inp_gen_t gencnt;
+ u_int32_t frame_index = *used_frames_count;
+
+ if (ifp == NULL || frames_array == NULL ||
+ frames_array_count == 0 ||
+ frame_index >= frames_array_count ||
+ frame_data_offset >= IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+ return;
+ }
+
+ /*
+ * This function is called outside the regular TCP processing
+ * so we need to update the TCP clock.
+ */
+ calculate_tcp_clock();
+
+ lck_rw_lock_shared(tcbinfo.ipi_lock);
+ gencnt = tcbinfo.ipi_gencnt;
+ LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
+ struct socket *so;
+ struct ifnet_keepalive_offload_frame *frame;
+ struct mbuf *m = NULL;
+ struct tcpcb *tp = intotcpcb(inp);
+
+ if (frame_index >= frames_array_count) {
+ break;
+ }
+
+ if (inp->inp_gencnt > gencnt ||
+ inp->inp_state == INPCB_STATE_DEAD) {
+ continue;
+ }
+
+ if ((so = inp->inp_socket) == NULL ||
+ (so->so_state & SS_DEFUNCT)) {
+ continue;
+ }
+ /*
+ * Check the keepalive offload flag without taking the socket
+ * lock to avoid a deadlock
+ */
+ if (!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD)) {
+ continue;
+ }
+
+ if (!(inp->inp_vflag & (INP_IPV4 | INP_IPV6))) {
+ continue;
+ }
+ if (inp->inp_ppcb == NULL ||
+ in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
+ continue;
+ }
+ socket_lock(so, 1);
+ /* Release the want count */
+ if (inp->inp_ppcb == NULL ||
+ (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ if ((inp->inp_vflag & INP_IPV4) &&
+ (inp->inp_laddr.s_addr == INADDR_ANY ||
+ inp->inp_faddr.s_addr == INADDR_ANY)) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ if ((inp->inp_vflag & INP_IPV6) &&
+ (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ if (inp->inp_lport == 0 || inp->inp_fport == 0) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ if (inp->inp_last_outifp == NULL ||
+ inp->inp_last_outifp->if_index != ifp->if_index) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ if ((inp->inp_vflag & INP_IPV4) && frame_data_offset +
+ sizeof(struct ip) + sizeof(struct tcphdr) >
+ IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+ socket_unlock(so, 1);
+ continue;
+ } else if (!(inp->inp_vflag & INP_IPV4) && frame_data_offset +
+ sizeof(struct ip6_hdr) + sizeof(struct tcphdr) >
+ IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ /*
+ * There is no point in waking up the device for connections
+ * that are not established. Long-lived connections are meant
+ * for processes that will send and receive data.
+ */
+ if (tp->t_state != TCPS_ESTABLISHED) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ /*
+ * This inp has all the information that is needed to
+ * generate an offload frame.
+ */
+ frame = &frames_array[frame_index];
+ frame->type = IFNET_KEEPALIVE_OFFLOAD_FRAME_TCP;
+ frame->ether_type = (inp->inp_vflag & INP_IPV4) ?
+ IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4 :
+ IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6;
+ frame->interval = (uint16_t)(tp->t_keepidle > 0 ? tp->t_keepidle :
+ tcp_keepidle);
+ frame->keep_cnt = (uint8_t)TCP_CONN_KEEPCNT(tp);
+ frame->keep_retry = (uint16_t)TCP_CONN_KEEPINTVL(tp);
+ if (so->so_options & SO_NOWAKEFROMSLEEP) {
+ frame->flags |=
+ IFNET_KEEPALIVE_OFFLOAD_FLAG_NOWAKEFROMSLEEP;
+ }
+ frame->local_port = ntohs(inp->inp_lport);
+ frame->remote_port = ntohs(inp->inp_fport);
+ frame->local_seq = tp->snd_nxt;
+ frame->remote_seq = tp->rcv_nxt;
+ if (inp->inp_vflag & INP_IPV4) {
+ ASSERT(frame_data_offset + sizeof(struct ip) + sizeof(struct tcphdr) <= UINT8_MAX);
+ frame->length = (uint8_t)(frame_data_offset +
+ sizeof(struct ip) + sizeof(struct tcphdr));
+ frame->reply_length = frame->length;
+
+ frame->addr_length = sizeof(struct in_addr);
+ bcopy(&inp->inp_laddr, frame->local_addr,
+ sizeof(struct in_addr));
+ bcopy(&inp->inp_faddr, frame->remote_addr,
+ sizeof(struct in_addr));
+ } else {
+ struct in6_addr *ip6;
+
+ ASSERT(frame_data_offset + sizeof(struct ip6_hdr) + sizeof(struct tcphdr) <= UINT8_MAX);
+ frame->length = (uint8_t)(frame_data_offset +
+ sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
+ frame->reply_length = frame->length;
+
+ frame->addr_length = sizeof(struct in6_addr);
+ ip6 = (struct in6_addr *)(void *)frame->local_addr;
+ bcopy(&inp->in6p_laddr, ip6, sizeof(struct in6_addr));
+ if (IN6_IS_SCOPE_EMBED(ip6)) {
+ ip6->s6_addr16[1] = 0;
+ }
+
+ ip6 = (struct in6_addr *)(void *)frame->remote_addr;
+ bcopy(&inp->in6p_faddr, ip6, sizeof(struct in6_addr));
+ if (IN6_IS_SCOPE_EMBED(ip6)) {
+ ip6->s6_addr16[1] = 0;
+ }
+ }
+
+ /*
+ * First the probe
+ */
+ m = tcp_make_keepalive_frame(tp, ifp, TRUE);
+ if (m == NULL) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ bcopy(m->m_data, frame->data + frame_data_offset,
+ m->m_len);
+ m_freem(m);
+
+ /*
+ * Now the response packet to incoming probes
+ */
+ m = tcp_make_keepalive_frame(tp, ifp, FALSE);
+ if (m == NULL) {
+ socket_unlock(so, 1);
+ continue;
+ }
+ bcopy(m->m_data, frame->reply_data + frame_data_offset,
+ m->m_len);
+ m_freem(m);
+
+ frame_index++;
+ socket_unlock(so, 1);
+ }
+ lck_rw_done(tcbinfo.ipi_lock);
+ *used_frames_count = frame_index;
+}
+
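+/*
+ * Check whether an inpcb corresponds to the connection described by a
+ * keepalive offload frame: same outgoing interface, ports and addresses.
+ */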
+static bool
+inp_matches_kao_frame(ifnet_t ifp, struct ifnet_keepalive_offload_frame *frame,
+ struct inpcb *inp)
+{
+ if (inp->inp_ppcb == NULL) {
+ return false;
+ }
+ /* Release the want count */
+ if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+ return false;
+ }
+ if (inp->inp_last_outifp == NULL ||
+ inp->inp_last_outifp->if_index != ifp->if_index) {
+ return false;
+ }
+ if (frame->local_port != ntohs(inp->inp_lport) ||
+ frame->remote_port != ntohs(inp->inp_fport)) {
+ return false;
+ }
+ if (inp->inp_vflag & INP_IPV4) {
+ if (memcmp(&inp->inp_laddr, frame->local_addr,
+ sizeof(struct in_addr)) != 0 ||
+ memcmp(&inp->inp_faddr, frame->remote_addr,
+ sizeof(struct in_addr)) != 0) {
+ return false;
+ }
+ } else if (inp->inp_vflag & INP_IPV6) {
+ if (memcmp(&inp->inp_laddr, frame->local_addr,
+ sizeof(struct in6_addr)) != 0 ||
+ memcmp(&inp->inp_faddr, frame->remote_addr,
+ sizeof(struct in6_addr)) != 0) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ return true;
+}
+
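+/*
+ * Handle a keepalive offload timeout reported by the interface: find the
+ * connection matching the frame and drop it with ETIMEDOUT, the same way
+ * the keepalive timer would.
+ */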
+int
+tcp_notify_kao_timeout(ifnet_t ifp,
+ struct ifnet_keepalive_offload_frame *frame)
+{
+ struct inpcb *inp = NULL;
+ struct socket *so = NULL;
+ bool found = false;
+
+ /*
+ * Search for the matching socket while holding the list lock;
+ * the list is unlocked before the event is posted on that socket.
+ */
+ lck_rw_lock_shared(tcbinfo.ipi_lock);
+
+ LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
+ if ((so = inp->inp_socket) == NULL ||
+ (so->so_state & SS_DEFUNCT)) {
+ continue;
+ }
+ if (!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD)) {
+ continue;
+ }
+ if (!(inp->inp_vflag & (INP_IPV4 | INP_IPV6))) {
+ continue;
+ }
+ if (inp->inp_ppcb == NULL ||
+ in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
+ continue;
+ }
+ socket_lock(so, 1);
+ if (inp_matches_kao_frame(ifp, frame, inp)) {
+ /*
+ * Keep the matching socket locked
+ */
+ found = true;
+ break;
+ }
+ socket_unlock(so, 1);
+ }
+ lck_rw_done(tcbinfo.ipi_lock);
+
+ if (found) {
+ ASSERT(inp != NULL);
+ ASSERT(so != NULL);
+ ASSERT(so == inp->inp_socket);
+ /*
+ * Drop the TCP connection like tcptimers() does
+ */
+ struct tcpcb *tp = inp->inp_ppcb;
+
+ tcpstat.tcps_keepdrops++;
+ soevent(so,
+ (SO_FILT_HINT_LOCKED | SO_FILT_HINT_TIMEOUT));
+ tp = tcp_drop(tp, ETIMEDOUT);
+
+ tcpstat.tcps_ka_offload_drops++;
+ os_log_info(OS_LOG_DEFAULT, "%s: dropped lport %u fport %u\n",
+ __func__, frame->local_port, frame->remote_port);
+
+ socket_unlock(so, 1);
+ }
+
+ return 0;
+}
+
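+/*
+ * Validate a notification ID requested by the application: there must be
+ * data in the send buffer, and neither the ID nor the marker position
+ * (snd_una + sb_cc) may already be in use.
+ */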
+errno_t
+tcp_notify_ack_id_valid(struct tcpcb *tp, struct socket *so,
+ u_int32_t notify_id)
+{
+ struct tcp_notify_ack_marker *elm;
+
+ if (so->so_snd.sb_cc == 0) {
+ return ENOBUFS;
+ }
+
+ SLIST_FOREACH(elm, &tp->t_notify_ack, notify_next) {
+ /* Duplicate id is not allowed */
+ if (elm->notify_id == notify_id) {
+ return EINVAL;
+ }
+ /* Duplicate position is not allowed */
+ if (elm->notify_snd_una == tp->snd_una + so->so_snd.sb_cc) {
+ return EINVAL;
+ }
+ }
+ return 0;
+}
+
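+/*
+ * Allocate a marker for the current end of the send buffer
+ * (snd_una + sb_cc) and link it into the t_notify_ack list.
+ */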
+errno_t
+tcp_add_notify_ack_marker(struct tcpcb *tp, u_int32_t notify_id)
+{
+ struct tcp_notify_ack_marker *nm, *elm = NULL;
+ struct socket *so = tp->t_inpcb->inp_socket;
+
+ MALLOC(nm, struct tcp_notify_ack_marker *, sizeof(*nm),
+ M_TEMP, M_WAIT | M_ZERO);
+ if (nm == NULL) {
+ return ENOMEM;
+ }
+ nm->notify_id = notify_id;
+ nm->notify_snd_una = tp->snd_una + so->so_snd.sb_cc;
+
+ SLIST_FOREACH(elm, &tp->t_notify_ack, notify_next) {
+ if (SEQ_GT(nm->notify_snd_una, elm->notify_snd_una)) {
+ break;
+ }
+ }
+
+ if (elm == NULL) {
+ VERIFY(SLIST_EMPTY(&tp->t_notify_ack));
+ SLIST_INSERT_HEAD(&tp->t_notify_ack, nm, notify_next);
+ } else {
+ SLIST_INSERT_AFTER(elm, nm, notify_next);
+ }
+ tp->t_notify_ack_count++;
+ return 0;
+}
+
+void
+tcp_notify_ack_free(struct tcpcb *tp)
+{
+ struct tcp_notify_ack_marker *elm, *next;
+ if (SLIST_EMPTY(&tp->t_notify_ack)) {
+ return;
+ }
+
+ SLIST_FOREACH_SAFE(elm, &tp->t_notify_ack, notify_next, next) {
+ SLIST_REMOVE(&tp->t_notify_ack, elm, tcp_notify_ack_marker,
+ notify_next);
+ FREE(elm, M_TEMP);
+ }
+ SLIST_INIT(&tp->t_notify_ack);
+ tp->t_notify_ack_count = 0;
+}
+
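+/*
+ * Post SO_FILT_HINT_NOTIFY_ACK on the socket when the first notification
+ * marker has been covered by the cumulative ACK. The caller must ensure
+ * that t_notify_ack is not empty.
+ */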
+inline void
+tcp_notify_acknowledgement(struct tcpcb *tp, struct socket *so)
+{
+ struct tcp_notify_ack_marker *elm;
+
+ elm = SLIST_FIRST(&tp->t_notify_ack);
+ if (SEQ_GEQ(tp->snd_una, elm->notify_snd_una)) {
+ soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOTIFY_ACK);
+ }
+}
+
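+/*
+ * Count how many notification markers have been acknowledged and report
+ * both the completed and the still-pending counts to the caller.
+ */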
+void
+tcp_get_notify_ack_count(struct tcpcb *tp,
+ struct tcp_notify_ack_complete *retid)
+{
+ struct tcp_notify_ack_marker *elm;
+ uint32_t complete = 0;
+
+ SLIST_FOREACH(elm, &tp->t_notify_ack, notify_next) {
+ if (SEQ_GEQ(tp->snd_una, elm->notify_snd_una)) {
+ ASSERT(complete < UINT32_MAX);
+ complete++;
+ } else {
+ break;
+ }
+ }
+ retid->notify_pending = tp->t_notify_ack_count - complete;
+ retid->notify_complete_count = min(TCP_MAX_NOTIFY_ACK, complete);
+}
+
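+/*
+ * Copy the IDs of acknowledged markers into the caller's buffer, removing
+ * each marker from the list as it is reported.
+ */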
+void
+tcp_get_notify_ack_ids(struct tcpcb *tp,
+ struct tcp_notify_ack_complete *retid)
+{
+ size_t i = 0;
+ struct tcp_notify_ack_marker *elm, *next;
+
+ SLIST_FOREACH_SAFE(elm, &tp->t_notify_ack, notify_next, next) {
+ if (i >= retid->notify_complete_count) {
+ break;
+ }
+ if (SEQ_GEQ(tp->snd_una, elm->notify_snd_una)) {
+ retid->notify_complete_id[i++] = elm->notify_id;
+ SLIST_REMOVE(&tp->t_notify_ack, elm,
+ tcp_notify_ack_marker, notify_next);
+ FREE(elm, M_TEMP);
+ tp->t_notify_ack_count--;
+ } else {
+ break;
+ }
+ }
+}
+
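+/*
+ * Return true when this is a TCP socket with at least one notification
+ * marker already covered by the cumulative ACK.
+ */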
+bool
+tcp_notify_ack_active(struct socket *so)
+{
+ if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
+ SOCK_TYPE(so) == SOCK_STREAM) {
+ struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+
+ if (!SLIST_EMPTY(&tp->t_notify_ack)) {
+ struct tcp_notify_ack_marker *elm;
+ elm = SLIST_FIRST(&tp->t_notify_ack);
+ if (SEQ_GEQ(tp->snd_una, elm->notify_snd_una)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
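+/*
+ * Given the cumulative ACK th_ack, return the number of bytes in the send
+ * buffer that have not been transmitted yet. Only meaningful when the
+ * socket accounts for send bytes (SB_SNDBYTE_CNT).
+ */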
+inline int32_t
+inp_get_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
+{
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+
+ if ((so->so_snd.sb_flags & SB_SNDBYTE_CNT) &&
+ so->so_snd.sb_cc > 0) {
+ int32_t unsent, sent;
+ sent = tp->snd_max - th_ack;
+ if (tp->t_flags & TF_SENTFIN) {
+ sent--;
+ }
+ unsent = so->so_snd.sb_cc - sent;
+ return unsent;
+ }
+ return 0;
+}
+
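+/*
+ * Increment a per-interface statistic in either the IPv4 or the IPv6
+ * block, selected by the flow's address family.
+ */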
+#define IFP_PER_FLOW_STAT(_ipv4_, _stat_) { \
+ if (_ipv4_) { \
+ ifp->if_ipv4_stat->_stat_++; \
+ } else { \
+ ifp->if_ipv6_stat->_stat_++; \
+ } \
+}
+
+#define FLOW_ECN_ENABLED(_flags_) \
+ ((_flags_ & (TE_ECN_ON)) == (TE_ECN_ON))
+
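+/*
+ * Fold the per-flow statistics gathered for a connection into the global
+ * tcpstat ECN counters and the per-interface IPv4/IPv6 statistics.
+ */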
+void
+tcp_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
+ struct ifnet *ifp)
+{
+ if (ifp == NULL || !IF_FULLY_ATTACHED(ifp)) {
+ return;
+ }
+
+ ifnet_lock_shared(ifp);
+ if (ifs->ecn_flags & TE_SETUPSENT) {
+ if (ifs->ecn_flags & TE_CLIENT_SETUP) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_client_setup);
+ if (FLOW_ECN_ENABLED(ifs->ecn_flags)) {
+ IFP_PER_FLOW_STAT(ifs->ipv4,
+ ecn_client_success);
+ } else if (ifs->ecn_flags & TE_LOST_SYN) {
+ IFP_PER_FLOW_STAT(ifs->ipv4,
+ ecn_syn_lost);
+ } else {
+ IFP_PER_FLOW_STAT(ifs->ipv4,
+ ecn_peer_nosupport);
+ }
+ } else {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_server_setup);
+ if (FLOW_ECN_ENABLED(ifs->ecn_flags)) {
+ IFP_PER_FLOW_STAT(ifs->ipv4,
+ ecn_server_success);
+ } else if (ifs->ecn_flags & TE_LOST_SYN) {
+ IFP_PER_FLOW_STAT(ifs->ipv4,
+ ecn_synack_lost);
+ } else {
+ IFP_PER_FLOW_STAT(ifs->ipv4,
+ ecn_peer_nosupport);
+ }
+ }
+ } else {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_off_conn);
+ }
+ if (FLOW_ECN_ENABLED(ifs->ecn_flags)) {
+ if (ifs->ecn_flags & TE_RECV_ECN_CE) {
+ tcpstat.tcps_ecn_conn_recv_ce++;
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_conn_recv_ce);
+ }
+ if (ifs->ecn_flags & TE_RECV_ECN_ECE) {
+ tcpstat.tcps_ecn_conn_recv_ece++;
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_conn_recv_ece);
+ }
+ if (ifs->ecn_flags & (TE_RECV_ECN_CE | TE_RECV_ECN_ECE)) {
+ if (ifs->txretransmitbytes > 0 ||
+ ifs->rxoutoforderbytes > 0) {
+ tcpstat.tcps_ecn_conn_pl_ce++;
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_conn_plce);
+ } else {
+ tcpstat.tcps_ecn_conn_nopl_ce++;
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_conn_noplce);
+ }
+ } else {
+ if (ifs->txretransmitbytes > 0 ||
+ ifs->rxoutoforderbytes > 0) {
+ tcpstat.tcps_ecn_conn_plnoce++;
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_conn_plnoce);
+ }
+ }
+ }
+
+ /* The remaining stats are only meaningful for non-local connections */
+ if (ifs->local) {
+ ifnet_lock_done(ifp);
+ return;
+ }
+
+ if (ifs->ipv4) {
+ ifp->if_ipv4_stat->timestamp = net_uptime();
+ if (FLOW_ECN_ENABLED(ifs->ecn_flags)) {
+ tcp_flow_ecn_perf_stats(ifs, &ifp->if_ipv4_stat->ecn_on);
+ } else {
+ tcp_flow_ecn_perf_stats(ifs, &ifp->if_ipv4_stat->ecn_off);
+ }
+ } else {
+ ifp->if_ipv6_stat->timestamp = net_uptime();
+ if (FLOW_ECN_ENABLED(ifs->ecn_flags)) {
+ tcp_flow_ecn_perf_stats(ifs, &ifp->if_ipv6_stat->ecn_on);
+ } else {
+ tcp_flow_ecn_perf_stats(ifs, &ifp->if_ipv6_stat->ecn_off);
+ }
+ }
+
+ if (ifs->rxmit_drop) {
+ if (FLOW_ECN_ENABLED(ifs->ecn_flags)) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_on.rxmit_drop);
+ } else {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_off.rxmit_drop);
+ }
+ }
+ if (ifs->ecn_fallback_synloss) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_fallback_synloss);
+ }
+ if (ifs->ecn_fallback_droprst) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_fallback_droprst);
+ }
+ if (ifs->ecn_fallback_droprxmt) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_fallback_droprxmt);
+ }
+ if (ifs->ecn_fallback_ce) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_fallback_ce);
+ }
+ if (ifs->ecn_fallback_reorder) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_fallback_reorder);
+ }
+ if (ifs->ecn_recv_ce > 0) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_recv_ce);
+ }
+ if (ifs->ecn_recv_ece > 0) {
+ IFP_PER_FLOW_STAT(ifs->ipv4, ecn_recv_ece);
+ }
+
+ tcp_flow_lim_stats(ifs, &ifp->if_lim_stat);
+ ifnet_lock_done(ifp);
+}
+