+/*
+ * Build a header-only TCP ACK segment (IP or IPv6 header followed by a
+ * TCP header, no payload) for the connection described by tp.
+ *
+ * tp		connection whose addresses, ports and sequence state are used
+ * ifp		interface handed to in6_selecthlim() for the IPv6 hop limit
+ * is_probe	TRUE: sequence number is snd_una - 1 (keepalive probe);
+ *		FALSE: sequence number is snd_una (reply to a peer's probe)
+ *
+ * Returns a freshly allocated packet-header mbuf that the caller owns,
+ * or NULL when the mbuf could not be allocated.
+ */
+static struct mbuf *
+tcp_make_keepalive_frame(struct tcpcb *tp, struct ifnet *ifp,
+    boolean_t is_probe)
+{
+	struct inpcb *inp = tp->t_inpcb;
+	const boolean_t is_ipv4 = (inp->inp_vflag & INP_IPV4) != 0;
+	struct mbuf *m;
+	u_int8_t *pkt;
+	struct tcphdr *th;
+	int32_t max_rwnd;
+	int rwnd = 0;
+
+	/*
+	 * Both header combinations must fit in the data area of a
+	 * packet header mbuf; no cluster is ever attached here.
+	 */
+	_CASSERT(sizeof(struct ip) + sizeof(struct tcphdr) <= _MHLEN);
+	_CASSERT(sizeof(struct ip6_hdr) + sizeof(struct tcphdr) <= _MHLEN);
+
+	MGETHDR(m, M_WAIT, MT_HEADER);
+	if (m == NULL) {
+		return (NULL);
+	}
+	m->m_pkthdr.pkt_proto = IPPROTO_TCP;
+
+	pkt = mbuf_datastart(m);
+
+	/* Zero the header area and locate the TCP header behind the IP one */
+	if (is_ipv4) {
+		m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
+		bzero(pkt, sizeof(struct ip) + sizeof(struct tcphdr));
+		th = (struct tcphdr *)(void *)(pkt + sizeof(struct ip));
+	} else {
+		VERIFY(inp->inp_vflag & INP_IPV6);
+
+		m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+		bzero(pkt, sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
+		th = (struct tcphdr *)(void *)(pkt + sizeof(struct ip6_hdr));
+	}
+	m->m_pkthdr.len = m->m_len;
+
+	/* Start from the connection's header template, then adjust below */
+	tcp_fillheaders(tp, pkt, th);
+
+	if (is_ipv4) {
+		struct ip *ip4 = (__typeof__(ip4))(void *)pkt;
+
+		ip4->ip_id = ip_randomid();
+		ip4->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
+		ip4->ip_ttl = inp->inp_ip_ttl;
+		/* Carry over the socket's TOS bits except the ECN field */
+		ip4->ip_tos |= (inp->inp_ip_tos & ~IPTOS_ECN_MASK);
+		ip4->ip_sum = in_cksum_hdr(ip4);
+	} else {
+		struct ip6_hdr *ip6h = (__typeof__(ip6h))(void *)pkt;
+
+		ip6h->ip6_plen = htons(sizeof(struct tcphdr));
+		ip6h->ip6_hlim = in6_selecthlim(inp, ifp);
+		/* Clear the ECN bits of the flow word */
+		ip6h->ip6_flow = ip6h->ip6_flow & ~IPV6_FLOW_ECN_MASK;
+
+		/* Clear the second 16-bit word of scope-embedded addresses */
+		if (IN6_IS_SCOPE_EMBED(&ip6h->ip6_src))
+			ip6h->ip6_src.s6_addr16[1] = 0;
+		if (IN6_IS_SCOPE_EMBED(&ip6h->ip6_dst))
+			ip6h->ip6_dst.s6_addr16[1] = 0;
+	}
+	th->th_flags = TH_ACK;
+
+	/* Advertise the receive space, capped at what the scale can express */
+	max_rwnd = (int32_t)TCP_MAXWIN << tp->rcv_scale;
+	rwnd = tcp_sbspace(tp);
+	if (rwnd > max_rwnd)
+		rwnd = max_rwnd;
+	th->th_win = htons((u_short) (rwnd >> tp->rcv_scale));
+
+	th->th_seq = is_probe ? htonl(tp->snd_una - 1) : htonl(tp->snd_una);
+	th->th_ack = htonl(tp->rcv_nxt);
+
+	/* Recompute the TCP checksum now that every field is final */
+	th->th_sum = 0;
+	th->th_sum = is_ipv4 ?
+	    inet_cksum(m, IPPROTO_TCP,
+	    sizeof(struct ip), sizeof(struct tcphdr)) :
+	    inet6_cksum(m, IPPROTO_TCP,
+	    sizeof(struct ip6_hdr), sizeof(struct tcphdr));
+
+	return (m);
+}
+
+/*
+ * Fill frames_array[] with keepalive offload descriptors for the TCP
+ * connections whose last output interface is ifp.
+ *
+ * ifp			interface the frames are generated for
+ * frames_array		array of frame descriptors to fill
+ * frames_array_count	number of entries in frames_array
+ * frame_data_offset	byte offset inside frame->data / frame->reply_data
+ *			at which the IP header is written
+ * used_frames_count	in: index of the first free entry;
+ *			out: index one past the last entry filled
+ *
+ * Only established connections that have INP2_KEEPALIVE_OFFLOAD set,
+ * fully specified addresses and ports, and headers that fit in the
+ * frame data area produce a frame.  Each frame holds both a probe and
+ * the reply to a peer's probe.  The function returns without touching
+ * anything when an argument is invalid or no entry is free.
+ */
+void
+tcp_fill_keepalive_offload_frames(ifnet_t ifp,
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count, size_t frame_data_offset,
+    u_int32_t *used_frames_count)
+{
+	struct inpcb *inp;
+	inp_gen_t gencnt;
+	u_int32_t frame_index;
+
+	/*
+	 * Validate the in/out counter like the other pointer arguments
+	 * before reading the start index through it.
+	 */
+	if (ifp == NULL || frames_array == NULL ||
+	    used_frames_count == NULL ||
+	    frames_array_count == 0 ||
+	    frame_data_offset >= IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE)
+		return;
+
+	frame_index = *used_frames_count;
+	if (frame_index >= frames_array_count)
+		return;
+
+	/*
+	 * This function is called outside the regular TCP processing
+	 * so we need to update the TCP clock.
+	 */
+	calculate_tcp_clock();
+
+	lck_rw_lock_shared(tcbinfo.ipi_lock);
+	gencnt = tcbinfo.ipi_gencnt;
+	LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
+		struct socket *so;
+		struct ifnet_keepalive_offload_frame *frame;
+		struct mbuf *m = NULL;
+		struct tcpcb *tp = intotcpcb(inp);
+
+		/* Stop as soon as the caller's array is full */
+		if (frame_index >= frames_array_count)
+			break;
+
+		/* Skip PCBs created after the walk started and dead ones */
+		if (inp->inp_gencnt > gencnt ||
+		    inp->inp_state == INPCB_STATE_DEAD)
+			continue;
+
+		if ((so = inp->inp_socket) == NULL ||
+		    (so->so_state & SS_DEFUNCT))
+			continue;
+		/*
+		 * check for keepalive offload flag without socket
+		 * lock to avoid a deadlock
+		 */
+		if (!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD)) {
+			continue;
+		}
+
+		if (!(inp->inp_vflag & (INP_IPV4 | INP_IPV6))) {
+			continue;
+		}
+		/* Take a want count so the PCB stays usable while locking */
+		if (inp->inp_ppcb == NULL ||
+		    in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+			continue;
+		tcp_lock(so, 1, 0);
+		/* Release the want count */
+		if (inp->inp_ppcb == NULL ||
+		    (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		/* Both endpoints must be fully specified */
+		if ((inp->inp_vflag & INP_IPV4) &&
+		    (inp->inp_laddr.s_addr == INADDR_ANY ||
+		    inp->inp_faddr.s_addr == INADDR_ANY)) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		if ((inp->inp_vflag & INP_IPV6) &&
+		    (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+		    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		if (inp->inp_lport == 0 || inp->inp_fport == 0) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		/* Only connections that last sent on this interface */
+		if (inp->inp_last_outifp == NULL ||
+		    inp->inp_last_outifp->if_index != ifp->if_index) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		/* The headers must fit in the frame data area past the offset */
+		if ((inp->inp_vflag & INP_IPV4) && frame_data_offset +
+		    sizeof(struct ip) + sizeof(struct tcphdr) >
+		    IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		} else if (!(inp->inp_vflag & INP_IPV4) && frame_data_offset +
+		    sizeof(struct ip6_hdr) + sizeof(struct tcphdr) >
+		    IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		/*
+		 * There is no point in waking up the device for connections
+		 * that are not established. Long lived connections are meant
+		 * for processes that will send and receive data
+		 */
+		if (tp->t_state != TCPS_ESTABLISHED) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		/*
+		 * This inp has all the information that is needed to
+		 * generate an offload frame.
+		 */
+		frame = &frames_array[frame_index];
+		frame->type = IFNET_KEEPALIVE_OFFLOAD_FRAME_TCP;
+		frame->ether_type = (inp->inp_vflag & INP_IPV4) ?
+		    IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4 :
+		    IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6;
+		/* Per-connection idle time if set, else the global default */
+		frame->interval = tp->t_keepidle > 0 ? tp->t_keepidle :
+		    tcp_keepidle;
+		frame->keep_cnt = TCP_CONN_KEEPCNT(tp);
+		frame->keep_retry = TCP_CONN_KEEPINTVL(tp);
+		frame->local_port = ntohs(inp->inp_lport);
+		frame->remote_port = ntohs(inp->inp_fport);
+		frame->local_seq = tp->snd_nxt;
+		frame->remote_seq = tp->rcv_nxt;
+		if (inp->inp_vflag & INP_IPV4) {
+			frame->length = frame_data_offset +
+			    sizeof(struct ip) + sizeof(struct tcphdr);
+			frame->reply_length = frame->length;
+
+			frame->addr_length = sizeof(struct in_addr);
+			bcopy(&inp->inp_laddr, frame->local_addr,
+			    sizeof(struct in_addr));
+			bcopy(&inp->inp_faddr, frame->remote_addr,
+			    sizeof(struct in_addr));
+		} else {
+			struct in6_addr *ip6;
+
+			frame->length = frame_data_offset +
+			    sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+			frame->reply_length = frame->length;
+
+			frame->addr_length = sizeof(struct in6_addr);
+			/* Copy, then clear the word of scope-embedded addresses */
+			ip6 = (struct in6_addr *)(void *)frame->local_addr;
+			bcopy(&inp->in6p_laddr, ip6, sizeof(struct in6_addr));
+			if (IN6_IS_SCOPE_EMBED(ip6))
+				ip6->s6_addr16[1] = 0;
+
+			ip6 = (struct in6_addr *)(void *)frame->remote_addr;
+			bcopy(&inp->in6p_faddr, ip6, sizeof(struct in6_addr));
+			if (IN6_IS_SCOPE_EMBED(ip6))
+				ip6->s6_addr16[1] = 0;
+		}
+
+		/*
+		 * First the probe.  If an mbuf cannot be allocated the
+		 * entry is not counted (frame_index is left unchanged),
+		 * so the next eligible connection reuses this slot.
+		 */
+		m = tcp_make_keepalive_frame(tp, ifp, TRUE);
+		if (m == NULL) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		bcopy(m->m_data, frame->data + frame_data_offset,
+		    m->m_len);
+		m_freem(m);
+
+		/*
+		 * Now the response packet to incoming probes
+		 */
+		m = tcp_make_keepalive_frame(tp, ifp, FALSE);
+		if (m == NULL) {
+			tcp_unlock(so, 1, 0);
+			continue;
+		}
+		bcopy(m->m_data, frame->reply_data + frame_data_offset,
+		    m->m_len);
+		m_freem(m);
+
+		frame_index++;
+		tcp_unlock(so, 1, 0);
+	}
+	lck_rw_done(tcbinfo.ipi_lock);
+	*used_frames_count = frame_index;
+}
+
+/*
+ * Check whether a notify-ack marker with the given id may be added.
+ *
+ * Returns ENOBUFS when the send buffer holds no data, EINVAL when a
+ * marker with the same id or with the same target sequence number
+ * (snd_una plus the bytes currently in the send buffer) is already
+ * registered, and 0 otherwise.
+ */
+errno_t
+tcp_notify_ack_id_valid(struct tcpcb *tp, struct socket *so,
+    u_int32_t notify_id)
+{
+	struct tcp_notify_ack_marker *marker;
+
+	if (so->so_snd.sb_cc == 0)
+		return (ENOBUFS);
+
+	SLIST_FOREACH(marker, &tp->t_notify_ack, notify_next) {
+		/* Neither a duplicate id nor a duplicate position is allowed */
+		if (marker->notify_id == notify_id ||
+		    marker->notify_snd_una == tp->snd_una + so->so_snd.sb_cc)
+			return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * Register a notify-ack marker for everything currently queued in the
+ * send buffer: the marker completes once snd_una reaches the present
+ * snd_una plus the number of bytes in the send buffer.
+ *
+ * The list is kept in ascending sequence order.  The consumers
+ * (tcp_notify_acknowledgement, tcp_get_notify_ack_count,
+ * tcp_get_notify_ack_ids, tcp_notify_ack_active) only look at the head
+ * or stop at the first marker that is not yet acknowledged, so an
+ * out-of-order list would delay notifications for markers queued
+ * behind a later one.
+ *
+ * Returns 0 on success or ENOMEM when the marker cannot be allocated.
+ */
+errno_t
+tcp_add_notify_ack_marker(struct tcpcb *tp, u_int32_t notify_id)
+{
+	struct tcp_notify_ack_marker *nm, *elm, *prev = NULL;
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	MALLOC(nm, struct tcp_notify_ack_marker *, sizeof (*nm),
+	    M_TEMP, M_WAIT | M_ZERO);
+	if (nm == NULL)
+		return (ENOMEM);
+	nm->notify_id = notify_id;
+	nm->notify_snd_una = tp->snd_una + so->so_snd.sb_cc;
+
+	/*
+	 * Find the last marker that does not lie beyond the new one;
+	 * the new marker goes right behind it.
+	 */
+	SLIST_FOREACH(elm, &tp->t_notify_ack, notify_next) {
+		if (SEQ_GT(elm->notify_snd_una, nm->notify_snd_una))
+			break;
+		prev = elm;
+	}
+
+	if (prev == NULL) {
+		/* Empty list, or the new marker precedes every other one */
+		SLIST_INSERT_HEAD(&tp->t_notify_ack, nm, notify_next);
+	} else {
+		SLIST_INSERT_AFTER(prev, nm, notify_next);
+	}
+	tp->t_notify_ack_count++;
+	return (0);
+}
+
+/*
+ * Release every notify-ack marker attached to tp and reset the marker
+ * count.  Nothing is touched when the list is already empty.
+ */
+void
+tcp_notify_ack_free(struct tcpcb *tp)
+{
+	struct tcp_notify_ack_marker *marker;
+
+	if (SLIST_EMPTY(&tp->t_notify_ack))
+		return;
+
+	/* Pop markers off the head until none are left */
+	while ((marker = SLIST_FIRST(&tp->t_notify_ack)) != NULL) {
+		SLIST_REMOVE_HEAD(&tp->t_notify_ack, notify_next);
+		FREE(marker, M_TEMP);
+	}
+	SLIST_INIT(&tp->t_notify_ack);
+	tp->t_notify_ack_count = 0;
+}
+
+/*
+ * Post SO_FILT_HINT_NOTIFY_ACK on the socket when the first notify-ack
+ * marker of tp has been acknowledged, i.e. snd_una has reached the
+ * marker's sequence number.  Does nothing when no marker is
+ * registered.
+ */
+inline void
+tcp_notify_acknowledgement(struct tcpcb *tp, struct socket *so)
+{
+	struct tcp_notify_ack_marker *elm;
+
+	elm = SLIST_FIRST(&tp->t_notify_ack);
+	/* An empty list has no head to compare against */
+	if (elm == NULL)
+		return;
+
+	if (SEQ_GEQ(tp->snd_una, elm->notify_snd_una)) {
+		soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOTIFY_ACK);
+	}
+}
+
+/*
+ * Report how many notify-ack markers of tp are complete and how many
+ * are still pending.
+ *
+ * Markers are counted from the head of the list up to the first one
+ * whose sequence number snd_una has not reached yet.
+ * retid->notify_pending receives the total marker count minus the
+ * completed ones; retid->notify_complete_count receives the completed
+ * count capped at TCP_MAX_NOTIFY_ACK.
+ */
+void
+tcp_get_notify_ack_count(struct tcpcb *tp,
+    struct tcp_notify_ack_complete *retid)
+{
+	struct tcp_notify_ack_marker *marker;
+	size_t complete = 0;
+
+	SLIST_FOREACH(marker, &tp->t_notify_ack, notify_next) {
+		/* Stop at the first marker not yet acknowledged */
+		if (!SEQ_GEQ(tp->snd_una, marker->notify_snd_una))
+			break;
+		complete++;
+	}
+	retid->notify_complete_count = min(TCP_MAX_NOTIFY_ACK, complete);
+	retid->notify_pending = tp->t_notify_ack_count - complete;
+}
+
+/*
+ * Move the ids of completed notify-ack markers into
+ * retid->notify_complete_id[] and free those markers.
+ *
+ * At most retid->notify_complete_count ids are collected; the field is
+ * read here, not written.  Collection stops at the first marker whose
+ * sequence number snd_una has not reached.  Every collected marker is
+ * unlinked, freed and subtracted from tp->t_notify_ack_count.
+ */
+void
+tcp_get_notify_ack_ids(struct tcpcb *tp,
+    struct tcp_notify_ack_complete *retid)
+{
+	struct tcp_notify_ack_marker *head;
+	size_t filled = 0;
+
+	/*
+	 * Every marker that gets collected is the list head at that
+	 * moment, so keep consuming from the head.
+	 */
+	while ((head = SLIST_FIRST(&tp->t_notify_ack)) != NULL &&
+	    filled < retid->notify_complete_count &&
+	    SEQ_GEQ(tp->snd_una, head->notify_snd_una)) {
+		retid->notify_complete_id[filled++] = head->notify_id;
+		SLIST_REMOVE_HEAD(&tp->t_notify_ack, notify_next);
+		FREE(head, M_TEMP);
+		tp->t_notify_ack_count--;
+	}
+}
+
+/*
+ * Tell whether the socket has a completed notify-ack marker waiting.
+ *
+ * Returns true only for PF_INET/PF_INET6 SOCK_STREAM sockets whose
+ * first marker has been reached by snd_una; false in every other case.
+ */
+bool
+tcp_notify_ack_active(struct socket *so)
+{
+	struct tcp_notify_ack_marker *head;
+	struct tcpcb *tp;
+
+	if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)
+		return (false);
+	if (SOCK_TYPE(so) != SOCK_STREAM)
+		return (false);
+
+	tp = intotcpcb(sotoinpcb(so));
+	head = SLIST_FIRST(&tp->t_notify_ack);
+	if (head == NULL)
+		return (false);
+
+	if (SEQ_GEQ(tp->snd_una, head->notify_snd_una))
+		return (true);
+	return (false);
+}
+
+/*
+ * Compute how many bytes of the send buffer have not been sent yet.
+ *
+ * The amount already sent is snd_max - th_ack, less one when
+ * TF_SENTFIN is set, and is subtracted from the bytes held in the
+ * send buffer.  Returns 0 when the send buffer does not have
+ * SB_SNDBYTE_CNT set or is empty.
+ */
+inline int32_t
+inp_get_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	int32_t in_flight;
+
+	if (!(so->so_snd.sb_flags & SB_SNDBYTE_CNT) ||
+	    !(so->so_snd.sb_cc > 0))
+		return (0);
+
+	in_flight = tp->snd_max - th_ack;
+	if (tp->t_flags & TF_SENTFIN)
+		in_flight--;
+
+	return (so->so_snd.sb_cc - in_flight);
+}