+ if (tp->snd_cwnd > BYTES_ACKED(th, tp))
+ tp->snd_cwnd -= BYTES_ACKED(th, tp);
+ else
+ tp->snd_cwnd = 0;
+ tp->snd_cwnd += tp->t_maxseg;
+
+}
+
+/*
+ * Drop a random TCP connection that hasn't been serviced yet and
+ * is eligible for discard. There is a one in qlen chance that
+ * we will return a null, saying that there are no dropable
+ * requests. In this case, the protocol specific code should drop
+ * the new request. This insures fairness.
+ *
+ * The listening TCP socket "head" must be locked
+ */
+ static int
+ tcp_dropdropablreq(struct socket *head)
+ {
+ struct socket *so, *sonext;
+ unsigned int i, j, qlen;
+ /* Persistent PRNG state and drop-rate history shared across calls */
+ static u_int32_t rnd = 0;
+ static u_int64_t old_runtime;
+ static unsigned int cur_cnt, old_cnt;
+ u_int64_t now_sec;
+ struct inpcb *inp = NULL;
+ struct tcpcb *tp;
+
+ /* Only meaningful on a listening socket */
+ if ((head->so_options & SO_ACCEPTCONN) == 0)
+ return (0);
+
+ /* Nothing to drop if the incomplete-connection queue is empty */
+ if (TAILQ_EMPTY(&head->so_incomp))
+ return (0);
+
+ /*
+ * Check if there is any socket in the incomp queue
+ * that is closed because of a reset from the peer and is
+ * waiting to be garbage collected. If so, pick that as
+ * the victim
+ */
+ TAILQ_FOREACH_SAFE(so, &head->so_incomp, so_list, sonext) {
+ inp = sotoinpcb(so);
+ tp = intotcpcb(inp);
+ if (tp != NULL && tp->t_state == TCPS_CLOSED &&
+ so->so_head != NULL &&
+ (so->so_state & (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) ==
+ (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) {
+ /*
+ * The listen socket is already locked but we
+ * can lock this socket here without lock ordering
+ * issues because it is in the incomp queue and
+ * is not visible to others.
+ */
+ if (lck_mtx_try_lock(&inp->inpcb_mtx)) {
+ /* Hold a use count on the victim while we tear it down */
+ so->so_usecount++;
+ goto found_victim;
+ } else {
+ continue;
+ }
+ }
+ }
+
+ /* No already-closed victim found; restart from the queue head */
+ so = TAILQ_FIRST(&head->so_incomp);
+
+ /*
+ * Maintain a drop-rate history: old_cnt becomes the average
+ * drops per second over the elapsed interval, and cur_cnt
+ * restarts counting for the new interval.
+ */
+ now_sec = net_uptime();
+ if ((i = (now_sec - old_runtime)) != 0) {
+ old_runtime = now_sec;
+ old_cnt = cur_cnt / i;
+ cur_cnt = 0;
+ }
+
+
+ qlen = head->so_incqlen;
+ if (rnd == 0)
+ rnd = RandomULong();
+
+ /*
+ * When the drop rate is high relative to the queue length, skip
+ * a random number of entries instead of always taking the oldest
+ * (linear-congruential step; top 16 bits scaled by qlen + 1).
+ */
+ if (++cur_cnt > qlen || old_cnt > qlen) {
+ rnd = (314159 * rnd + 66329) & 0xffff;
+ j = ((qlen + 1) * rnd) >> 16;
+
+ while (j-- && so)
+ so = TAILQ_NEXT(so, so_list);
+ }
+ /* Find a connection that is not already closing (or being served) */
+ while (so) {
+ inp = (struct inpcb *)so->so_pcb;
+
+ sonext = TAILQ_NEXT(so, so_list);
+
+ if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0)
+ != WNT_STOPUSING) {
+ /*
+ * Avoid the issue of a socket being accepted
+ * by one input thread and being dropped by
+ * another input thread. If we can't get a hold
+ * on this mutex, then grab the next socket in
+ * line.
+ */
+ if (lck_mtx_try_lock(&inp->inpcb_mtx)) {
+ so->so_usecount++;
+ /*
+ * usecount == 2 means only the incomp queue and this
+ * function reference the socket; SS_INCOMP set without
+ * SOF_INCOMP_INPROGRESS means no accept is in flight.
+ */
+ if ((so->so_usecount == 2) &&
+ (so->so_state & SS_INCOMP) &&
+ !(so->so_flags & SOF_INCOMP_INPROGRESS)) {
+ break;
+ } else {
+ /*
+ * don't use if being accepted or
+ * used in any other way
+ */
+ in_pcb_checkstate(inp, WNT_RELEASE, 1);
+ tcp_unlock(so, 1, 0);
+ }
+ } else {
+ /*
+ * do not try to lock the inp in
+ * in_pcb_checkstate because the lock
+ * is already held in some other thread.
+ * Only drop the inp_wntcnt reference.
+ */
+ in_pcb_checkstate(inp, WNT_RELEASE, 1);
+ }
+ }
+ so = sonext;
+
+ }
+ if (so == NULL) {
+ return (0);
+ }
+
+ /* Makes sure socket is still in the right state to be discarded */
+
+ if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+ tcp_unlock(so, 1, 0);
+ return (0);
+ }
+
+found_victim:
+ /*
+ * Re-validate under the socket lock: an accept may have raced with
+ * us and taken an extra reference or cleared SS_INCOMP.
+ */
+ if (so->so_usecount != 2 || !(so->so_state & SS_INCOMP)) {
+ /* do not discard: that socket is being accepted */
+ tcp_unlock(so, 1, 0);
+ return (0);
+ }
+
+ /* Detach the victim from the listener, then drop the listener lock */
+ TAILQ_REMOVE(&head->so_incomp, so, so_list);
+ tcp_unlock(head, 0, 0);
+
+ lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+ tp = sototcpcb(so);
+ so->so_flags |= SOF_OVERFLOW;
+ so->so_head = NULL;
+
+ tcp_close(tp);
+ if (inp->inp_wantcnt > 0 && inp->inp_wantcnt != WNT_STOPUSING) {
+ /*
+ * Some one has a wantcnt on this pcb. Since WNT_ACQUIRE
+ * doesn't require a lock, it could have happened while
+ * we are holding the lock. This pcb will have to
+ * be garbage collected later.
+ * Release the reference held for so_incomp queue
+ */
+ so->so_usecount--;
+ tcp_unlock(so, 1, 0);
+ } else {
+ /*
+ * Unlock this socket and leave the reference on.
+ * We need to acquire the pcbinfo lock in order to
+ * fully dispose it off
+ */
+ tcp_unlock(so, 0, 0);
+
+ lck_rw_lock_exclusive(tcbinfo.ipi_lock);
+
+ tcp_lock(so, 0, 0);
+ /* Release the reference held for so_incomp queue */
+ so->so_usecount--;
+
+ if (so->so_usecount != 1 ||
+ (inp->inp_wantcnt > 0 &&
+ inp->inp_wantcnt != WNT_STOPUSING)) {
+ /*
+ * There is an extra wantcount or usecount
+ * that must have been added when the socket
+ * was unlocked. This socket will have to be
+ * garbage collected later
+ */
+ tcp_unlock(so, 1, 0);
+ } else {
+
+ /* Drop the reference held for this function */
+ so->so_usecount--;
+
+ in_pcbdispose(inp);
+ }
+ lck_rw_done(tcbinfo.ipi_lock);
+ }
+ tcpstat.tcps_drops++;
+
+ /* Re-acquire the listener lock and fix up its queue counters */
+ tcp_lock(head, 0, 0);
+ head->so_incqlen--;
+ head->so_qlen--;
+ return(1);
+}
+
+ /*
+ * Set background congestion control on a socket: thin wrapper that
+ * switches the connection to the background CC algorithm index.
+ */
+ void
+ tcp_set_background_cc(struct socket *so)
+ {
+ tcp_set_new_cc(so, TCP_CC_ALGO_BACKGROUND_INDEX);
+}
+
+ /*
+ * Set foreground congestion control on a socket: NewReno when the
+ * tcp_use_newreno override is enabled, CUBIC otherwise.
+ */
+ void
+ tcp_set_foreground_cc(struct socket *so)
+ {
+ uint16_t cc_algo;
+
+ cc_algo = tcp_use_newreno ? TCP_CC_ALGO_NEWRENO_INDEX :
+     TCP_CC_ALGO_CUBIC_INDEX;
+ tcp_set_new_cc(so, cc_algo);
+}
+
+ /*
+ * Switch the connection's congestion control algorithm to cc_index.
+ * No-op when the connection already uses that algorithm.  Order
+ * matters: the old algorithm's per-connection state is cleaned up
+ * before the new index is installed, state is (re)allocated, and
+ * the new algorithm's switch_to hook is invoked.
+ */
+ static void
+ tcp_set_new_cc(struct socket *so, uint16_t cc_index)
+ {
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ u_char old_cc_index = 0;
+ if (tp->tcp_cc_index != cc_index) {
+
+ old_cc_index = tp->tcp_cc_index;
+
+ /* Let the current algorithm release its private state */
+ if (CC_ALGO(tp)->cleanup != NULL)
+ CC_ALGO(tp)->cleanup(tp);
+ tp->tcp_cc_index = cc_index;
+
+ tcp_cc_allocate_state(tp);
+
+ /* Give the new algorithm a chance to initialize from the old one */
+ if (CC_ALGO(tp)->switch_to != NULL)
+ CC_ALGO(tp)->switch_to(tp, old_cc_index);
+
+ /* Record the algorithm change for CC debug tracing */
+ tcp_ccdbg_trace(tp, NULL, TCP_CC_CHANGE_ALGO);
+ }
+ }
+
+ /* Mark a socket as a background receiver. */
+ void
+ tcp_set_recv_bg(struct socket *so)
+ {
+ /* Set the background-receive flag only if it is not already set */
+ if (IS_TCP_RECV_BG(so) == 0)
+ so->so_traffic_mgt_flags |= TRAFFIC_MGT_TCP_RECVBG;
+
+ /* Unset Large Receive Offload on background sockets */
+ so_set_lro(so, SO_TC_BK);
+}
+
+ /* Remove the background-receiver mark from a socket. */
+ void
+ tcp_clear_recv_bg(struct socket *so)
+ {
+ /* Clear the background-receive flag only if it is currently set */
+ if (IS_TCP_RECV_BG(so) != 0)
+ so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
+
+ /*
+ * Set/unset use of Large Receive Offload depending on
+ * the traffic class
+ */
+ so_set_lro(so, so->so_traffic_class);
+}
+
+ /*
+ * Unthrottle a TCP connection that was flow-controlled by the
+ * outgoing interface: adjust the congestion window, reset the
+ * retransmit backoff, and restart output.
+ */
+ void
+ inp_fc_unthrottle_tcp(struct inpcb *inp)
+ {
+ struct tcpcb *tp = inp->inp_ppcb;
+ /*
+ * Back off the slow-start threshold and enter
+ * congestion avoidance phase
+ */
+ if (CC_ALGO(tp)->pre_fr != NULL)
+ CC_ALGO(tp)->pre_fr(tp);
+
+ tp->snd_cwnd = tp->snd_ssthresh;
+
+ /*
+ * Restart counting for ABC (Appropriate Byte Counting) as we
+ * changed the congestion window just now.
+ */
+ tp->t_bytes_acked = 0;
+
+ /* Reset retransmit shift as we know that the reason
+ * for delay in sending a packet is due to flow
+ * control on the outgoing interface. There is no need
+ * to backoff retransmit timer.
+ */
+ tp->t_rxtshift = 0;
+
+ /*
+ * Start the output stream again. Since we are
+ * not retransmitting data, do not reset the
+ * retransmit timer or rtt calculation.
+ */
+ tcp_output(tp);
+}
+
+ /*
+ * sysctl handler that exports the global tcpstat structure to
+ * userspace.  It also logs the requesting process name and its
+ * parent's name via kern_asl_msg to the "messagetracer" facility,
+ * presumably for usage telemetry -- NOTE(review): confirm intent.
+ * Returns 0 on success or the SYSCTL_OUT error code.
+ */
+ static int
+ tcp_getstat SYSCTL_HANDLER_ARGS
+ {
+#pragma unused(oidp, arg1, arg2)
+
+ int error;
+
+ proc_t caller = PROC_NULL;
+ proc_t caller_parent = PROC_NULL;
+ char command_name[MAXCOMLEN + 1] = "";
+ char parent_name[MAXCOMLEN + 1] = "";
+
+ if ((caller = proc_self()) != PROC_NULL) {
+ /* get process name */
+ strlcpy(command_name, caller->p_comm, sizeof(command_name));
+
+ /* get parent process name if possible */
+ if ((caller_parent = proc_find(caller->p_ppid)) != PROC_NULL) {
+ strlcpy(parent_name, caller_parent->p_comm,
+ sizeof(parent_name));
+ proc_rele(caller_parent);
+ }
+
+ /* Only log when both names escape cleanly for ASL */
+ if ((escape_str(command_name, strlen(command_name),
+ sizeof(command_name)) == 0) &&
+ (escape_str(parent_name, strlen(parent_name),
+ sizeof(parent_name)) == 0)) {
+ kern_asl_msg(LOG_DEBUG, "messagetracer",
+ 5,
+ "com.apple.message.domain",
+ "com.apple.kernel.tcpstat", /* 1 */
+ "com.apple.message.signature",
+ "tcpstat", /* 2 */
+ "com.apple.message.signature2", command_name, /* 3 */
+ "com.apple.message.signature3", parent_name, /* 4 */
+ "com.apple.message.summarize", "YES", /* 5 */
+ NULL);
+ }
+ }
+ /* Drop the self-reference taken by proc_self() */
+ if (caller != PROC_NULL)
+ proc_rele(caller);
+
+ /* Size probe: no output buffer means report the required length */
+ if (req->oldptr == 0) {
+ req->oldlen= (size_t)sizeof(struct tcpstat);
+ }
+
+ error = SYSCTL_OUT(req, &tcpstat, MIN(sizeof (tcpstat), req->oldlen));
+
+ return (error);
+
+}
+
+/*
+ * Checksum extended TCP header and data.
+ */
+int
+tcp_input_checksum(int af, struct mbuf *m, struct tcphdr *th, int off, int tlen)
+{
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+
+ switch (af) {
+ case AF_INET: {
+ struct ip *ip = mtod(m, struct ip *);
+ struct ipovly *ipov = (struct ipovly *)ip;
+
+ if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_DID_CSUM)
+ return (0);
+
+ if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) ||
+ (m->m_pkthdr.pkt_flags & PKTF_LOOP)) &&
+ (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
+ th->th_sum = m->m_pkthdr.csum_rx_val;
+ } else {
+ uint16_t sum = m->m_pkthdr.csum_rx_val;
+ uint16_t start = m->m_pkthdr.csum_rx_start;
+
+ /*
+ * Perform 1's complement adjustment of octets
+ * that got included/excluded in the hardware-
+ * calculated checksum value. Ignore cases
+ * where the value includes or excludes the IP
+ * header span, as the sum for those octets
+ * would already be 0xffff and thus no-op.
+ */
+ if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) &&
+ start != 0 && (off - start) != off) {
+#if BYTE_ORDER != BIG_ENDIAN
+ if (start < off) {
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+ }
+#endif
+ /* callee folds in sum */
+ sum = m_adj_sum16(m, start, off, sum);
+#if BYTE_ORDER != BIG_ENDIAN
+ if (start < off) {
+ NTOHS(ip->ip_off);
+ NTOHS(ip->ip_len);
+ }
+#endif
+ }
+
+ /* callee folds in sum */
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr,
+ sum + htonl(tlen + IPPROTO_TCP));
+ }
+ th->th_sum ^= 0xffff;
+ } else {
+ uint16_t ip_sum;
+ int len;
+ char b[9];
+
+ bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
+ bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
+ ip_sum = ipov->ih_len;
+ ipov->ih_len = (u_short)tlen;
+#if BYTE_ORDER != BIG_ENDIAN
+ HTONS(ipov->ih_len);
+#endif
+ len = sizeof (struct ip) + tlen;
+ th->th_sum = in_cksum(m, len);
+ bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));
+ ipov->ih_len = ip_sum;
+
+ tcp_in_cksum_stats(len);
+ }
+ break;
+ }
+#if INET6
+ case AF_INET6: {
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+
+ if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_DID_CSUM)
+ return (0);
+
+ if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) ||
+ (m->m_pkthdr.pkt_flags & PKTF_LOOP)) &&
+ (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
+ th->th_sum = m->m_pkthdr.csum_rx_val;
+ } else {
+ uint16_t sum = m->m_pkthdr.csum_rx_val;
+ uint16_t start = m->m_pkthdr.csum_rx_start;
+
+ /*
+ * Perform 1's complement adjustment of octets
+ * that got included/excluded in the hardware-
+ * calculated checksum value.
+ */
+ if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) &&
+ start != off) {
+ uint16_t s, d;
+
+ if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
+ s = ip6->ip6_src.s6_addr16[1];
+ ip6->ip6_src.s6_addr16[1] = 0 ;
+ }
+ if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
+ d = ip6->ip6_dst.s6_addr16[1];
+ ip6->ip6_dst.s6_addr16[1] = 0;
+ }
+
+ /* callee folds in sum */
+ sum = m_adj_sum16(m, start, off, sum);
+
+ if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
+ ip6->ip6_src.s6_addr16[1] = s;
+ if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
+ ip6->ip6_dst.s6_addr16[1] = d;
+ }
+
+ th->th_sum = in6_pseudo(
+ &ip6->ip6_src, &ip6->ip6_dst,
+ sum + htonl(tlen + IPPROTO_TCP));
+ }
+ th->th_sum ^= 0xffff;
+ } else {
+ tcp_in6_cksum_stats(tlen);
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, off, tlen);
+ }
+ break;
+ }
+#endif /* INET6 */
+ default:
+ VERIFY(0);
+ /* NOTREACHED */
+ }
+
+ if (th->th_sum != 0) {
+ tcpstat.tcps_rcvbadsum++;
+ IF_TCP_STATINC(ifp, badformat);
+ return (-1);