/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
struct inpcbinfo tcbinfo;
static void tcp_dooptions(struct tcpcb *,
- u_char *, int, struct tcphdr *, struct tcpopt *);
+ u_char *, int, struct tcphdr *, struct tcpopt *, unsigned int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
#endif
struct m_tag *fwd_tag;
u_char ip_ecn = IPTOS_ECN_NOTECT;
+ unsigned int ifscope;
+
+ /*
+ * Record the interface on which this segment arrived; this does not
+ * affect normal data output (for non-detached TCP), as it provides a
+ * hint about which route and interface to use for sending in the
+ * absence of a PCB, when scoped routing (and thus source interface
+ * selection) is enabled.
+ */
+ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+ ifscope = m->m_pkthdr.rcvif->if_index;
+ else
+ ifscope = IFSCOPE_NONE;
/* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL);
ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
}
+ /*
+ * Use the interface scope information from the PCB for outbound
+ * segments. If the PCB isn't present and if scoped routing is
+ * enabled, tcp_respond will use the scope of the interface on
+ * which the segment arrived.
+ */
+ if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
+ ifscope = inp->inp_boundif;
#if IPSEC
if (ipsec_bypass == 0) {
#if INET6
struct inpcb *oinp = sotoinpcb(so);
#endif /* INET6 */
int ogencnt = so->so_gencnt;
+ unsigned int head_ifscope;
+
+ /* Get listener's bound-to-interface, if any */
+ head_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : IFSCOPE_NONE;
#if !IPSEC
/*
*/
dropsocket++;
inp = (struct inpcb *)so->so_pcb;
+
+ /*
+ * Inherit INP_BOUND_IF from listener; testing if
+ * head_ifscope is non-zero is sufficient, since it
+ * can only be set to a non-zero value earlier if
+ * the listener has such a flag set.
+ */
+#if INET6
+ if (head_ifscope != IFSCOPE_NONE && !isipv6) {
+#else
+ if (head_ifscope != IFSCOPE_NONE) {
+#endif /* INET6 */
+ inp->inp_flags |= INP_BOUND_IF;
+ inp->inp_boundif = head_ifscope;
+ }
#if INET6
if (isipv6)
inp->in6p_laddr = ip6->ip6_dst;
* else do it below (after getting remote address).
*/
if (tp->t_state != TCPS_LISTEN && optp)
- tcp_dooptions(tp, optp, optlen, th, &to);
+ tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
if (to.to_flags & TOF_SCALE) {
tp->ts_recent_age = tcp_now;
}
if (to.to_flags & TOF_MSS)
- tcp_mss(tp, to.to_mss);
+ tcp_mss(tp, to.to_mss, ifscope);
if (tp->sack_enable) {
if (!(to.to_flags & TOF_SACK))
tp->sack_enable = 0;
tp->ts_recent = to.to_tsval;
}
+ /* Force acknowledgment if we received a FIN */
+
+ if (thflags & TH_FIN)
+ tp->t_flags |= TF_ACKNOW;
+
if (tlen == 0) {
if (SEQ_GT(th->th_ack, tp->snd_una) &&
SEQ_LEQ(th->th_ack, tp->snd_max) &&
FREE(sin, M_SONAME);
}
- tcp_dooptions(tp, optp, optlen, th, &to);
+ tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
if (tp->sack_enable) {
if (!(to.to_flags & TOF_SACK))
/* ECN-setup SYN */
tp->ecn_flags |= (TE_SETUPRECEIVED | TE_SENDIPECT);
}
+#ifdef IFEF_NOWINDOWSCALE
+ if (m->m_pkthdr.rcvif != NULL &&
+ (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE) != 0)
+ {
+ // Window scaling is not enabled on this interface
+ tp->t_flags &= ~(TF_REQ_SCALE);
+ }
+#endif
goto trimthenstep6;
}
tp->t_dupacks = 0;
break;
}
+
+ if (!IN_FASTRECOVERY(tp)) {
+ /*
+ * We were not in fast recovery. Reset the duplicate ack
+ * counter.
+ */
+ tp->t_dupacks = 0;
+ }
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tcp_do_newreno || tp->sack_enable) {
- if (IN_FASTRECOVERY(tp)) {
+ else {
+ if (tcp_do_newreno || tp->sack_enable) {
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
if (tp->sack_enable)
tcp_sack_partialack(tp, th);
else
- tcp_newreno_partial_ack(tp, th);
- } else {
- /*
- * Out of fast recovery.
- * Window inflation should have left us
- * with approximately snd_ssthresh
- * outstanding data.
- * But in case we would be inclined to
- * send a burst, better to do it via
- * the slow start mechanism.
- */
- if (SEQ_GT(th->th_ack +
- tp->snd_ssthresh,
- tp->snd_max))
- tp->snd_cwnd = tp->snd_max -
- th->th_ack +
- tp->t_maxseg;
- else
- tp->snd_cwnd = tp->snd_ssthresh;
+ tcp_newreno_partial_ack(tp, th);
+ }
+ else {
+ if (tcp_do_newreno) {
+ long ss = tp->snd_max - th->th_ack;
+
+ /*
+ * Complete ack. Deflate the congestion window (to ssthresh, or
+ * to the outstanding data plus one segment) and exit fast recovery.
+ *
+ * Window inflation should have left us with approx.
+ * snd_ssthresh outstanding data. But in case we
+ * would be inclined to send a burst, better to do
+ * it via the slow start mechanism.
+ */
+ if (ss < tp->snd_ssthresh)
+ tp->snd_cwnd = ss + tp->t_maxseg;
+ else
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+ else {
+ /*
+ * Clamp the congestion window to the crossover point
+ * and exit fast recovery.
+ */
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+
+ EXIT_FASTRECOVERY(tp);
+ tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
}
- } else {
- if (tp->t_dupacks >= tcprexmtthresh &&
- tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd = tp->snd_ssthresh;
+ else {
+ /*
+ * Clamp the congestion window to the crossover point
+ * and exit fast recovery in non-newreno and non-SACK case.
+ */
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ EXIT_FASTRECOVERY(tp);
+ tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
+ }
}
- tp->t_dupacks = 0;
- tp->t_bytes_acked = 0;
+
+
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
soisdisconnected(so);
}
tp->t_state = TCPS_FIN_WAIT_2;
- goto drop;
+ /* fall through and make sure we also recognize data ACKed with the FIN */
}
+ tp->t_flags |= TF_ACKNOW;
break;
/*
add_to_time_wait(tp);
soisdisconnected(so);
}
+ tp->t_flags |= TF_ACKNOW;
break;
/*
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags&TH_FIN)) &&
+ if ((tlen || (thflags & TH_FIN)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
tcp_seq save_end = th->th_seq + tlen;
if (thflags & TH_ACK)
/* mtod() below is safe as long as hdr dropping is delayed */
tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
- TH_RST, m->m_pkthdr.rcvif);
+ TH_RST, ifscope);
else {
if (thflags & TH_SYN)
tlen++;
/* mtod() below is safe as long as hdr dropping is delayed */
tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
- (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.rcvif);
+ (tcp_seq)0, TH_RST|TH_ACK, ifscope);
}
/* destroy temporarily created socket */
if (dropsocket) {
}
static void
-tcp_dooptions(tp, cp, cnt, th, to)
+tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
/*
* Parse TCP options and place in tcpopt.
*/
int cnt;
struct tcphdr *th;
struct tcpopt *to;
+ unsigned int input_ifscope;
{
u_short mss = 0;
int opt, optlen;
}
}
if (th->th_flags & TH_SYN)
- tcp_mss(tp, mss); /* sets t_maxseg */
+ tcp_mss(tp, mss, input_ifscope); /* sets t_maxseg */
}
/*
*
*/
void
-tcp_mss(tp, offer)
+tcp_mss(tp, offer, input_ifscope)
struct tcpcb *tp;
int offer;
+ unsigned int input_ifscope;
{
register struct rtentry *rt;
struct ifnet *ifp;
else
#endif /* INET6 */
{
- rt = tcp_rtlookup(inp);
+ rt = tcp_rtlookup(inp, input_ifscope);
if (rt && (rt->rt_gateway->sa_family == AF_LINK ||
rt->rt_ifp->if_flags & IFF_LOOPBACK))
isnetlocal = TRUE;
rt = tcp_rtlookup6(tp->t_inpcb);
else
#endif /* INET6 */
- rt = tcp_rtlookup(tp->t_inpcb);
+ rt = tcp_rtlookup(tp->t_inpcb, IFSCOPE_NONE);
if (rt == NULL) {
lck_mtx_unlock(rt_mtx);
return (