/*
- * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
-#if INET6
#include <sys/domain.h>
-#endif /* INET6 */
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
#include <sys/kasl.h>
-#endif
+#endif /* XNU_TARGET_OS_OSX */
+#include <sys/priv.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <net/route.h>
#include <net/ntstat.h>
#include <net/content_filter.h>
+#include <net/multi_layer_pkt_log.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
-#if INET6
#include <netinet/ip6.h>
-#endif
#include <netinet/in_pcb.h>
-#if INET6
#include <netinet6/in6_pcb.h>
-#endif
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
-#if INET6
#include <netinet6/ip6_var.h>
-#endif
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_cc.h>
+#include <netinet/tcp_log.h>
#include <mach/sdt.h>
#if TCPDEBUG
#include <netinet/tcp_debug.h>
int tcp_sysctl_info(struct sysctl_oid *, void *, int, struct sysctl_req *);
static void tcp_connection_fill_info(struct tcpcb *tp,
struct tcp_connection_info *tci);
+static int tcp_get_mpkl_send_info(struct mbuf *, struct so_mpkl_send_info *);
/*
* TCP protocol interface to socket abstraction.
*/
-extern char *tcpstates[]; /* XXX ??? */
-
static int tcp_attach(struct socket *, struct proc *);
static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
-#if INET6
static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
static int tcp6_usr_connect(struct socket *, struct sockaddr *,
struct proc *);
-#endif /* INET6 */
static struct tcpcb *tcp_disconnect(struct tcpcb *);
static struct tcpcb *tcp_usrclosed(struct tcpcb *);
extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
}
if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
- so->so_linger = TCP_LINGERTIME * hz;
+ so->so_linger = (short)(TCP_LINGERTIME * hz);
}
tp = sototcpcb(so);
out:
}
#if NECP
-#define COMMON_START() TCPDEBUG0; \
+#define COMMON_START_ALLOW_FLOW_DIVERT(allow) TCPDEBUG0; \
do { \
if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \
return (EINVAL); \
- if (necp_socket_should_use_flow_divert(inp)) \
+ if (!(allow) && necp_socket_should_use_flow_divert(inp)) \
return (EPROTOTYPE); \
tp = intotcpcb(inp); \
TCPDEBUG1(); \
calculate_tcp_clock(); \
} while (0)
#else /* NECP */
-#define COMMON_START() TCPDEBUG0; \
+#define COMMON_START_ALLOW_FLOW_DIVERT(allow) TCPDEBUG0; \
do { \
if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \
return (EINVAL); \
} while (0)
#endif /* !NECP */
+#define COMMON_START() COMMON_START_ALLOW_FLOW_DIVERT(false)
#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
struct tcpcb *tp;
struct sockaddr_in *sinp;
- COMMON_START();
+ COMMON_START_ALLOW_FLOW_DIVERT(true);
if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
error = EAFNOSUPPORT;
COMMON_END(PRU_BIND);
}
-#if INET6
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
struct tcpcb *tp;
struct sockaddr_in6 *sin6p;
- COMMON_START();
+ COMMON_START_ALLOW_FLOW_DIVERT(true);
if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
error = EAFNOSUPPORT;
}
COMMON_END(PRU_BIND);
}
-#endif /* INET6 */
/*
* Prepare to accept connections.
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
- COMMON_START();
+ COMMON_START_ALLOW_FLOW_DIVERT(true);
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, NULL, p);
}
if (error == 0) {
tp->t_state = TCPS_LISTEN;
}
+ TCP_LOG_LISTEN(tp, error);
COMMON_END(PRU_LISTEN);
}
-#if INET6
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
- COMMON_START();
+ COMMON_START_ALLOW_FLOW_DIVERT(true);
if (inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
if (error == 0) {
tp->t_state = TCPS_LISTEN;
}
+ TCP_LOG_LISTEN(tp, error);
COMMON_END(PRU_LISTEN);
}
-#endif /* INET6 */
static int
tcp_connect_complete(struct socket *so)
/* TFO delays the tcp_output until later, when the app calls write() */
if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
- if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, NULL, NULL)) {
+ if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, 0, NULL, NULL, NULL, NULL)) {
+ TCP_LOG_DROP_NECP(NULL, NULL, tp, true);
return EHOSTUNREACH;
}
}
}
#if NECP
-#if FLOW_DIVERT
- else if (necp_socket_should_use_flow_divert(inp)) {
- uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
- if (fd_ctl_unit > 0) {
- error = flow_divert_pcb_init(so, fd_ctl_unit);
- if (error == 0) {
- error = flow_divert_connect_out(so, nam, p);
- }
- } else {
- error = ENETDOWN;
- }
-
- return error;
- }
-#endif /* FLOW_DIVERT */
#if CONTENT_FILTER
- error = cfil_sock_attach(so);
+ error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
if (error != 0) {
return error;
}
#endif /* CONTENT_FILTER */
+#if FLOW_DIVERT
+ if (necp_socket_should_use_flow_divert(inp)) {
+ error = flow_divert_pcb_init(so);
+ if (error == 0) {
+ error = flow_divert_connect_out(so, nam, p);
+ }
+ return error;
+ }
+#endif /* FLOW_DIVERT */
#endif /* NECP */
tp = intotcpcb(inp);
TCPDEBUG1();
}
if ((error = tcp_connect(tp, nam, p)) != 0) {
+ TCP_LOG_CONNECT(tp, true, error);
goto out;
}
error = tcp_connect_complete(so);
+ TCP_LOG_CONNECT(tp, true, error);
+
COMMON_END(PRU_CONNECT);
}
case AF_INET:
error = tcp_usr_connect(so, dst, p);
break;
-#if INET6
case AF_INET6:
error = tcp6_usr_connect(so, dst, p);
break;
-#endif /* INET6 */
default:
VERIFY(0);
/* NOTREACHED */
pcid, flags, arg, arglen, uio, bytes_written);
}
-#if INET6
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
}
}
#if NECP
-#if FLOW_DIVERT
- else if (necp_socket_should_use_flow_divert(inp)) {
- uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
- if (fd_ctl_unit > 0) {
- error = flow_divert_pcb_init(so, fd_ctl_unit);
- if (error == 0) {
- error = flow_divert_connect_out(so, nam, p);
- }
- } else {
- error = ENETDOWN;
- }
-
- return error;
- }
-#endif /* FLOW_DIVERT */
#if CONTENT_FILTER
- error = cfil_sock_attach(so);
+ error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
if (error != 0) {
return error;
}
#endif /* CONTENT_FILTER */
+#if FLOW_DIVERT
+ if (necp_socket_should_use_flow_divert(inp)) {
+ error = flow_divert_pcb_init(so);
+ if (error == 0) {
+ error = flow_divert_connect_out(so, nam, p);
+ }
+ return error;
+ }
+#endif /* FLOW_DIVERT */
#endif /* NECP */
tp = intotcpcb(inp);
struct sockaddr_in sin;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
- return EINVAL;
+ error = EINVAL;
+ goto out;
}
in6_sin6_2_sin(&sin, sin6p);
+ /*
+ * Must disallow TCP ``connections'' to multicast addresses.
+ */
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) {
+ TCP_LOG_CONNECT(tp, true, error);
goto out;
}
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((error = tcp6_connect(tp, nam, p)) != 0) {
+ TCP_LOG_CONNECT(tp, true, error);
goto out;
}
error = tcp_connect_complete(so);
+
+ TCP_LOG_CONNECT(tp, true, error);
+
COMMON_END(PRU_CONNECT);
}
return tcp_usr_connectx_common(so, AF_INET6, src, dst, p, ifscope, aid,
pcid, flags, arg, arglen, uio, bytes_written);
}
-#endif /* INET6 */
/*
* Initiate disconnect from peer.
else if (necp_socket_should_use_flow_divert(inp)) {
return EPROTOTYPE;
}
-#if CONTENT_FILTER
- error = cfil_sock_attach(so);
- if (error != 0) {
- return error;
- }
-#endif /* CONTENT_FILTER */
+
#endif /* NECP */
tp = intotcpcb(inp);
TCPDEBUG1();
+ TCP_LOG_ACCEPT(tp, 0);
+
calculate_tcp_clock();
COMMON_END(PRU_ACCEPT);
}
-#if INET6
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
else if (necp_socket_should_use_flow_divert(inp)) {
return EPROTOTYPE;
}
-#if CONTENT_FILTER
- error = cfil_sock_attach(so);
- if (error != 0) {
- return error;
- }
-#endif /* CONTENT_FILTER */
+
#endif /* NECP */
tp = intotcpcb(inp);
TCPDEBUG1();
+ TCP_LOG_ACCEPT(tp, 0);
+
calculate_tcp_clock();
in6_mapped_peeraddr(so, nam);
COMMON_END(PRU_ACCEPT);
}
-#endif /* INET6 */
/*
* Mark the connection as being incapable of further output.
* After a receive, possibly send window update to peer.
*/
static int
-tcp_usr_rcvd(struct socket *so, __unused int flags)
+tcp_usr_rcvd(struct socket *so, int flags)
{
int error = 0;
struct inpcb *inp = sotoinpcb(so);
}
tcp_sbrcv_trim(tp, &so->so_rcv);
+ if (flags & MSG_WAITALL) {
+ tp->t_flags |= TF_ACKNOW;
+ }
+
/*
* This tcp_output is solely there to trigger window-updates.
* However, we really do not want these window-updates while we
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
- uint32_t msgpri = MSG_PRI_DEFAULT;
-#if INET6
+ uint32_t mpkl_len = 0; /* length of mbuf chain */
+ uint32_t mpkl_seq; /* sequence number where new data is added */
+ struct so_mpkl_send_info mpkl_send_info = {};
+
int isipv6;
-#endif
TCPDEBUG0;
if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD
TCPDEBUG1();
goto out;
}
-#if INET6
isipv6 = nam && nam->sa_family == AF_INET6;
-#endif /* INET6 */
tp = intotcpcb(inp);
TCPDEBUG1();
calculate_tcp_clock();
+ if (net_mpklog_enabled) {
+ mpkl_seq = tp->snd_una + so->so_snd.sb_cc;
+ if (m) {
+ mpkl_len = m_length(m);
+ }
+ if (so->so_flags1 & SOF1_MPKL_SEND_INFO) {
+ uuid_copy(mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
+ mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
+ }
+ }
+
if (control != NULL) {
- if (so->so_flags & SOF_ENABLE_MSGS) {
- /* Get the msg priority from control mbufs */
- error = tcp_get_msg_priority(control, &msgpri);
- if (error) {
+ if (control->m_len > 0 && net_mpklog_enabled) {
+ error = tcp_get_mpkl_send_info(control, &mpkl_send_info);
+ /*
+ * Interpretation of the returned code:
+ * 0: client wants us to use value passed in SCM_MPKL_SEND_INFO
+ * ENOMSG: SCM_MPKL_SEND_INFO was not present
+ * other: failure
+ */
+ if (error != 0 && error != ENOMSG) {
m_freem(control);
if (m != NULL) {
m_freem(m);
m = NULL;
goto out;
}
- m_freem(control);
- control = NULL;
- } else if (control->m_len) {
- /*
- * if not unordered, TCP should not have
- * control mbufs
- */
- m_freem(control);
- if (m != NULL) {
- m_freem(m);
- }
- control = NULL;
- m = NULL;
- error = EINVAL;
- goto out;
}
- }
-
- if (so->so_flags & SOF_ENABLE_MSGS) {
- VERIFY(m->m_flags & M_PKTHDR);
- m->m_pkthdr.msg_pri = msgpri;
+ /*
+ * Silently drop unsupported ancillary data messages
+ */
+ m_freem(control);
+ control = NULL;
}
/* MPTCP sublow socket buffers must not be compressed */
(so->so_snd.sb_flags & SB_NOCOMPRESS));
if (!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
- /* Call msg send if message delivery is enabled */
- if (so->so_flags & SOF_ENABLE_MSGS) {
- sbappendmsg_snd(&so->so_snd, m);
- } else {
- sbappendstream(&so->so_snd, m);
- }
+ sbappendstream(&so->so_snd, m);
if (nam && tp->t_state < TCPS_SYN_SENT) {
/*
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
-#if INET6
if (isipv6) {
error = tcp6_connect(tp, nam, p);
- } else
-#endif /* INET6 */
- error = tcp_connect(tp, nam, p);
+ } else {
+ error = tcp_connect(tp, nam, p);
+ }
if (error) {
+ TCP_LOG_CONNECT(tp, true, error);
goto out;
}
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tp->max_sndwnd = tp->snd_wnd;
tcp_mss(tp, -1, IFSCOPE_NONE);
+
+ TCP_LOG_CONNECT(tp, true, error);
+
+ /* The sequence number of the data is past the SYN */
+ mpkl_seq = tp->iss + 1;
}
if (flags & PRUS_EOF) {
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
-#if INET6
if (isipv6) {
error = tcp6_connect(tp, nam, p);
- } else
-#endif /* INET6 */
- error = tcp_connect(tp, nam, p);
+ } else {
+ error = tcp_connect(tp, nam, p);
+ }
if (error) {
+ TCP_LOG_CONNECT(tp, true, error);
goto out;
}
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tp->max_sndwnd = tp->snd_wnd;
tcp_mss(tp, -1, IFSCOPE_NONE);
+
+ TCP_LOG_CONNECT(tp, true, error);
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flagsext |= TF_FORCE;
tp->t_flagsext &= ~TF_FORCE;
}
+ if (net_mpklog_enabled && (inp = tp->t_inpcb) != NULL &&
+ ((inp->inp_last_outifp != NULL &&
+ (inp->inp_last_outifp->if_xflags & IFXF_MPK_LOG)) ||
+ (inp->inp_boundifp != NULL &&
+ (inp->inp_boundifp->if_xflags & IFXF_MPK_LOG)))) {
+ MPKL_TCP_SEND(tcp_mpkl_log_object,
+ mpkl_send_info.mpkl_proto, mpkl_send_info.mpkl_uuid,
+ ntohs(inp->inp_lport), ntohs(inp->inp_fport),
+ mpkl_seq, mpkl_len,
+ so->last_pid, so->so_log_seqn++);
+ }
/*
* We wait for the socket to successfully connect before returning.
.pru_preconnect = tcp_usr_preconnect,
};
-#if INET6
struct pr_usrreqs tcp6_usrreqs = {
.pru_abort = tcp_usr_abort,
.pru_accept = tcp6_usr_accept,
.pru_soreceive = soreceive,
.pru_preconnect = tcp_usr_preconnect,
};
-#endif /* INET6 */
/*
* Common subroutine to open a TCP connection to remote host specified
inp->inp_flowhash = inp_calc_flowhash(inp);
}
- tcp_set_max_rwinscale(tp, so, outif);
+ tcp_set_max_rwinscale(tp, so);
soisconnecting(so);
tcpstat.tcps_connattempt++;
tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
+ tp->t_connect_time = tcp_now;
if (nstat_collect) {
nstat_route_connect_attempt(inp->inp_route.ro_rt);
}
+ tcp_add_fsw_flow(tp, outif);
+
done:
if (outif != NULL) {
ifnet_release(outif);
return error;
}
-#if INET6
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
{
if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) {
inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
inp->inp_flow |=
- (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK);
+ (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
}
- tcp_set_max_rwinscale(tp, so, outif);
+ tcp_set_max_rwinscale(tp, so);
soisconnecting(so);
tcpstat.tcps_connattempt++;
TCP_CONN_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
+ tp->t_connect_time = tcp_now;
if (nstat_collect) {
nstat_route_connect_attempt(inp->inp_route.ro_rt);
}
+ tcp_add_fsw_flow(tp, outif);
+
done:
if (outif != NULL) {
ifnet_release(outif);
return error;
}
-#endif /* INET6 */
/*
* Export TCP internal state information via a struct tcp_info
bzero(ti, sizeof(*ti));
- ti->tcpi_state = tp->t_state;
+ ti->tcpi_state = (uint8_t)tp->t_state;
ti->tcpi_flowhash = inp->inp_flowhash;
if (tp->t_state > TCPS_LISTEN) {
ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
if (tp->t_state > TCPS_LISTEN) {
- ti->tcpi_synrexmits = tp->t_stat.synrxtshift;
+ ti->tcpi_synrexmits = (uint8_t)tp->t_stat.rxmitsyns;
}
ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets;
ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes;
ina6_local = itpl->itpl_local_sin6.sin6_addr;
if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) &&
itpl->itpl_local_sin6.sin6_scope_id) {
- ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id);
+ ina6_local.s6_addr16[1] = htons((uint16_t)itpl->itpl_local_sin6.sin6_scope_id);
}
ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) &&
itpl->itpl_remote_sin6.sin6_scope_id) {
- ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id);
+ ina6_remote.s6_addr16[1] = htons((uint16_t)itpl->itpl_remote_sin6.sin6_scope_id);
}
inp = in6_pcblookup_hash(pcbinfo,
struct inpcb *inp = tp->t_inpcb;
bzero(tci, sizeof(*tci));
- tci->tcpi_state = tp->t_state;
+ tci->tcpi_state = (uint8_t)tp->t_state;
if (tp->t_state > TCPS_LISTEN) {
if (TSTMP_SUPPORTED(tp)) {
tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS;
int error;
struct tcp_info ti = {};
struct info_tuple itpl;
-#if !CONFIG_EMBEDDED
- proc_t caller = PROC_NULL;
- proc_t caller_parent = PROC_NULL;
- char command_name[MAXCOMLEN + 1] = "";
- char parent_name[MAXCOMLEN + 1] = "";
-
- if ((caller = proc_self()) != PROC_NULL) {
- /* get process name */
- strlcpy(command_name, caller->p_comm, sizeof(command_name));
-
- /* get parent process name if possible */
- if ((caller_parent = proc_find(caller->p_ppid)) != PROC_NULL) {
- strlcpy(parent_name, caller_parent->p_comm,
- sizeof(parent_name));
- proc_rele(caller_parent);
- }
-
- if ((escape_str(command_name, strlen(command_name) + 1,
- sizeof(command_name)) == 0) &&
- (escape_str(parent_name, strlen(parent_name) + 1,
- sizeof(parent_name)) == 0)) {
- kern_asl_msg(LOG_DEBUG, "messagetracer",
- 5,
- "com.apple.message.domain",
- "com.apple.kernel.tcpstat", /* 1 */
- "com.apple.message.signature",
- "tcpinfo", /* 2 */
- "com.apple.message.signature2", command_name, /* 3 */
- "com.apple.message.signature3", parent_name, /* 4 */
- "com.apple.message.summarize", "YES", /* 5 */
- NULL);
- }
- }
-
- if (caller != PROC_NULL) {
- proc_rele(caller);
- }
-#endif /* !CONFIG_EMBEDDED */
if (req->newptr == USER_ADDR_NULL) {
return EINVAL;
tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info);
}
+void
+tcp_clear_keep_alive_offload(struct socket *so)
+{
+ struct inpcb *inp;
+ struct ifnet *ifp;
+ /*
+  * Clear INP2_KEEPALIVE_OFFLOAD on the socket's inpcb and decrement
+  * if_tcp_kao_cnt on the associated interface. Returns without doing
+  * anything when the socket has no inpcb or the flag is not set.
+  */
+ inp = sotoinpcb(so);
+ if (inp == NULL) {
+ return;
+ }
+
+ if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) == 0) {
+ return;
+ }
+ /* Prefer the bound interface; fall back to the last output interface. */
+ ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
+ inp->inp_last_outifp;
+ if (ifp == NULL) {
+ panic("%s: so %p inp %p ifp NULL",
+ __func__, so, inp);
+ }
+ /* The count and the flag are changed together under the exclusive ifnet lock. */
+ ifnet_lock_exclusive(ifp);
+ /* A zero count at this point is treated as fatal rather than decremented. */
+ if (ifp->if_tcp_kao_cnt == 0) {
+ panic("%s: so %p inp %p ifp %p if_tcp_kao_cnt == 0",
+ __func__, so, inp, ifp);
+ }
+ ifp->if_tcp_kao_cnt--;
+ inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
+
+ ifnet_lock_done(ifp);
+}
+/*
+ * Mark the socket's inpcb with INP2_KEEPALIVE_OFFLOAD and account for it
+ * in the interface's if_tcp_kao_cnt.
+ *
+ * The interface used is inp_boundifp when set, otherwise inp_last_outifp.
+ * `proc' is only used to identify the caller in log messages; when it is
+ * NULL the caller is logged as "kernel" with pid 0.
+ *
+ * Returns:
+ *   0             the flag was set by this call, or was already set
+ *   ECONNRESET    the socket has no inpcb
+ *   ENXIO         neither interface pointer is set
+ *   ETOOMANYREFS  if_tcp_kao_cnt is not below if_tcp_kao_max
+ *   other         non-zero value returned by if_get_tcp_kao_max()
+ */
+static int
+tcp_set_keep_alive_offload(struct socket *so, struct proc *proc)
+{
+ int error = 0;
+ struct inpcb *inp;
+ struct ifnet *ifp;
+
+ inp = sotoinpcb(so);
+ if (inp == NULL) {
+ return ECONNRESET;
+ }
+ if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) != 0) {
+ return 0; /* flag already set; the count is left untouched */
+ }
+ /* Prefer the bound interface; fall back to the last output interface. */
+ ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
+ inp->inp_last_outifp;
+ if (ifp == NULL) {
+ error = ENXIO;
+ os_log_info(OS_LOG_DEFAULT,
+ "%s: error %d for proc %s[%u] out ifp is not set\n",
+ __func__, error,
+ proc != NULL ? proc->p_comm : "kernel",
+ proc != NULL ? proc->p_pid : 0);
+ return ENXIO;
+ }
+ /* NOTE(review): presumably refreshes ifp->if_tcp_kao_max -- confirm against its definition. */
+ error = if_get_tcp_kao_max(ifp);
+ if (error != 0) {
+ return error;
+ }
+ /* The limit check, the increment and the flag update all happen under the exclusive ifnet lock. */
+ ifnet_lock_exclusive(ifp);
+ if (ifp->if_tcp_kao_cnt < ifp->if_tcp_kao_max) {
+ ifp->if_tcp_kao_cnt++;
+ inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
+ } else {
+ error = ETOOMANYREFS;
+ os_log_info(OS_LOG_DEFAULT,
+ "%s: error %d for proc %s[%u] if_tcp_kao_max %u\n",
+ __func__, error,
+ proc != NULL ? proc->p_comm : "kernel",
+ proc != NULL ? proc->p_pid : 0,
+ ifp->if_tcp_kao_max);
+ }
+ ifnet_lock_done(ifp);
+
+ return error;
+}
+
/*
* The new sockopt interface makes it possible for us to block in the
* copyin/out step (if we take a page fault). Taking a page fault at
if (sopt->sopt_level != IPPROTO_TCP &&
!(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH ||
sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) {
-#if INET6
if (SOCK_CHECK_DOM(so, PF_INET6)) {
error = ip6_ctloutput(so, sopt);
- } else
-#endif /* INET6 */
- error = ip_ctloutput(so, sopt);
+ } else {
+ error = ip_ctloutput(so, sopt);
+ }
return error;
}
tp = intotcpcb(inp);
break;
case TCP_KEEPALIVE_OFFLOAD:
+ if ((error = priv_check_cred(kauth_cred_get(),
+ PRIV_NETINET_TCP_KA_OFFLOAD, 0)) != 0) {
+ break;
+ }
error = sooptcopyin(sopt, &optval, sizeof(optval),
sizeof(optval));
if (error) {
break;
}
if (optval != 0) {
- inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
+ error = tcp_set_keep_alive_offload(so,
+ sopt->sopt_p);
} else {
- inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
+ tcp_clear_keep_alive_offload(so);
}
break;
mptcp_reset_keepalive(tp);
}
} else {
- tp->t_adaptive_rtimo = optval;
+ tp->t_adaptive_rtimo = (uint8_t)optval;
}
break;
case TCP_ADAPTIVE_WRITE_TIMEOUT:
error = EINVAL;
break;
} else {
- tp->t_adaptive_wtimo = optval;
- }
- break;
- case TCP_ENABLE_MSGS:
- error = sooptcopyin(sopt, &optval, sizeof(optval),
- sizeof(optval));
- if (error) {
- break;
- }
- if (optval < 0 || optval > 1) {
- error = EINVAL;
- } else if (optval == 1) {
- /*
- * Check if messages option is already
- * enabled, if so return.
- */
- if (so->so_flags & SOF_ENABLE_MSGS) {
- VERIFY(so->so_msg_state != NULL);
- break;
- }
-
- /*
- * allocate memory for storing message
- * related state
- */
- VERIFY(so->so_msg_state == NULL);
- MALLOC(so->so_msg_state,
- struct msg_state *,
- sizeof(struct msg_state),
- M_TEMP, M_WAITOK | M_ZERO);
- if (so->so_msg_state == NULL) {
- error = ENOMEM;
- break;
- }
-
- /* Enable message delivery */
- so->so_flags |= SOF_ENABLE_MSGS;
- } else {
- /*
- * Can't disable message delivery on socket
- * because of restrictions imposed by
- * encoding/decoding
- */
- error = EINVAL;
+ tp->t_adaptive_wtimo = (uint8_t)optval;
}
break;
case TCP_SENDMOREACKS:
}
break;
case TCP_FASTOPEN_FORCE_HEURISTICS:
+
+ break;
+ case TCP_FASTOPEN_FORCE_ENABLE:
error = sooptcopyin(sopt, &optval, sizeof(optval),
sizeof(optval));
break;
}
if (optval) {
- tp->t_flagsext |= TF_FASTOPEN_HEUR;
+ tp->t_flagsext |= TF_FASTOPEN_FORCE_ENABLE;
} else {
- tp->t_flagsext &= ~TF_FASTOPEN_HEUR;
+ tp->t_flagsext &= ~TF_FASTOPEN_FORCE_ENABLE;
}
break;
optval = tfo_enabled(tp);
break;
case TCP_FASTOPEN_FORCE_HEURISTICS:
- optval = (tp->t_flagsext & TF_FASTOPEN_HEUR) ? 1 : 0;
+ optval = 0;
+ break;
+ case TCP_FASTOPEN_FORCE_ENABLE:
+ optval = (tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) ? 1 : 0;
break;
case TCP_MEASURE_SND_BW:
optval = tp->t_flagsext & TF_MEASURESNDBW;
optval = 0;
}
break;
-
- case TCP_ENABLE_MSGS:
- if (so->so_flags & SOF_ENABLE_MSGS) {
- optval = 1;
- } else {
- optval = 0;
- }
- break;
case TCP_SENDMOREACKS:
if (tp->t_flagsext & TF_NOSTRETCHACK) {
optval = 1;
struct tcpcb *tp;
struct inpcb *inp;
int error;
-#if INET6
int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0;
-#endif
error = in_pcballoc(so, &tcbinfo, p);
if (error) {
so->so_snd.sb_flags |= SB_AUTOSIZE;
}
-#if INET6
if (isipv6) {
inp->inp_vflag |= INP_IPV6;
inp->in6p_hops = -1; /* use kernel default */
- } else
-#endif /* INET6 */
- inp->inp_vflag |= INP_IPV4;
+ } else {
+ inp->inp_vflag |= INP_IPV4;
+ }
tp = tcp_newtcpcb(inp);
if (tp == NULL) {
int nofd = so->so_state & SS_NOFDREF; /* XXX */
so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
-#if INET6
if (isipv6) {
in6_pcbdetach(inp);
- } else
-#endif /* INET6 */
- in_pcbdetach(inp);
+ } else {
+ in_pcbdetach(inp);
+ }
so->so_state |= nofd;
return ENOBUFS;
}
struct tcpcb *, tp,
int32_t, TCPS_FIN_WAIT_1);
tp->t_state = TCPS_FIN_WAIT_1;
+ TCP_LOG_CONNECTION_SUMMARY(tp);
break;
case TCPS_CLOSE_WAIT:
struct tcpcb *, tp,
int32_t, TCPS_LAST_ACK);
tp->t_state = TCPS_LAST_ACK;
+ TCP_LOG_CONNECTION_SUMMARY(tp);
break;
}
if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
tcpstat.tcps_snd_swcsum_bytes += len;
}
-#if INET6
void
tcp_in6_cksum_stats(u_int32_t len)
{
tcpstat.tcps_snd6_swcsum_bytes += len;
}
-/*
- * When messages are enabled on a TCP socket, the message priority
- * is sent as a control message. This function will extract it.
- */
int
-tcp_get_msg_priority(struct mbuf *control, uint32_t *msgpri)
+tcp_get_mpkl_send_info(struct mbuf *control,
+ struct so_mpkl_send_info *mpkl_send_info)
{
struct cmsghdr *cm;
- if (control == NULL) {
+ /*
+  * Walk the control messages in `control' looking for a
+  * SOL_SOCKET / SCM_MPKL_SEND_INFO entry and copy its payload into
+  * *mpkl_send_info.
+  *
+  * Returns 0 when such an entry was found and copied, ENOMSG when the
+  * walk ends without finding one, and EINVAL when either argument is
+  * NULL or a control message has an unacceptable cmsg_len.
+  */
+ if (control == NULL || mpkl_send_info == NULL) {
return EINVAL;
}
- for (cm = M_FIRST_CMSGHDR(control);
- is_cmsg_valid(control, cm);
+ for (cm = M_FIRST_CMSGHDR(control); cm;
cm = M_NXT_CMSGHDR(control, cm)) {
- if (cm->cmsg_level == SOL_SOCKET &&
- cm->cmsg_type == SCM_MSG_PRIORITY) {
- if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
- return EINVAL;
- }
- *msgpri = *(uint32_t *)(void *)CMSG_DATA(cm);
- if (*msgpri < MSG_PRI_MIN || *msgpri > MSG_PRI_MAX) {
- return EINVAL;
- }
- break;
+ if (cm->cmsg_len < sizeof(struct cmsghdr) || /* shorter than a header */
+ cm->cmsg_len > control->m_len) { /* longer than the mbuf's m_len */
+ return EINVAL;
+ }
+ if (cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_type != SCM_MPKL_SEND_INFO) {
+ continue; /* some other control message; keep looking */
}
+ if (cm->cmsg_len != CMSG_LEN(sizeof(struct so_mpkl_send_info))) { /* payload must be exactly one struct */
+ return EINVAL;
+ }
+ memcpy(mpkl_send_info, CMSG_DATA(cm),
+ sizeof(struct so_mpkl_send_info));
+ return 0; /* the first matching entry wins */
}
- return 0;
+ return ENOMSG; /* no SCM_MPKL_SEND_INFO entry was present */
}
-#endif /* INET6 */