*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
*
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
*/
extern char *tcpstates[]; /* XXX ??? */
-static int tcp_attach __P((struct socket *, struct proc *));
-static int tcp_connect __P((struct tcpcb *, struct sockaddr *,
- struct proc *));
+static int tcp_attach(struct socket *, struct proc *);
+static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
#if INET6
-static int tcp6_connect __P((struct tcpcb *, struct sockaddr *,
- struct proc *));
+static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
#endif /* INET6 */
static struct tcpcb *
- tcp_disconnect __P((struct tcpcb *));
+ tcp_disconnect(struct tcpcb *);
static struct tcpcb *
- tcp_usrclosed __P((struct tcpcb *));
+ tcp_usrclosed(struct tcpcb *);
#if TCPDEBUG
#define TCPDEBUG0 int ostate = 0
static int
tcp_usr_attach(struct socket *so, int proto, struct proc *p)
{
- int s = splnet();
int error;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = 0;
tp = sototcpcb(so);
out:
TCPDEBUG2(PRU_ATTACH);
- splx(s);
return error;
}
static int
tcp_usr_detach(struct socket *so)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
TCPDEBUG0;
- if (inp == 0) {
- splx(s);
+ if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
return EINVAL; /* XXX */
}
+#if 1
+ lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+#endif
tp = intotcpcb(inp);
/* In case we got disconnected from the peer */
if (tp == 0)
tp = tcp_disconnect(tp);
out:
TCPDEBUG2(PRU_DETACH);
- splx(s);
return error;
}
#define COMMON_START() TCPDEBUG0; \
do { \
- if (inp == 0) { \
- splx(s); \
+ if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \
return EINVAL; \
} \
tp = intotcpcb(inp); \
TCPDEBUG1(); \
} while(0)
-#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out
+#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
/*
static int
tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
}
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
- if (ip6_mapped_addr_on && (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
inp->inp_vflag |= INP_IPV4;
else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
static int
tcp_usr_listen(struct socket *so, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
COMMON_START();
if (inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
- if (ip6_mapped_addr_on &&
- (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, (struct sockaddr *)0, p);
}
static int
tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
struct sockaddr_in sin;
- if (!ip6_mapped_addr_on ||
- (inp->inp_flags & IN6P_IPV6_V6ONLY))
- return(EINVAL);
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
+ return (EINVAL);
in6_sin6_2_sin(&sin, sin6p);
inp->inp_vflag |= INP_IPV4;
static int
tcp_usr_disconnect(struct socket *so)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
-
+
+#if 1
+ lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+#endif
COMMON_START();
/* In case we got disconnected from the peer */
if (tp == 0)
static int
tcp_usr_accept(struct socket *so, struct sockaddr **nam)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = NULL;
error = ECONNABORTED;
goto out;
}
- if (inp == 0) {
- splx(s);
+ if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
return (EINVAL);
}
tp = intotcpcb(inp);
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = NULL;
error = ECONNABORTED;
goto out;
}
- if (inp == 0) {
- splx(s);
+ if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
return (EINVAL);
}
tp = intotcpcb(inp);
static int
tcp_usr_shutdown(struct socket *so)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
static int
tcp_usr_rcvd(struct socket *so, int flags)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *nam, struct mbuf *control, struct proc *p)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
#endif
TCPDEBUG0;
- if (inp == NULL) {
+ if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
/*
* OOPS! we lost a race, the TCP session got reset after
* we checked SS_CANTSENDMORE, eg: while doing uiomove or a
static int
tcp_usr_abort(struct socket *so)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
if (tp == 0)
goto out;
tp = tcp_drop(tp, ECONNABORTED);
+ so->so_usecount--;
COMMON_END(PRU_ABORT);
}
static int
tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
{
- int s = splnet();
int error = 0;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp;
tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
- in_setsockaddr, sosend, soreceive, sopoll
+ in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
};
#if INET6
tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach,
tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd,
tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
- in6_mapped_sockaddr, sosend, soreceive, sopoll
+ in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp
};
#endif /* INET6 */
error = in_pcbladdr(inp, nam, &ifaddr);
if (error)
return error;
+
+ tcp_unlock(inp->inp_socket, 0, 0);
oinp = in_pcblookup_hash(inp->inp_pcbinfo,
sin->sin_addr, sin->sin_port,
inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
: ifaddr->sin_addr,
inp->inp_lport, 0, NULL);
+
+ tcp_lock(inp->inp_socket, 0, 0);
if (oinp) {
+ if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */
+ tcp_lock(oinp->inp_socket, 1, 0);
+ if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+ if (oinp != inp)
+ tcp_unlock(oinp->inp_socket, 1, 0);
+ goto skip_oinp;
+ }
+
if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
otp->t_state == TCPS_TIME_WAIT &&
otp->t_starttime < tcp_msl &&
(otp->t_flags & TF_RCVD_CC))
otp = tcp_close(otp);
- else
+ else {
+ printf("tcp_connect: inp=%x err=EADDRINUSE\n", inp);
+ if (oinp != inp)
+ tcp_unlock(oinp->inp_socket, 1, 0);
return EADDRINUSE;
+ }
+ if (oinp != inp)
+ tcp_unlock(oinp->inp_socket, 1, 0);
}
+skip_oinp:
if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr->sin_addr.s_addr :
inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
inp->inp_lport == sin->sin_port)
return EINVAL;
+ if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
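+ /*
+ * The pcbinfo lock could not be taken without blocking. Drop the
+ * socket lock, block on the pcbinfo lock, then retake the socket
+ * lock, so the two locks are not acquired in inverted order (this
+ * lock inversion shows up mostly with UDP multicast packets).
+ */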
+ socket_unlock(inp->inp_socket, 0);
+ lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
+ socket_lock(inp->inp_socket, 0);
+ }
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = ifaddr->sin_addr;
inp->inp_faddr = sin->sin_addr;
inp->inp_fport = sin->sin_port;
in_pcbrehash(inp);
+ lck_rw_done(inp->inp_pcbinfo->mtx);
/* Compute window scaling to request. */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
struct socket *so = inp->inp_socket;
struct tcpcb *otp;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
- struct in6_addr *addr6;
+ struct in6_addr addr6;
struct rmxp_tao *taop;
struct rmxp_tao tao_noncached;
int error;
error = in6_pcbladdr(inp, nam, &addr6);
if (error)
return error;
+ tcp_unlock(inp->inp_socket, 0, 0);
oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
&sin6->sin6_addr, sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
- ? addr6
+ ? &addr6
: &inp->in6p_laddr,
inp->inp_lport, 0, NULL);
+ tcp_lock(inp->inp_socket, 0, 0);
if (oinp) {
if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
otp->t_state == TCPS_TIME_WAIT &&
else
return EADDRINUSE;
}
+ if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
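+ /*
+ * The pcbinfo lock could not be taken without blocking. Drop the
+ * socket lock, block on the pcbinfo lock, then retake the socket
+ * lock, so the two locks are not acquired in inverted order (this
+ * lock inversion shows up mostly with UDP multicast packets).
+ */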
+ socket_unlock(inp->inp_socket, 0);
+ lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
+ socket_lock(inp->inp_socket, 0);
+ }
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
- inp->in6p_laddr = *addr6;
+ inp->in6p_laddr = addr6;
inp->in6p_faddr = sin6->sin6_addr;
inp->inp_fport = sin6->sin6_port;
if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != NULL)
inp->in6p_flowinfo = sin6->sin6_flowinfo;
in_pcbrehash(inp);
+ lck_rw_done(inp->inp_pcbinfo->mtx);
/* Compute window scaling to request. */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
struct socket *so;
struct sockopt *sopt;
{
- int error, opt, optval, s;
+ int error, opt, optval;
struct inpcb *inp;
struct tcpcb *tp;
error = 0;
- s = splnet(); /* XXX */
inp = sotoinpcb(so);
if (inp == NULL) {
- splx(s);
return (ECONNRESET);
}
if (sopt->sopt_level != IPPROTO_TCP) {
else
#endif /* INET6 */
error = ip_ctloutput(so, sopt);
- splx(s);
return (error);
}
tp = intotcpcb(inp);
if (tp == NULL) {
- splx(s);
return (ECONNRESET);
}
if (error)
break;
- if (optval > 0 && optval <= tp->t_maxseg)
+ if (optval > 0 && optval <= tp->t_maxseg &&
+ optval + 40 >= tcp_minmss)
tp->t_maxseg = optval;
else
error = EINVAL;
break;
+ case TCP_KEEPALIVE:
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ break;
+ if (optval < 0)
+ error = EINVAL;
+ else
+ tp->t_keepidle = optval * PR_SLOWHZ;
+ break;
+
default:
error = ENOPROTOOPT;
break;
case TCP_MAXSEG:
optval = tp->t_maxseg;
break;
+ case TCP_KEEPALIVE:
+ optval = tp->t_keepidle / PR_SLOWHZ;
+ break;
case TCP_NOOPT:
optval = tp->t_flags & TF_NOOPT;
break;
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
}
- splx(s);
return (error);
}
SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
&tcp_recvspace , 0, "Maximum incoming TCP datagram size");
+__private_extern__ int tcp_sockthreshold = 256;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW,
+ &tcp_sockthreshold , 0, "TCP Socket size increased if less than threshold");
+
+#define TCP_INCREASED_SPACE 65535 /* Automatically increase tcp send/rcv space to this value */
/*
* Attach TCP protocol to socket, allocating
* internet protocol control block, tcp control block,
int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != NULL;
#endif
- if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- error = soreserve(so, tcp_sendspace, tcp_recvspace);
- if (error)
- return (error);
- }
error = in_pcballoc(so, &tcbinfo, p);
if (error)
return (error);
+
inp = sotoinpcb(so);
+
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ /*
+ * The goal is to give clients large default send/receive windows
+ * (TCP_INCREASED_SPACE) without hogging mbuf space on servers. This is
+ * done by comparing the number of tcpcbs in use against a threshold and
+ * bumping the default send and receive space only while under that
+ * threshold. The theory is that busy servers have many more active
+ * tcpcbs and do not want the potential memory penalty of much larger
+ * socket buffers. The sockthreshold sysctl allows that threshold value
+ * to be fine-tuned.
+ */
+
+ if (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold)
+ error = soreserve(so, MAX(TCP_INCREASED_SPACE, tcp_sendspace), MAX(TCP_INCREASED_SPACE,tcp_recvspace));
+ else
+ error = soreserve(so, tcp_sendspace, tcp_recvspace);
+ if (error)
+ return (error);
+ }
+
#if INET6
if (isipv6) {
inp->inp_vflag |= INP_IPV6;