/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/systm.h>
#include <sys/proc.h>
+#include <machine/endian.h>
#include <net/if.h>
#include <net/route.h>
/* List head for divert PCBs; div_init() LIST_INITs it and hooks it to divcbinfo. */
static struct inpcbhead divcb;
/* PCB bookkeeping for divert: div_init() fills in its list head, hash tables, zone and locks. */
static struct inpcbinfo divcbinfo;
/* Send/receive buffer sizes handed to soreserve() when a divert socket is attached. */
-static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
-static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */
+static u_int32_t div_sendspace = DIVSNDQ; /* XXX sysctl ? */
+static u_int32_t div_recvspace = DIVRCVQ; /* XXX sysctl ? */
/* Optimization: have this preinitialized */
static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET, 0, { 0 }, { 0,0,0,0,0,0,0,0 } };
/* Internal functions */
static int div_output(struct socket *so,
- struct mbuf *m, struct sockaddr *addr, struct mbuf *control);
+ struct mbuf *m, struct sockaddr_in *addr, struct mbuf *control);
extern int load_ipfw(void);
/*
* Initialize divert connection block queue.
*/
void
-div_init(void)
+div_init(struct protosw *pp, struct domain *dp)
{
+#pragma unused(dp)
+ static int div_initialized = 0;
struct inpcbinfo *pcbinfo;
+
+ VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);
+
+ if (div_initialized)
+ return;
+ div_initialized = 1;
+
LIST_INIT(&divcb);
- divcbinfo.listhead = &divcb;
+ divcbinfo.ipi_listhead = &divcb;
/*
* XXX We don't use the hash list for divert IP, but it's easier
* to allocate a one entry hash list than it is to check all
- * over the place for hashbase == NULL.
+ * over the place for ipi_hashbase == NULL.
*/
- divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
- divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
- divcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb),(maxsockets * sizeof(struct inpcb)),
+ divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &divcbinfo.ipi_hashmask);
+ divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &divcbinfo.ipi_porthashmask);
+ divcbinfo.ipi_zone = zinit(sizeof(struct inpcb),(maxsockets * sizeof(struct inpcb)),
4096, "divzone");
pcbinfo = &divcbinfo;
/*
* allocate lock group attribute and group for divert pcb mutexes
*/
- pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init();
+ pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init();
+
+ pcbinfo->ipi_lock_grp = lck_grp_alloc_init("divcb", pcbinfo->ipi_lock_grp_attr);
- pcbinfo->mtx_grp = lck_grp_alloc_init("divcb", pcbinfo->mtx_grp_attr);
-
/*
* allocate the lock attribute for divert pcb mutexes
*/
- pcbinfo->mtx_attr = lck_attr_alloc_init();
+ pcbinfo->ipi_lock_attr = lck_attr_alloc_init();
+
+ if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp,
+ pcbinfo->ipi_lock_attr)) == NULL) {
+ panic("%s: unable to allocate PCB lock\n", __func__);
+ /* NOTREACHED */
+ }
- if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL)
- return; /* pretty much dead if this fails... */
+ in_pcbinfo_attach(&divcbinfo);
#if IPFIREWALL
if (!IPFW_LOADED) {
/*
 * Input entry point for the divert protocol.  Nothing is delivered from
 * here: the packet is counted in ipstat.ips_noproto (atomic increment)
 * and the mbuf is released with m_freem().  The 'off' argument is unused.
 */
void
div_input(struct mbuf *m, __unused int off)
{
- OSAddAtomic(1, (SInt32*)&ipstat.ips_noproto);
+ OSAddAtomic(1, &ipstat.ips_noproto);
m_freem(m);
}
*
* Setup generic address and protocol structures for div_input routine,
* then pass them along with mbuf chain.
- * ###LOCK called in ip_mutex from ip_output/ip_input
*/
void
divert_packet(struct mbuf *m, int incoming, int port, int rule)
/* Find IP address for receive interface */
ifnet_lock_shared(m->m_pkthdr.rcvif);
TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
- if (ifa->ifa_addr == NULL)
- continue;
- if (ifa->ifa_addr->sa_family != AF_INET)
+ IFA_LOCK(ifa);
+ if (ifa->ifa_addr->sa_family != AF_INET) {
+ IFA_UNLOCK(ifa);
continue;
+ }
divsrc.sin_addr =
- ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
+ ((struct sockaddr_in *)(void *) ifa->ifa_addr)->sin_addr;
+ IFA_UNLOCK(ifa);
break;
}
ifnet_lock_done(m->m_pkthdr.rcvif);
* (see div_output for the other half of this.)
*/
snprintf(divsrc.sin_zero, sizeof(divsrc.sin_zero),
- "%s%d", m->m_pkthdr.rcvif->if_name,
- m->m_pkthdr.rcvif->if_unit);
+ "%s", if_name(m->m_pkthdr.rcvif));
}
/* Put packet on socket queue, if any */
sa = NULL;
nport = htons((u_int16_t)port);
- lck_rw_lock_shared(divcbinfo.mtx);
+ lck_rw_lock_shared(divcbinfo.ipi_lock);
LIST_FOREACH(inp, &divcb, inp_list) {
if (inp->inp_lport == nport)
sa = inp->inp_socket;
socket_unlock(sa, 1);
} else {
m_freem(m);
- OSAddAtomic(1, (SInt32*)&ipstat.ips_noproto);
- OSAddAtomic(-1, (SInt32*)&ipstat.ips_delivered);
+ OSAddAtomic(1, &ipstat.ips_noproto);
+ OSAddAtomic(-1, &ipstat.ips_delivered);
}
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
}
/*
* ###LOCK called in inet_proto mutex when from div_send.
*/
static int
-div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
+div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
struct mbuf *control)
{
struct inpcb *const inp = sotoinpcb(so);
struct ip *const ip = mtod(m, struct ip *);
- struct sockaddr_in *sin = (struct sockaddr_in *)addr;
int error = 0;
+ mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
- if (control)
- m_freem(control); /* XXX */
+ if (control != NULL) {
+ msc = mbuf_service_class_from_control(control);
+ m_freem(control); /* XXX */
+ control = NULL;
+ }
/* Loopback avoidance and state recovery */
if (sin) {
struct m_tag *mtag;
int len = 0;
char *c = sin->sin_zero;
- mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT,
- sizeof(struct divert_tag), M_NOWAIT);
+ mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT,
+ sizeof(struct divert_tag), M_NOWAIT, m);
if (mtag == NULL) {
error = ENOBUFS;
goto cantsend;
/* Reinject packet into the system as incoming or outgoing */
if (!sin || sin->sin_addr.s_addr == 0) {
+ struct ip_out_args ipoa =
+ { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 };
+ struct route ro;
+ struct ip_moptions *imo;
+
/*
* Don't allow both user specified and setsockopt options,
* and don't allow packet length sizes that will crash
}
/* Convert fields to host order for ip_output() */
+#if BYTE_ORDER != BIG_ENDIAN
NTOHS(ip->ip_len);
NTOHS(ip->ip_off);
+#endif
- /* Send packet to output processing */
- OSAddAtomic(1, (SInt32*)&ipstat.ips_rawout);
+ OSAddAtomic(1, &ipstat.ips_rawout);
+ /* Copy the cached route and take an extra reference */
+ inp_route_copyout(inp, &ro);
+
+ set_packet_service_class(m, so, msc, 0);
+
+ imo = inp->inp_moptions;
+ if (imo != NULL)
+ IMO_ADDREF(imo);
socket_unlock(so, 0);
#if CONFIG_MACF_NET
mac_mbuf_label_associate_inpcb(inp, m);
#endif
- error = ip_output(m,
- inp->inp_options, &inp->inp_route,
+ /* Send packet to output processing */
+ error = ip_output(m, inp->inp_options, &ro,
(so->so_options & SO_DONTROUTE) |
- IP_ALLOWBROADCAST | IP_RAWOUTPUT,
- inp->inp_moptions, NULL);
+ IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS,
+ imo, &ipoa);
+
socket_lock(so, 0);
+ if (imo != NULL)
+ IMO_REMREF(imo);
+ /* Synchronize cached PCB route */
+ inp_route_copyin(inp, &ro);
} else {
struct ifaddr *ifa;
/* If no luck with the name above. check by IP address. */
if (m->m_pkthdr.rcvif == NULL) {
+ struct sockaddr_in _sin;
/*
- * Make sure there are no distractions
- * for ifa_ifwithaddr. Clear the port and the ifname.
- * Maybe zap all 8 bytes at once using a 64bit write?
+ * Make sure there are no distractions for
+ * ifa_ifwithaddr; use sanitized version.
*/
- bzero(sin->sin_zero, sizeof(sin->sin_zero));
- /* *((u_int64_t *)sin->sin_zero) = 0; */ /* XXX ?? */
- sin->sin_port = 0;
- if (!(ifa = ifa_ifwithaddr((struct sockaddr *) sin))) {
+ bzero(&_sin, sizeof (_sin));
+ _sin.sin_family = AF_INET;
+ _sin.sin_len = sizeof (struct sockaddr_in);
+ _sin.sin_addr.s_addr = sin->sin_addr.s_addr;
+ if (!(ifa = ifa_ifwithaddr(SA(&_sin)))) {
error = EADDRNOTAVAIL;
goto cantsend;
}
m->m_pkthdr.rcvif = ifa->ifa_ifp;
- ifafree(ifa);
+ IFA_REMREF(ifa);
}
#if CONFIG_MACF_NET
mac_mbuf_label_associate_socket(so, m);
inp = sotoinpcb(so);
if (inp)
panic("div_attach");
- if (p && (error = proc_suser(p)) != 0)
+ if ((error = proc_suser(p)) != 0)
return error;
error = soreserve(so, div_sendspace, div_recvspace);
so->so_state |= SS_ISCONNECTED;
#ifdef MORE_DICVLOCK_DEBUG
- printf("div_attach: so=%p sopcb=%p lock=%x ref=%x\n",
- so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount);
+ printf("div_attach: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(so),
+ (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb),
+ (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)),
+ so->so_usecount);
#endif
return 0;
}
struct inpcb *inp;
#ifdef MORE_DICVLOCK_DEBUG
- printf("div_detach: so=%p sopcb=%p lock=%x ref=%x\n",
- so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount);
+ printf("div_detach: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(so),
+ (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb),
+ (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)),
+ so->so_usecount);
#endif
inp = sotoinpcb(so);
if (inp == 0)
if (nam->sa_family != AF_INET) {
error = EAFNOSUPPORT;
} else {
- ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+ ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr = INADDR_ANY;
error = in_pcbbind(inp, nam, p);
}
return error;
/* Packet must have a header (but that's about it) */
if (m->m_len < sizeof (struct ip) &&
(m = m_pullup(m, sizeof (struct ip))) == 0) {
- OSAddAtomic(1, (SInt32*)&ipstat.ips_toosmall);
+ OSAddAtomic(1, &ipstat.ips_toosmall);
m_freem(m);
return EINVAL;
}
/* Send packet */
- return div_output(so, m, nam, control);
+ return div_output(so, m, SIN(nam), control);
}
+#if 0
static int
div_pcblist SYSCTL_HANDLER_ARGS
{
+#pragma unused(oidp, arg1, arg2)
int error, i, n;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
* The process of preparing the TCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
- lck_rw_lock_exclusive(divcbinfo.mtx);
+ lck_rw_lock_exclusive(divcbinfo.ipi_lock);
if (req->oldptr == USER_ADDR_NULL) {
n = divcbinfo.ipi_count;
req->oldidx = 2 * (sizeof xig)
+ (n + n/8) * sizeof(struct xinpcb);
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
return 0;
}
if (req->newptr != USER_ADDR_NULL) {
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
return EPERM;
}
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error) {
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
return error;
}
inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == 0) {
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
return ENOMEM;
}
- for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(divcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
#ifdef __APPLE__
if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
FREE(inp_list, M_TEMP);
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
return error;
}
+#endif
__private_extern__ int
-div_lock(struct socket *so, int refcount, int lr)
- {
- int lr_saved;
- if (lr == 0)
- lr_saved = (unsigned int) __builtin_return_address(0);
- else lr_saved = lr;
-
+div_lock(struct socket *so, int refcount, void *lr)
+{
+ void *lr_saved;
+
+ if (lr == NULL)
+ lr_saved = __builtin_return_address(0);
+ else
+ lr_saved = lr;
+
#ifdef MORE_DICVLOCK_DEBUG
- printf("div_lock: so=%p sopcb=%p lock=%x ref=%x lr=%x\n",
- so,
- so->so_pcb,
- so->so_pcb ? ((struct inpcb *)so->so_pcb)->inpcb_mtx : 0,
- so->so_usecount,
- lr_saved);
+ printf("div_lock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x "
+ "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so),
+ (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), so->so_pcb ?
+ (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)) : NULL,
+ so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved));
#endif
if (so->so_pcb) {
- lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx);
+ lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
} else {
- panic("div_lock: so=%p NO PCB! lr=%x\n", so, lr_saved);
- lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
+ panic("div_lock: so=%p NO PCB! lr=%p lrh= lrh= %s\n",
+ so, lr_saved, solockhistory_nr(so));
+ /* NOTREACHED */
}
-
- if (so->so_usecount < 0)
- panic("div_lock: so=%p so_pcb=%p lr=%x ref=%x\n",
- so, so->so_pcb, lr_saved, so->so_usecount);
-
+
+ if (so->so_usecount < 0) {
+ panic("div_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n",
+ so, so->so_pcb, lr_saved, so->so_usecount,
+ solockhistory_nr(so));
+ /* NOTREACHED */
+ }
+
if (refcount)
so->so_usecount++;
- so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved;
+ so->lock_lr[so->next_lock_lr] = lr_saved;
so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
return (0);
}
__private_extern__ int
-div_unlock(struct socket *so, int refcount, int lr)
+div_unlock(struct socket *so, int refcount, void *lr)
{
- int lr_saved;
+ void *lr_saved;
lck_mtx_t * mutex_held;
- struct inpcb *inp = sotoinpcb(so);
+ struct inpcb *inp = sotoinpcb(so);
- if (lr == 0)
- lr_saved = (unsigned int) __builtin_return_address(0);
- else lr_saved = lr;
+ if (lr == NULL)
+ lr_saved = __builtin_return_address(0);
+ else
+ lr_saved = lr;
-
#ifdef MORE_DICVLOCK_DEBUG
- printf("div_unlock: so=%p sopcb=%p lock=%x ref=%x lr=%x\n",
- so,
- so->so_pcb,
- so->so_pcb ? ((struct inpcb *)so->so_pcb)->inpcb_mtx : 0,
- so->so_usecount,
- lr_saved);
+ printf("div_unlock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x "
+ "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so),
+ (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), so->so_pcb ?
+ (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)) : NULL,
+ so->so_usecount, lr_saved);
#endif
if (refcount)
so->so_usecount--;
-
- if (so->so_usecount < 0)
- panic("div_unlock: so=%p usecount=%x\n", so, so->so_usecount);
+
+ if (so->so_usecount < 0) {
+ panic("div_unlock: so=%p usecount=%x lrh= %s\n",
+ so, so->so_usecount, solockhistory_nr(so));
+ /* NOTREACHED */
+ }
if (so->so_pcb == NULL) {
- panic("div_unlock: so=%p NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved);
- mutex_held = so->so_proto->pr_domain->dom_mtx;
- } else {
- mutex_held = ((struct inpcb *)so->so_pcb)->inpcb_mtx;
+ panic("div_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n",
+ so, so->so_usecount, lr_saved, solockhistory_nr(so));
+ /* NOTREACHED */
}
+ mutex_held = &((struct inpcb *)so->so_pcb)->inpcb_mtx;
if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) {
- lck_rw_lock_exclusive(divcbinfo.mtx);
+ lck_rw_lock_exclusive(divcbinfo.ipi_lock);
+ if (inp->inp_state != INPCB_STATE_DEAD)
+ in_pcbdetach(inp);
in_pcbdispose(inp);
- lck_rw_done(divcbinfo.mtx);
+ lck_rw_done(divcbinfo.ipi_lock);
return (0);
}
lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
- so->unlock_lr[so->next_unlock_lr] = (u_int32_t) lr_saved;
+ so->unlock_lr[so->next_unlock_lr] = lr_saved;
so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
lck_mtx_unlock(mutex_held);
return (0);
if (so->so_pcb) {
if (so->so_usecount < 0)
- panic("div_getlock: so=%p usecount=%x\n", so, so->so_usecount);
- return(inpcb->inpcb_mtx);
+ panic("div_getlock: so=%p usecount=%x lrh= %s\n",
+ so, so->so_usecount, solockhistory_nr(so));
+ return(&inpcb->inpcb_mtx);
} else {
- panic("div_getlock: so=%p NULL so_pcb\n", so);
+ panic("div_getlock: so=%p NULL NO PCB lrh= %s\n",
+ so, solockhistory_nr(so));
return (so->so_proto->pr_domain->dom_mtx);
}
}
-
/*
 * User-request dispatch table for divert sockets.
 *
 * The removed positional form listed every slot, using the pru_*_notsupp
 * stubs (and pru_sense_null) for operations divert does not implement.
 * The designated-initializer form names only the handlers divert provides;
 * members not named are zero-initialized by C initialization rules.
 * NOTE(review): presumably the unset slots get defaults elsewhere -- confirm.
 * The peer/socket address handlers are now in_getpeeraddr/in_getsockaddr
 * instead of in_setpeeraddr/in_setsockaddr.
 */
struct pr_usrreqs div_usrreqs = {
- div_abort, pru_accept_notsupp, div_attach, div_bind,
- pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach,
- div_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
- pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown,
- in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
+ .pru_abort = div_abort,
+ .pru_attach = div_attach,
+ .pru_bind = div_bind,
+ .pru_control = in_control,
+ .pru_detach = div_detach,
+ .pru_disconnect = div_disconnect,
+ .pru_peeraddr = in_getpeeraddr,
+ .pru_send = div_send,
+ .pru_shutdown = div_shutdown,
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosend = sosend,
+ .pru_soreceive = soreceive,
};