X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fe8ab488e9161c46dd9885d58fc52996dc0249ff..a39ff7e25e19b3a8c3020042a3872ca9ec9659f1:/bsd/kern/kern_control.c diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index 16a66ae82..099201dac 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2014 Apple Inc. All rights reserved. + * Copyright (c) 1999-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,12 +50,51 @@ #include #include #include +#include #include #include #include +struct kctl { + TAILQ_ENTRY(kctl) next; /* controller chain */ + kern_ctl_ref kctlref; + + /* controller information provided when registering */ + char name[MAX_KCTL_NAME]; /* unique identifier */ + u_int32_t id; + u_int32_t reg_unit; + + /* misc communication information */ + u_int32_t flags; /* support flags */ + u_int32_t recvbufsize; /* request more than the default buffer size */ + u_int32_t sendbufsize; /* request more than the default buffer size */ + + /* Dispatch functions */ + ctl_bind_func bind; /* Prepare contact */ + ctl_connect_func connect; /* Make contact */ + ctl_disconnect_func disconnect; /* Break contact */ + ctl_send_func send; /* Send data to nke */ + ctl_send_list_func send_list; /* Send list of packets */ + ctl_setopt_func setopt; /* set kctl configuration */ + ctl_getopt_func getopt; /* get kctl configuration */ + ctl_rcvd_func rcvd; /* Notify nke when client reads data */ + + TAILQ_HEAD(, ctl_cb) kcb_head; + u_int32_t lastunit; +}; + +struct ctl_cb { + TAILQ_ENTRY(ctl_cb) next; /* controller chain */ + lck_mtx_t *mtx; + struct socket *so; /* controlling socket */ + struct kctl *kctl; /* back pointer to controller */ + void *userdata; + struct sockaddr_ctl sac; + u_int32_t usecount; +}; + #ifndef ROUNDUP64 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t)) #endif @@ -84,10 +123,10 @@ static lck_mtx_t *ctl_mtx; /* all the controllers are chained */ TAILQ_HEAD(kctl_list, kctl) ctl_head; - static int ctl_attach(struct socket *, int, struct proc *); static int ctl_detach(struct socket *); static int ctl_sofreelastref(struct socket *so); +static int ctl_bind(struct socket *, struct sockaddr *, struct proc *); static int ctl_connect(struct socket *, struct sockaddr *, struct proc *); static int ctl_disconnect(struct socket *); static int ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, @@ -103,7 +142,8 @@ static int ctl_usr_rcvd(struct socket *so, int flags); static struct kctl *ctl_find_by_name(const char *); static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit); -static struct socket *kcb_find_socket(struct kctl *, u_int32_t unit); +static struct socket *kcb_find_socket(kern_ctl_ref kctlref, u_int32_t unit, + u_int32_t *); static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit); static void ctl_post_msg(u_int32_t event_code, u_int32_t id); @@ -113,6 +153,7 @@ static lck_mtx_t * ctl_getlock(struct socket *, int); static struct pr_usrreqs ctl_usrreqs = { .pru_attach = ctl_attach, + .pru_bind = ctl_bind, .pru_connect = ctl_connect, .pru_control = ctl_ioctl, .pru_detach = ctl_detach, @@ -154,7 +195,6 @@ __private_extern__ int kctl_reg_list SYSCTL_HANDLER_ARGS; __private_extern__ int kctl_pcblist SYSCTL_HANDLER_ARGS; __private_extern__ int kctl_getstat SYSCTL_HANDLER_ARGS; -static int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw)); SYSCTL_NODE(_net_systm, OID_AUTO, kctl, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel control family"); @@ -184,6 +224,20 @@ u_int32_t ctl_debug = 0; SYSCTL_INT(_net_systm_kctl, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_debug, 0, ""); +#define KCTL_TBL_INC 16 + +static uintptr_t kctl_tbl_size = 0; +static u_int32_t kctl_tbl_growing = 0; +static u_int32_t kctl_tbl_growing_waiting = 0; +static uintptr_t kctl_tbl_count = 0; +static struct kctl **kctl_table = NULL; +static uintptr_t kctl_ref_gencnt = 0; + +static void kctl_tbl_grow(void); +static kern_ctl_ref kctl_make_ref(struct kctl *kctl); +static void kctl_delete_ref(kern_ctl_ref); +static struct kctl *kctl_from_ref(kern_ctl_ref); + /* * Install the protosw's for the Kernel Control manager. */ @@ -192,6 +246,7 @@ kern_control_init(struct domain *dp) { struct protosw *pr; int i; + int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw)); VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); VERIFY(dp == systemdomain); @@ -302,27 +357,46 @@ ctl_detach(struct socket *so) if (kcb == 0) return (0); + if (kcb->kctl != NULL && kcb->kctl->bind != NULL && + kcb->userdata != NULL && !(so->so_state & SS_ISCONNECTED)) { + // The unit was bound, but not connected + // Invoke the disconnected call to cleanup + if (kcb->kctl->disconnect != NULL) { + socket_unlock(so, 0); + (*kcb->kctl->disconnect)(kcb->kctl->kctlref, + kcb->sac.sc_unit, kcb->userdata); + socket_lock(so, 0); + } + } + soisdisconnected(so); so->so_flags |= SOF_PCBCLEARING; return (0); } - static int -ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) +ctl_setup_kctl(struct socket *so, struct sockaddr *nam, struct proc *p) { -#pragma unused(p) - struct kctl *kctl; - int error = 0; + struct kctl *kctl = NULL; + int error = 0; struct sockaddr_ctl sa; - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - struct ctl_cb *kcb_next = NULL; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct ctl_cb *kcb_next = NULL; + u_quad_t sbmaxsize; + u_int32_t recvbufsize, sendbufsize; - if (kcb == 0) - panic("ctl_connect so_pcb null\n"); + if (kcb == 0) { + panic("ctl_setup_kctl so_pcb null\n"); + } + + if (kcb->kctl != NULL) { + // Already set up, skip + return (0); + } - if (nam->sa_len != sizeof(struct sockaddr_ctl)) + if (nam->sa_len != sizeof(struct sockaddr_ctl)) { return (EINVAL); + } bcopy(nam, &sa, sizeof(struct sockaddr_ctl)); @@ -334,12 +408,12 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) } if (((kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && - (so->so_type != SOCK_STREAM)) || + (so->so_type != SOCK_STREAM)) || (!(kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && - (so->so_type != SOCK_DGRAM))) { - lck_mtx_unlock(ctl_mtx); - return (EPROTOTYPE); - } + (so->so_type != SOCK_DGRAM))) { + lck_mtx_unlock(ctl_mtx); + return (EPROTOTYPE); + } if (kctl->flags & CTL_FLAG_PRIVILEGED) { if (p == 0) { @@ -359,16 +433,17 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) } } else { /* Find an unused ID, assumes control IDs are in order */ - u_int32_t unit = 1; + u_int32_t unit = 1; TAILQ_FOREACH(kcb_next, &kctl->kcb_head, next) { - if (kcb_next->unit > unit) { + if (kcb_next->sac.sc_unit > unit) { /* Found a gap, lets fill it in */ break; } - unit = kcb_next->unit + 1; - if (unit == ctl_maxunit) + unit = kcb_next->sac.sc_unit + 1; + if (unit == ctl_maxunit) { break; + } } if (unit == ctl_maxunit) { @@ -379,7 +454,7 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) sa.sc_unit = unit; } - kcb->unit = sa.sc_unit; + bcopy(&sa, &kcb->sac, sizeof(struct sockaddr_ctl)); kcb->kctl = kctl; if (kcb_next != NULL) { TAILQ_INSERT_BEFORE(kcb_next, kcb, next); @@ -391,36 +466,127 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) kctlstat.kcs_connections++; lck_mtx_unlock(ctl_mtx); - error = soreserve(so, kctl->sendbufsize, kctl->recvbufsize); + /* + * rdar://15526688: Limit the send and receive sizes to sb_max + * by using the same scaling as sbreserve() + */ + sbmaxsize = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); + + if (kctl->sendbufsize > sbmaxsize) { + sendbufsize = sbmaxsize; + } else { + sendbufsize = kctl->sendbufsize; + } + + if (kctl->recvbufsize > sbmaxsize) { + recvbufsize = sbmaxsize; + } else { + recvbufsize = kctl->recvbufsize; + } + + error = soreserve(so, sendbufsize, recvbufsize); if (error) { - printf("%s - soreserve(%llx, %u, %u) error %d\n", __func__, - (uint64_t)VM_KERNEL_ADDRPERM(so), - kctl->sendbufsize, kctl->recvbufsize, error); + if (ctl_debug) + printf("%s - soreserve(%llx, %u, %u) error %d\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(so), + sendbufsize, recvbufsize, error); goto done; } - soisconnecting(so); + +done: + if (error) { + soisdisconnected(so); + lck_mtx_lock(ctl_mtx); + TAILQ_REMOVE(&kctl->kcb_head, kcb, next); + kcb->kctl = NULL; + kcb->sac.sc_unit = 0; + kctlstat.kcs_pcbcount--; + kctlstat.kcs_gencnt++; + kctlstat.kcs_conn_fail++; + lck_mtx_unlock(ctl_mtx); + } + return (error); +} + +static int +ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p) +{ + int error = 0; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if (kcb == NULL) { + panic("ctl_bind so_pcb null\n"); + } + + error = ctl_setup_kctl(so, nam, p); + if (error) { + return (error); + } + + if (kcb->kctl == NULL) { + panic("ctl_bind kctl null\n"); + } + + if (kcb->kctl->bind == NULL) { + return (EINVAL); + } socket_unlock(so, 0); - error = (*kctl->connect)(kctl, &sa, &kcb->userdata); + error = (*kcb->kctl->bind)(kcb->kctl->kctlref, &kcb->sac, &kcb->userdata); socket_lock(so, 0); - if (error) - goto end; + return (error); +} + +static int +ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) +{ + int error = 0; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if (kcb == NULL) { + panic("ctl_connect so_pcb null\n"); + } + + error = ctl_setup_kctl(so, nam, p); + if (error) { + return (error); + } + + if (kcb->kctl == NULL) { + panic("ctl_connect kctl null\n"); + } + + soisconnecting(so); + socket_unlock(so, 0); + error = (*kcb->kctl->connect)(kcb->kctl->kctlref, &kcb->sac, &kcb->userdata); + socket_lock(so, 0); + if (error) { + goto end; + } soisconnected(so); end: - if (error && kctl->disconnect) { + if (error && kcb->kctl->disconnect) { + /* + * XXX Make sure we Don't check the return value + * of disconnect here. + * ipsec/utun_ctl_disconnect will return error when + * disconnect gets called after connect failure. + * However if we decide to check for disconnect return + * value here. Please make sure to revisit + * ipsec/utun_ctl_disconnect. + */ socket_unlock(so, 0); - (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); + (*kcb->kctl->disconnect)(kcb->kctl->kctlref, kcb->sac.sc_unit, kcb->userdata); socket_lock(so, 0); } -done: if (error) { soisdisconnected(so); lck_mtx_lock(ctl_mtx); - kcb->kctl = 0; - kcb->unit = 0; - TAILQ_REMOVE(&kctl->kcb_head, kcb, next); + TAILQ_REMOVE(&kcb->kctl->kcb_head, kcb, next); + kcb->kctl = NULL; + kcb->sac.sc_unit = 0; kctlstat.kcs_pcbcount--; kctlstat.kcs_gencnt++; kctlstat.kcs_conn_fail++; @@ -439,7 +605,8 @@ ctl_disconnect(struct socket *so) if (kctl && kctl->disconnect) { socket_unlock(so, 0); - (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); + (*kctl->disconnect)(kctl->kctlref, kcb->sac.sc_unit, + kcb->userdata); socket_lock(so, 0); } @@ -448,7 +615,7 @@ ctl_disconnect(struct socket *so) socket_unlock(so, 0); lck_mtx_lock(ctl_mtx); kcb->kctl = 0; - kcb->unit = 0; + kcb->sac.sc_unit = 0; while (kcb->usecount != 0) { msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0); } @@ -479,7 +646,7 @@ ctl_peeraddr(struct socket *so, struct sockaddr **nam) sc.sc_family = AF_SYSTEM; sc.ss_sysaddr = AF_SYS_CONTROL; sc.sc_id = kctl->id; - sc.sc_unit = kcb->unit; + sc.sc_unit = kcb->sac.sc_unit; *nam = dup_sockaddr((struct sockaddr *)&sc, 1); @@ -529,7 +696,7 @@ ctl_usr_rcvd(struct socket *so, int flags) if (kctl->rcvd) { socket_unlock(so, 0); - (*kctl->rcvd)(kctl, kcb->unit, kcb->userdata, flags); + (*kctl->rcvd)(kctl->kctlref, kcb->sac.sc_unit, kcb->userdata, flags); socket_lock(so, 0); } @@ -560,7 +727,8 @@ ctl_send(struct socket *so, int flags, struct mbuf *m, if (error == 0 && kctl->send) { so_tc_update_stats(m, so, m_get_service_class(m)); socket_unlock(so, 0); - error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags); + error = (*kctl->send)(kctl->kctlref, kcb->sac.sc_unit, kcb->userdata, + m, flags); socket_lock(so, 0); } else { m_freem(m); @@ -597,8 +765,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, so_tc_update_stats(nxt, so, m_get_service_class(nxt)); socket_unlock(so, 0); - error = (*kctl->send_list)(kctl, kcb->unit, kcb->userdata, m, - flags); + error = (*kctl->send_list)(kctl->kctlref, kcb->sac.sc_unit, + kcb->userdata, m, flags); socket_lock(so, 0); } else if (error == 0 && kctl->send) { while (m != NULL && error == 0) { @@ -607,8 +775,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, m->m_nextpkt = NULL; so_tc_update_stats(m, so, m_get_service_class(m)); socket_unlock(so, 0); - error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, - flags); + error = (*kctl->send)(kctl->kctlref, kcb->sac.sc_unit, + kcb->userdata, m, flags); socket_lock(so, 0); m = nextpkt; } @@ -625,27 +793,27 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, } static errno_t -ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, - u_int32_t flags) +ctl_rcvbspace(struct socket *so, u_int32_t datasize, + u_int32_t kctlflags, u_int32_t flags) { struct sockbuf *sb = &so->so_rcv; u_int32_t space = sbspace(sb); errno_t error; - - if ((kctl->flags & CTL_FLAG_REG_CRIT) == 0) { + + if ((kctlflags & CTL_FLAG_REG_CRIT) == 0) { if ((u_int32_t) space >= datasize) error = 0; else error = ENOBUFS; } else if ((flags & CTL_DATA_CRIT) == 0) { - /* - * Reserve 25% for critical messages - */ - if (space < (sb->sb_hiwat >> 2) || - space < datasize) - error = ENOBUFS; - else - error = 0; + /* + * Reserve 25% for critical messages + */ + if (space < (sb->sb_hiwat >> 2) || + space < datasize) + error = ENOBUFS; + else + error = 0; } else { u_int32_t autorcvbuf_max; @@ -670,10 +838,18 @@ ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, if (sb->sb_hiwat > ctl_autorcvbuf_high) ctl_autorcvbuf_high = sb->sb_hiwat; + /* + * A final check + */ + if ((u_int32_t) sbspace(sb) >= datasize) { + error = 0; + } else { + error = ENOBUFS; + } + if (ctl_debug) - printf("%s - grown to %d\n", - __func__, sb->sb_hiwat); - error = 0; + printf("%s - grown to %d error %d\n", + __func__, sb->sb_hiwat, error); } else { error = ENOBUFS; } @@ -685,22 +861,20 @@ ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, } errno_t -ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags) +ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, struct mbuf *m, + u_int32_t flags) { struct socket *so; errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; int len = m->m_pkthdr.len; + u_int32_t kctlflags; - if (kctl == NULL) - return (EINVAL); - - so = kcb_find_socket(kctl, unit); - - if (so == NULL) + so = kcb_find_socket(kctlref, unit, &kctlflags); + if (so == NULL) { return (EINVAL); + } - if (ctl_rcvbspace(kctl, so, len, flags) != 0) { + if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) { error = ENOBUFS; OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); goto bye; @@ -750,21 +924,26 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, { struct socket *so = NULL; errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; struct mbuf *m, *nextpkt; int needwakeup = 0; - int len; + int len = 0; + u_int32_t kctlflags; /* * Need to point the beginning of the list in case of early exit */ m = m_list; - if (kctl == NULL) { + /* + * kcb_find_socket takes the socket lock with a reference + */ + so = kcb_find_socket(kctlref, unit, &kctlflags); + if (so == NULL) { error = EINVAL; goto done; } - if (kctl->flags & CTL_FLAG_REG_SOCK_STREAM) { + + if (kctlflags & CTL_FLAG_REG_SOCK_STREAM) { error = EOPNOTSUPP; goto done; } @@ -772,19 +951,11 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, error = EINVAL; goto done; } - /* - * kcb_find_socket takes the socket lock with a reference - */ - so = kcb_find_socket(kctl, unit); - if (so == NULL) { - error = EINVAL; - goto done; - } for (m = m_list; m != NULL; m = nextpkt) { nextpkt = m->m_nextpkt; - if (m->m_pkthdr.len == 0) + if (m->m_pkthdr.len == 0 && ctl_debug) printf("%s: %llx m_pkthdr.len is 0", __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)); @@ -793,7 +964,7 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, * so it's not reliable from a data standpoint */ len = m_space(m); - if (ctl_rcvbspace(kctl, so, len, flags) != 0) { + if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) { error = ENOBUFS; OSIncrementAtomic64( (SInt64 *)&kctlstat.kcs_enqueue_fullsock); @@ -861,19 +1032,17 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, struct socket *so; struct mbuf *m; errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; unsigned int num_needed; struct mbuf *n; size_t curlen = 0; + u_int32_t kctlflags; - if (kctlref == NULL) - return (EINVAL); - - so = kcb_find_socket(kctl, unit); - if (so == NULL) + so = kcb_find_socket(kctlref, unit, &kctlflags); + if (so == NULL) { return (EINVAL); + } - if (ctl_rcvbspace(kctl, so, len, flags) != 0) { + if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) { error = ENOBUFS; OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); goto bye; @@ -882,8 +1051,10 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, num_needed = 1; m = m_allocpacket_internal(&num_needed, len, NULL, M_NOWAIT, 1, 0); if (m == NULL) { - printf("ctl_enqueuedata: m_allocpacket_internal(%lu) failed\n", - len); + kctlstat.kcs_enqdata_mb_alloc_fail++; + if (ctl_debug) + printf("%s: m_allocpacket_internal(%lu) failed\n", + __func__, len); error = ENOMEM; goto bye; } @@ -906,6 +1077,7 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, if ((flags & CTL_DATA_NOWAKEUP) == 0) sorwakeup(so); } else { + kctlstat.kcs_enqdata_sbappend_fail++; error = ENOBUFS; OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); } @@ -922,20 +1094,50 @@ bye: return (error); } +errno_t +ctl_getenqueuepacketcount(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *pcnt) +{ + struct socket *so; + u_int32_t cnt; + struct mbuf *m1; + + if (pcnt == NULL) + return (EINVAL); + + so = kcb_find_socket(kctlref, unit, NULL); + if (so == NULL) { + return (EINVAL); + } + + cnt = 0; + m1 = so->so_rcv.sb_mb; + while (m1 != NULL) { + if (m1->m_type == MT_DATA || + m1->m_type == MT_HEADER || + m1->m_type == MT_OOBDATA) + cnt += 1; + m1 = m1->m_nextpkt; + } + *pcnt = cnt; + + socket_unlock(so, 1); + + return (0); +} errno_t ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space) { - struct kctl *kctl = (struct kctl *)kctlref; struct socket *so; long avail; - if (kctlref == NULL || space == NULL) + if (space == NULL) return (EINVAL); - so = kcb_find_socket(kctl, unit); - if (so == NULL) + so = kcb_find_socket(kctlref, unit, NULL); + if (so == NULL) { return (EINVAL); + } avail = sbspace(&so->so_rcv); *space = (avail < 0) ? 0 : avail; @@ -948,15 +1150,15 @@ errno_t ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *difference) { - struct kctl *kctl = (struct kctl *)kctlref; struct socket *so; - if (kctlref == NULL || difference == NULL) + if (difference == NULL) return (EINVAL); - so = kcb_find_socket(kctl, unit); - if (so == NULL) + so = kcb_find_socket(kctlref, unit, NULL); + if (so == NULL) { return (EINVAL); + } if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat) { *difference = 0; @@ -974,7 +1176,7 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; struct kctl *kctl; int error = 0; - void *data; + void *data = NULL; size_t len; if (sopt->sopt_level != SYSPROTO_CONTROL) { @@ -991,36 +1193,33 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) case SOPT_SET: if (kctl->setopt == NULL) return (ENOTSUP); - if (sopt->sopt_valsize == 0) { - data = NULL; - } else { + if (sopt->sopt_valsize != 0) { MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, - M_WAITOK); + M_WAITOK | M_ZERO); if (data == NULL) return (ENOMEM); error = sooptcopyin(sopt, data, - sopt->sopt_valsize, - sopt->sopt_valsize); + sopt->sopt_valsize, sopt->sopt_valsize); } if (error == 0) { socket_unlock(so, 0); - error = (*kctl->setopt)(kcb->kctl, kcb->unit, - kcb->userdata, - sopt->sopt_name, - data, - sopt->sopt_valsize); + error = (*kctl->setopt)(kctl->kctlref, + kcb->sac.sc_unit, kcb->userdata, sopt->sopt_name, + data, sopt->sopt_valsize); socket_lock(so, 0); } - FREE(data, M_TEMP); + + if (data != NULL) + FREE(data, M_TEMP); break; case SOPT_GET: if (kctl->getopt == NULL) return (ENOTSUP); - data = NULL; + if (sopt->sopt_valsize && sopt->sopt_val) { MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, - M_WAITOK); + M_WAITOK | M_ZERO); if (data == NULL) return (ENOMEM); /* @@ -1030,22 +1229,25 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) error = sooptcopyin(sopt, data, sopt->sopt_valsize, sopt->sopt_valsize); } - len = sopt->sopt_valsize; - socket_unlock(so, 0); - error = (*kctl->getopt)(kcb->kctl, kcb->unit, - kcb->userdata, sopt->sopt_name, - data, &len); - if (data != NULL && len > sopt->sopt_valsize) - panic_plain("ctl_ctloutput: ctl %s returned " - "len (%lu) > sopt_valsize (%lu)\n", - kcb->kctl->name, len, - sopt->sopt_valsize); - socket_lock(so, 0); + if (error == 0) { - if (data != NULL) - error = sooptcopyout(sopt, data, len); - else - sopt->sopt_valsize = len; + len = sopt->sopt_valsize; + socket_unlock(so, 0); + error = (*kctl->getopt)(kctl->kctlref, kcb->sac.sc_unit, + kcb->userdata, sopt->sopt_name, + data, &len); + if (data != NULL && len > sopt->sopt_valsize) + panic_plain("ctl_ctloutput: ctl %s returned " + "len (%lu) > sopt_valsize (%lu)\n", + kcb->kctl->name, len, + sopt->sopt_valsize); + socket_lock(so, 0); + if (error == 0) { + if (data != NULL) + error = sooptcopyout(sopt, data, len); + else + sopt->sopt_valsize = len; + } } if (data != NULL) FREE(data, M_TEMP); @@ -1108,6 +1310,159 @@ ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, return (error); } +static void +kctl_tbl_grow() +{ + struct kctl **new_table; + uintptr_t new_size; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (kctl_tbl_growing) { + /* Another thread is allocating */ + kctl_tbl_growing_waiting++; + + do { + (void) msleep((caddr_t) &kctl_tbl_growing, ctl_mtx, + PSOCK | PCATCH, "kctl_tbl_growing", 0); + } while (kctl_tbl_growing); + kctl_tbl_growing_waiting--; + } + /* Another thread grew the table */ + if (kctl_table != NULL && kctl_tbl_count < kctl_tbl_size) + return; + + /* Verify we have a sane size */ + if (kctl_tbl_size + KCTL_TBL_INC >= UINT16_MAX) { + kctlstat.kcs_tbl_size_too_big++; + if (ctl_debug) + printf("%s kctl_tbl_size %lu too big\n", + __func__, kctl_tbl_size); + return; + } + kctl_tbl_growing = 1; + + new_size = kctl_tbl_size + KCTL_TBL_INC; + + lck_mtx_unlock(ctl_mtx); + new_table = _MALLOC(sizeof(struct kctl *) * new_size, + M_TEMP, M_WAIT | M_ZERO); + lck_mtx_lock(ctl_mtx); + + if (new_table != NULL) { + if (kctl_table != NULL) { + bcopy(kctl_table, new_table, + kctl_tbl_size * sizeof(struct kctl *)); + + _FREE(kctl_table, M_TEMP); + } + kctl_table = new_table; + kctl_tbl_size = new_size; + } + + kctl_tbl_growing = 0; + + if (kctl_tbl_growing_waiting) { + wakeup(&kctl_tbl_growing); + } +} + +#define KCTLREF_INDEX_MASK 0x0000FFFF +#define KCTLREF_GENCNT_MASK 0xFFFF0000 +#define KCTLREF_GENCNT_SHIFT 16 + +static kern_ctl_ref +kctl_make_ref(struct kctl *kctl) +{ + uintptr_t i; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (kctl_tbl_count >= kctl_tbl_size) + kctl_tbl_grow(); + + kctl->kctlref = NULL; + for (i = 0; i < kctl_tbl_size; i++) { + if (kctl_table[i] == NULL) { + uintptr_t ref; + + /* + * Reference is index plus one + */ + kctl_ref_gencnt += 1; + + /* + * Add generation count as salt to reference to prevent + * use after deregister + */ + ref = ((kctl_ref_gencnt << KCTLREF_GENCNT_SHIFT) & + KCTLREF_GENCNT_MASK) + + ((i + 1) & KCTLREF_INDEX_MASK); + + kctl->kctlref = (void *)(ref); + kctl_table[i] = kctl; + kctl_tbl_count++; + break; + } + } + + if (kctl->kctlref == NULL) + panic("%s no space in table", __func__); + + if (ctl_debug > 0) + printf("%s %p for %p\n", + __func__, kctl->kctlref, kctl); + + return (kctl->kctlref); +} + +static void +kctl_delete_ref(kern_ctl_ref kctlref) +{ + /* + * Reference is index plus one + */ + uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (i < kctl_tbl_size) { + struct kctl *kctl = kctl_table[i]; + + if (kctl->kctlref == kctlref) { + kctl_table[i] = NULL; + kctl_tbl_count--; + } else { + kctlstat.kcs_bad_kctlref++; + } + } else { + kctlstat.kcs_bad_kctlref++; + } +} + +static struct kctl * +kctl_from_ref(kern_ctl_ref kctlref) +{ + /* + * Reference is index plus one + */ + uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1; + struct kctl *kctl = NULL; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + if (i >= kctl_tbl_size) { + kctlstat.kcs_bad_kctlref++; + return (NULL); + } + kctl = kctl_table[i]; + if (kctl->kctlref != kctlref) { + kctlstat.kcs_bad_kctlref++; + return (NULL); + } + return (kctl); +} + /* * Register/unregister a NKE */ @@ -1116,10 +1471,9 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) { struct kctl *kctl = NULL; struct kctl *kctl_next = NULL; - u_int32_t id = 1; - size_t name_len; - int is_extended = 0; - u_quad_t sbmaxsize; + u_int32_t id = 1; + size_t name_len; + int is_extended = 0; if (userkctl == NULL) /* sanity check */ return (EINVAL); @@ -1136,6 +1490,12 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) lck_mtx_lock(ctl_mtx); + if (kctl_make_ref(kctl) == NULL) { + lck_mtx_unlock(ctl_mtx); + FREE(kctl, M_TEMP); + return (ENOMEM); + } + /* * Kernel Control IDs * @@ -1152,6 +1512,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) /* Verify the same name isn't already registered */ if (ctl_find_by_name(userkctl->ctl_name) != NULL) { + kctl_delete_ref(kctl->kctlref); lck_mtx_unlock(ctl_mtx); FREE(kctl, M_TEMP); return (EEXIST); @@ -1195,6 +1556,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) } if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit)) { + kctl_delete_ref(kctl->kctlref); lck_mtx_unlock(ctl_mtx); FREE(kctl, M_TEMP); return (EEXIST); @@ -1210,28 +1572,21 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) /* * Let the caller know the default send and receive sizes - * - * rdar://15526688: Limit the send and receive sizes to sb_max - * by using the same scaling as sbreserve() */ - sbmaxsize = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); - - if (userkctl->ctl_sendsize == 0) + if (userkctl->ctl_sendsize == 0) { kctl->sendbufsize = CTL_SENDSIZE; - else if (userkctl->ctl_sendsize > sbmaxsize) - kctl->sendbufsize = sbmaxsize; - else - kctl->sendbufsize = userkctl->ctl_sendsize; - userkctl->ctl_sendsize = kctl->sendbufsize; - - if (userkctl->ctl_recvsize == 0) + userkctl->ctl_sendsize = kctl->sendbufsize; + } else { + kctl->sendbufsize = userkctl->ctl_sendsize; + } + if (userkctl->ctl_recvsize == 0) { kctl->recvbufsize = CTL_RECVSIZE; - else if (userkctl->ctl_recvsize > sbmaxsize) - kctl->recvbufsize = sbmaxsize; - else - kctl->recvbufsize = userkctl->ctl_recvsize; - userkctl->ctl_recvsize = kctl->recvbufsize; + userkctl->ctl_recvsize = kctl->recvbufsize; + } else { + kctl->recvbufsize = userkctl->ctl_recvsize; + } + kctl->bind = userkctl->ctl_bind; kctl->connect = userkctl->ctl_connect; kctl->disconnect = userkctl->ctl_disconnect; kctl->send = userkctl->ctl_send; @@ -1254,7 +1609,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) lck_mtx_unlock(ctl_mtx); - *kctlref = kctl; + *kctlref = kctl->kctlref; ctl_post_msg(KEV_CTL_REGISTERED, kctl->id); return (0); @@ -1265,18 +1620,16 @@ ctl_deregister(void *kctlref) { struct kctl *kctl; - if (kctlref == NULL) /* sanity check */ - return (EINVAL); - lck_mtx_lock(ctl_mtx); - TAILQ_FOREACH(kctl, &ctl_head, next) { - if (kctl == (struct kctl *)kctlref) - break; - } - if (kctl != (struct kctl *)kctlref) { + if ((kctl = kctl_from_ref(kctlref)) == NULL) { + kctlstat.kcs_bad_kctlref++; lck_mtx_unlock(ctl_mtx); + if (ctl_debug != 0) + printf("%s invalid kctlref %p\n", + __func__, kctlref); return (EINVAL); } + if (!TAILQ_EMPTY(&kctl->kcb_head)) { lck_mtx_unlock(ctl_mtx); return (EBUSY); @@ -1287,6 +1640,7 @@ ctl_deregister(void *kctlref) kctlstat.kcs_reg_count--; kctlstat.kcs_gencnt++; + kctl_delete_ref(kctl->kctlref); lck_mtx_unlock(ctl_mtx); ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id); @@ -1338,7 +1692,7 @@ ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize) break; } - if (kctl && kctl->name) { + if (kctl) { if (maxsize > MAX_KCTL_NAME) maxsize = MAX_KCTL_NAME; strlcpy(out_name, kctl->name, maxsize); @@ -1380,55 +1734,73 @@ kcb_find(struct kctl *kctl, u_int32_t unit) lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); TAILQ_FOREACH(kcb, &kctl->kcb_head, next) - if (kcb->unit == unit) + if (kcb->sac.sc_unit == unit) return (kcb); return (NULL); } static struct socket * -kcb_find_socket(struct kctl *kctl, u_int32_t unit) +kcb_find_socket(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *kctlflags) { struct socket *so = NULL; struct ctl_cb *kcb; void *lr_saved; + struct kctl *kctl; + int i; lr_saved = __builtin_return_address(0); lck_mtx_lock(ctl_mtx); - kcb = kcb_find(kctl, unit); - if (kcb && kcb->kctl == kctl) { - so = kcb->so; - if (so) { - kcb->usecount++; - } + /* + * First validate the kctlref + */ + if ((kctl = kctl_from_ref(kctlref)) == NULL) { + kctlstat.kcs_bad_kctlref++; + lck_mtx_unlock(ctl_mtx); + if (ctl_debug != 0) + printf("%s invalid kctlref %p\n", + __func__, kctlref); + return (NULL); } - lck_mtx_unlock(ctl_mtx); - if (so == NULL) { + kcb = kcb_find(kctl, unit); + if (kcb == NULL || kcb->kctl != kctl || (so = kcb->so) == NULL) { + lck_mtx_unlock(ctl_mtx); return (NULL); } + /* + * This prevents the socket from being closed + */ + kcb->usecount++; + /* + * Respect lock ordering: socket before ctl_mtx + */ + lck_mtx_unlock(ctl_mtx); socket_lock(so, 1); + /* + * The socket lock history is more useful if we store + * the address of the caller. + */ + i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX; + so->lock_lr[i] = lr_saved; lck_mtx_lock(ctl_mtx); - if (kcb->kctl == NULL) { + + if ((kctl = kctl_from_ref(kctlref)) == NULL || kcb->kctl == NULL) { lck_mtx_unlock(ctl_mtx); socket_unlock(so, 1); so = NULL; lck_mtx_lock(ctl_mtx); - } else { - /* - * The socket lock history is more useful if we store - * the address of the caller. - */ - int i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX; - - so->lock_lr[i] = lr_saved; + } else if (kctlflags != NULL) { + *kctlflags = kctl->flags; } + kcb->usecount--; if (kcb->usecount == 0) wakeup((event_t)&kcb->usecount); + lck_mtx_unlock(ctl_mtx); return (so); @@ -1504,13 +1876,13 @@ ctl_unlock(struct socket *so, int refcount, void *lr) else lr_saved = lr; -#ifdef MORE_KCTLLOCK_DEBUG +#if (MORE_KCTLLOCK_DEBUG && (DEVELOPMENT || DEBUG)) printf("ctl_unlock: so=%llx sopcb=%x lock=%llx ref=%u lr=%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb, (uint64_t)VM_KERNEL_ADDRPERM(((struct ctl_cb *)so->so_pcb)->mtx), so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved)); -#endif +#endif /* (MORE_KCTLLOCK_DEBUG && (DEVELOPMENT || DEBUG)) */ if (refcount) so->so_usecount--; @@ -1539,9 +1911,9 @@ ctl_unlock(struct socket *so, int refcount, void *lr) } static lck_mtx_t * -ctl_getlock(struct socket *so, int locktype) +ctl_getlock(struct socket *so, int flags) { -#pragma unused(locktype) +#pragma unused(flags) struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; if (so->so_pcb) { @@ -1617,20 +1989,20 @@ kctl_reg_list SYSCTL_HANDLER_ARGS xkr->xkr_id = kctl->id; xkr->xkr_reg_unit = kctl->reg_unit; xkr->xkr_flags = kctl->flags; - xkr->xkr_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl); + xkr->xkr_kctlref = (uint64_t)(kctl->kctlref); xkr->xkr_recvbufsize = kctl->recvbufsize; xkr->xkr_sendbufsize = kctl->sendbufsize; xkr->xkr_lastunit = kctl->lastunit; xkr->xkr_pcbcount = pcbcount; - xkr->xkr_connect = (uint64_t)VM_KERNEL_ADDRPERM(kctl->connect); + xkr->xkr_connect = (uint64_t)VM_KERNEL_UNSLIDE(kctl->connect); xkr->xkr_disconnect = - (uint64_t)VM_KERNEL_ADDRPERM(kctl->disconnect); - xkr->xkr_send = (uint64_t)VM_KERNEL_ADDRPERM(kctl->send); + (uint64_t)VM_KERNEL_UNSLIDE(kctl->disconnect); + xkr->xkr_send = (uint64_t)VM_KERNEL_UNSLIDE(kctl->send); xkr->xkr_send_list = - (uint64_t)VM_KERNEL_ADDRPERM(kctl->send_list); - xkr->xkr_setopt = (uint64_t)VM_KERNEL_ADDRPERM(kctl->setopt); - xkr->xkr_getopt = (uint64_t)VM_KERNEL_ADDRPERM(kctl->getopt); - xkr->xkr_rcvd = (uint64_t)VM_KERNEL_ADDRPERM(kctl->rcvd); + (uint64_t)VM_KERNEL_UNSLIDE(kctl->send_list); + xkr->xkr_setopt = (uint64_t)VM_KERNEL_UNSLIDE(kctl->setopt); + xkr->xkr_getopt = (uint64_t)VM_KERNEL_UNSLIDE(kctl->getopt); + xkr->xkr_rcvd = (uint64_t)VM_KERNEL_UNSLIDE(kctl->rcvd); strlcpy(xkr->xkr_name, kctl->name, sizeof(xkr->xkr_name)); error = SYSCTL_OUT(req, buf, item_size); @@ -1733,7 +2105,7 @@ kctl_pcblist SYSCTL_HANDLER_ARGS xk->xkp_len = sizeof(struct xkctlpcb); xk->xkp_kind = XSO_KCB; - xk->xkp_unit = kcb->unit; + xk->xkp_unit = kcb->sac.sc_unit; xk->xkp_kctpcb = (uint64_t)VM_KERNEL_ADDRPERM(kcb); xk->xkp_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl); xk->xkp_kctlid = kctl->id; @@ -1799,3 +2171,25 @@ done: lck_mtx_unlock(ctl_mtx); return (error); } + +void +kctl_fill_socketinfo(struct socket *so, struct socket_info *si) +{ + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kern_ctl_info *kcsi = + &si->soi_proto.pri_kern_ctl; + struct kctl *kctl = kcb->kctl; + + si->soi_kind = SOCKINFO_KERN_CTL; + + if (kctl == 0) + return; + + kcsi->kcsi_id = kctl->id; + kcsi->kcsi_reg_unit = kctl->reg_unit; + kcsi->kcsi_flags = kctl->flags; + kcsi->kcsi_recvbufsize = kctl->recvbufsize; + kcsi->kcsi_sendbufsize = kctl->sendbufsize; + kcsi->kcsi_unit = kcb->sac.sc_unit; + strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME); +}