+/*
+ * Poll a socket for the conditions requested in `events'.
+ *
+ * Returns the subset of `events' that is satisfied right now.  When
+ * nothing is satisfied, the calling process is recorded with selrecord()
+ * on the receive and/or send buffer matching the request.  The `cred'
+ * argument is unused.  The socket is locked for the duration of the call.
+ */
+int
+sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
+{
+#pragma unused(cred)
+	struct proc *self = current_proc();
+	int ready = 0;
+
+	socket_lock(so, 1);
+	so_update_last_owner_locked(so, PROC_NULL);
+	so_update_policy(so);
+
+	if ((events & (POLLIN | POLLRDNORM)) && soreadable(so))
+		ready |= events & (POLLIN | POLLRDNORM);
+
+	if ((events & (POLLOUT | POLLWRNORM)) && sowriteable(so))
+		ready |= events & (POLLOUT | POLLWRNORM);
+
+	if ((events & (POLLPRI | POLLRDBAND)) &&
+	    (so->so_oobmark || (so->so_state & SS_RCVATMARK)))
+		ready |= events & (POLLPRI | POLLRDBAND);
+
+	if (ready != 0)
+		goto out;
+
+	/*
+	 * Nothing is ready yet, so register for a wakeup.  In both cases
+	 * below SB_SEL is set before selrecord() is called (Darwin order;
+	 * BSD calls selrecord first).
+	 */
+	if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+		so->so_rcv.sb_flags |= SB_SEL;
+		selrecord(self, &so->so_rcv.sb_sel, wql);
+	}
+	if (events & (POLLOUT | POLLWRNORM)) {
+		so->so_snd.sb_flags |= SB_SEL;
+		selrecord(self, &so->so_snd.sb_sel, wql);
+	}
+
+out:
+	socket_unlock(so, 1);
+	return (ready);
+}
+
+/*
+ * Attach knote `kn' to the socket it refers to.
+ *
+ * Selects the filter ops and knote list for EVFILT_READ, EVFILT_WRITE or
+ * EVFILT_SOCK, attaches the knote and, when KNOTE_ATTACH() returns
+ * non-zero, sets the matching KNOTE flag on the buffer or socket.
+ * Returns 0 on success, 1 if the filter type is not handled here or
+ * (with CONFIG_MACF_SOCKET) the MAC check rejects the request.
+ */
+int
+soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
+{
+#pragma unused(fp)
+#if !CONFIG_MACF_SOCKET
+#pragma unused(ctx)
+#endif /* MAC_SOCKET */
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	struct klist *notes;
+	int result = 0;
+
+	socket_lock(so, 1);
+	so_update_last_owner_locked(so, PROC_NULL);
+	so_update_policy(so);
+
+#if CONFIG_MACF_SOCKET
+	if (mac_socket_check_kqfilter(proc_ucred(vfs_context_proc(ctx)),
+	    kn, so) != 0) {
+		result = 1;
+		goto out;
+	}
+#endif /* MAC_SOCKET */
+
+	/* Pick the filter operations and the list the knote goes on */
+	if (kn->kn_filter == EVFILT_READ) {
+		kn->kn_fop = &soread_filtops;
+		notes = &so->so_rcv.sb_sel.si_note;
+	} else if (kn->kn_filter == EVFILT_WRITE) {
+		kn->kn_fop = &sowrite_filtops;
+		notes = &so->so_snd.sb_sel.si_note;
+	} else if (kn->kn_filter == EVFILT_SOCK) {
+		kn->kn_fop = &sock_filtops;
+		notes = &so->so_klist;
+	} else {
+		result = 1;
+		goto out;
+	}
+
+	if (KNOTE_ATTACH(notes, kn)) {
+		if (kn->kn_filter == EVFILT_READ) {
+			so->so_rcv.sb_flags |= SB_KNOTE;
+		} else if (kn->kn_filter == EVFILT_WRITE) {
+			so->so_snd.sb_flags |= SB_KNOTE;
+		} else if (kn->kn_filter == EVFILT_SOCK) {
+			so->so_flags |= SOF_KNOTE;
+		} else {
+			/* filter type was already validated above */
+			result = 1;
+			goto out;
+		}
+	}
+
+out:
+	socket_unlock(so, 1);
+	return (result);
+}
+
+/*
+ * Detach a read knote from the socket's receive buffer.  SB_KNOTE is
+ * cleared when KNOTE_DETACH() returns non-zero.
+ */
+static void
+filt_sordetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_rcv.sb_flags & SB_KNOTE) &&
+	    KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
+		so->so_rcv.sb_flags &= ~SB_KNOTE;
+	}
+	socket_unlock(so, 1);
+}
+
+/*ARGSUSED*/
+/*
+ * kqueue EVFILT_READ filter routine for sockets.
+ *
+ * Updates kn->kn_data (and, for out-of-band and EOF conditions,
+ * kn->kn_flags / kn->kn_fflags) from the receive side of the socket and
+ * returns non-zero when the knote should fire.  The socket lock is taken
+ * here unless `hint' carries SO_FILT_HINT_LOCKED.
+ */
+static int
+filt_soread(struct knote *kn, long hint)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	const int need_lock = ((hint & SO_FILT_HINT_LOCKED) == 0);
+	int64_t lowwat;
+	int fire;
+
+	if (need_lock)
+		socket_lock(so, 1);
+
+	/*
+	 * Radar 6615193: the listen case is handled dynamically so that
+	 * listen() may be called after EVFILT_READ has been registered.
+	 * A listener fires when its completed-connection queue is
+	 * non-empty.
+	 */
+	if (so->so_options & SO_ACCEPTCONN) {
+		kn->kn_data = so->so_qlen;
+		fire = !TAILQ_EMPTY(&so->so_comp);
+		goto out;
+	}
+
+	/* Not a listener: start from the non-control byte count */
+	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
+
+	if (so->so_oobmark) {
+		if (kn->kn_flags & EV_OOBAND) {
+			kn->kn_data -= so->so_oobmark;
+			fire = 1;
+			goto out;
+		}
+		kn->kn_data = so->so_oobmark;
+		kn->kn_flags |= EV_OOBAND;
+	} else if (so->so_state & SS_CANTRCVMORE) {
+		kn->kn_flags |= EV_EOF;
+		kn->kn_fflags = so->so_error;
+		fire = 1;
+		goto out;
+	}
+
+	if (so->so_state & SS_RCVATMARK) {
+		if (kn->kn_flags & EV_OOBAND) {
+			fire = 1;
+			goto out;
+		}
+		kn->kn_flags |= EV_OOBAND;
+	} else if (kn->kn_flags & EV_OOBAND) {
+		kn->kn_data = 0;
+		fire = 0;
+		goto out;
+	}
+
+	if (so->so_error) {	/* temporary udp error */
+		fire = 1;
+		goto out;
+	}
+
+	/*
+	 * Effective low-water mark: the buffer's own value, raised to the
+	 * NOTE_LOWAT request when that is larger, and capped at sb_hiwat.
+	 */
+	lowwat = so->so_rcv.sb_lowat;
+	if (kn->kn_sfflags & NOTE_LOWAT) {
+		if (kn->kn_sdata > so->so_rcv.sb_hiwat)
+			lowwat = so->so_rcv.sb_hiwat;
+		else if (kn->kn_sdata > lowwat)
+			lowwat = kn->kn_sdata;
+	}
+
+	fire = ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat);
+
+out:
+	if (need_lock)
+		socket_unlock(so, 1);
+	return (fire);
+}
+
+/*
+ * Detach a write knote from the socket's send buffer.  SB_KNOTE is
+ * cleared when KNOTE_DETACH() returns non-zero.
+ */
+static void
+filt_sowdetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_snd.sb_flags & SB_KNOTE) &&
+	    KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
+		so->so_snd.sb_flags &= ~SB_KNOTE;
+	}
+	socket_unlock(so, 1);
+}
+
+/*
+ * Returns 1 when the socket is a connected PF_INET/PF_INET6 socket whose
+ * inpcb satisfies INP_WAIT_FOR_IF_FEEDBACK(), and 0 in every other case.
+ */
+int
+so_wait_for_if_feedback(struct socket *so)
+{
+	struct inpcb *inp;
+
+	if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)
+		return (0);
+	if (!(so->so_state & SS_ISCONNECTED))
+		return (0);
+
+	inp = sotoinpcb(so);
+	return (INP_WAIT_FOR_IF_FEEDBACK(inp) ? 1 : 0);
+}
+
+/*ARGSUSED*/
+/*
+ * kqueue EVFILT_WRITE filter routine for sockets.
+ *
+ * Stores the send buffer's free space in kn->kn_data and returns non-zero
+ * when the knote should fire.  The socket lock is taken here unless
+ * `hint' carries SO_FILT_HINT_LOCKED.
+ */
+static int
+filt_sowrite(struct knote *kn, long hint)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	const int need_lock = ((hint & SO_FILT_HINT_LOCKED) == 0);
+	int64_t lowwat;
+	int ready = 0;
+
+	if (need_lock)
+		socket_lock(so, 1);
+
+	kn->kn_data = sbspace(&so->so_snd);
+
+	if (so->so_state & SS_CANTSENDMORE) {
+		/* write side is shut down: report EOF plus the error */
+		kn->kn_flags |= EV_EOF;
+		kn->kn_fflags = so->so_error;
+		ready = 1;
+		goto out;
+	}
+	if (so->so_error) {	/* temporary udp error */
+		ready = 1;
+		goto out;
+	}
+	/* connection-oriented protocol that is not connected: not ready */
+	if (!(so->so_state & SS_ISCONNECTED) &&
+	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
+		goto out;
+
+	/*
+	 * Effective low-water mark: the buffer's own value, raised to the
+	 * NOTE_LOWAT request when that is larger, and capped at sb_hiwat.
+	 */
+	lowwat = so->so_snd.sb_lowat;
+	if (kn->kn_sfflags & NOTE_LOWAT) {
+		if (kn->kn_sdata > so->so_snd.sb_hiwat)
+			lowwat = so->so_snd.sb_hiwat;
+		else if (kn->kn_sdata > lowwat)
+			lowwat = kn->kn_sdata;
+	}
+
+	if (kn->kn_data >= lowwat) {
+		ready = ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) ?
+		    tcp_notsent_lowat_check(so) : 1;
+	}
+
+	/* interface feedback pending overrides any readiness found above */
+	if (so_wait_for_if_feedback(so))
+		ready = 0;
+
+out:
+	if (need_lock)
+		socket_unlock(so, 1);
+	return (ready);
+}
+
+/*
+ * Detach an EVFILT_SOCK knote from the socket's knote list.  SOF_KNOTE
+ * is cleared when KNOTE_DETACH() returns non-zero.
+ */
+static void
+filt_sockdetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_flags & SOF_KNOTE) != 0 &&
+	    KNOTE_DETACH(&so->so_klist, kn)) {
+		so->so_flags &= ~SOF_KNOTE;
+	}
+	socket_unlock(so, 1);
+}
+
+/*
+ * kqueue EVFILT_SOCK filter routine.
+ *
+ * Translates the SO_FILT_HINT_* bits carried in `hint', plus selected
+ * socket state, into NOTE_* bits in kn->kn_fflags, restricted to the
+ * events requested in kn->kn_sfflags.  The socket lock is taken here
+ * unless `hint' carries SO_FILT_HINT_LOCKED.  Returns 1 when the socket
+ * has a pending error or kn->kn_fflags is non-zero, 0 otherwise.
+ */
+static int
+filt_sockev(struct knote *kn, long hint)
+{
+	/* Hints that map directly onto one NOTE_* bit */
+	static const struct {
+		long		hm_hint;
+		unsigned int	hm_note;
+	} hint_map[] = {
+		{ SO_FILT_HINT_CONNRESET,	NOTE_CONNRESET },
+		{ SO_FILT_HINT_TIMEOUT,		NOTE_TIMEOUT },
+		{ SO_FILT_HINT_NOSRCADDR,	NOTE_NOSRCADDR },
+		{ SO_FILT_HINT_IFDENIED,	NOTE_IFDENIED },
+		{ SO_FILT_HINT_KEEPALIVE,	NOTE_KEEPALIVE },
+		{ SO_FILT_HINT_ADAPTIVE_WTIMO,	NOTE_ADAPTIVE_WTIMO },
+		{ SO_FILT_HINT_ADAPTIVE_RTIMO,	NOTE_ADAPTIVE_RTIMO },
+		{ SO_FILT_HINT_CONNECTED,	NOTE_CONNECTED },
+		{ SO_FILT_HINT_DISCONNECTED,	NOTE_DISCONNECTED },
+	};
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	long ev_hint = (hint & SO_FILT_HINT_EV);
+	int fire = 0, took_lock = 0;
+	unsigned int i;
+
+	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
+		socket_lock(so, 1);
+		took_lock = 1;
+	}
+
+	for (i = 0; i < sizeof (hint_map) / sizeof (hint_map[0]); i++) {
+		if ((ev_hint & hint_map[i].hm_hint) &&
+		    (kn->kn_sfflags & hint_map[i].hm_note))
+			kn->kn_fflags |= hint_map[i].hm_note;
+	}
+
+	/* conninfo updates also require protocol support (PR_EVCONNINFO) */
+	if ((ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) &&
+	    so->so_proto != NULL &&
+	    (so->so_proto->pr_flags & PR_EVCONNINFO) &&
+	    (kn->kn_sfflags & NOTE_CONNINFO_UPDATED))
+		kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
+
+	/* read/write closed are derived from socket state, not the hint */
+	if ((kn->kn_sfflags & NOTE_READCLOSED) &&
+	    (so->so_state & SS_CANTRCVMORE))
+		kn->kn_fflags |= NOTE_READCLOSED;
+
+	if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
+	    (so->so_state & SS_CANTSENDMORE))
+		kn->kn_fflags |= NOTE_WRITECLOSED;
+
+	/*
+	 * Suspend and resume replace each other in kn_fflags; the suspend
+	 * test runs first, so resume wins when both tests pass.
+	 */
+	if ((kn->kn_sfflags & NOTE_SUSPEND) &&
+	    ((ev_hint & SO_FILT_HINT_SUSPEND) ||
+	    (so->so_flags & SOF_SUSPENDED))) {
+		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
+		kn->kn_fflags |= NOTE_SUSPEND;
+	}
+
+	if ((kn->kn_sfflags & NOTE_RESUME) &&
+	    ((ev_hint & SO_FILT_HINT_RESUME) ||
+	    (so->so_flags & SOF_SUSPENDED) == 0)) {
+		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
+		kn->kn_fflags |= NOTE_RESUME;
+	}
+
+	if (so->so_error == 0) {
+		get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
+	} else {
+		fire = 1;
+		kn->kn_data = so->so_error;
+		kn->kn_flags |= EV_EOF;
+	}
+
+	if (kn->kn_fflags != 0)
+		fire = 1;
+
+	if (took_lock)
+		socket_unlock(so, 1);
+
+	return (fire);
+}
+
+/*
+ * Refresh the SOCKEV_* connection bits in *statep from so->so_state.
+ * SOCKEV_CONNECTED is set or cleared to follow SS_ISCONNECTED;
+ * SOCKEV_DISCONNECTED is set when SS_ISDISCONNECTED is set and is never
+ * cleared here.  All other bits in *statep are left as they were.
+ */
+void
+get_sockev_state(struct socket *so, u_int32_t *statep)
+{
+	u_int32_t st = *statep;
+
+	if (so->so_state & SS_ISCONNECTED) {
+		st |= SOCKEV_CONNECTED;
+	} else {
+		st &= ~(SOCKEV_CONNECTED);
+	}
+
+	if (so->so_state & SS_ISDISCONNECTED) {
+		st |= SOCKEV_DISCONNECTED;
+	}
+
+	*statep = st;
+}
+
+/*
+ * Room for SO_LCKDBG_MAX "lock:unlock " pairs, each pointer budgeted as
+ * "0x" + two hex digits per byte + one separator, plus the final NUL.
+ */
+#define SO_LOCK_HISTORY_STR_LEN \
+	(2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)
+
+/*
+ * Format the socket's recorded lock/unlock return addresses into a
+ * string of "%p:%p " pairs.
+ *
+ * The result lives in a single static buffer, so the function is not
+ * reentrant and the string is only valid until the next call.
+ */
+__private_extern__ const char *
+solockhistory_nr(struct socket *so)
+{
+	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
+	size_t n = 0;
+	int i;
+
+	bzero(lock_history_str, sizeof (lock_history_str));
+	for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
+		size_t room = sizeof (lock_history_str) - n;
+		int len;
+
+		len = snprintf(lock_history_str + n, room, "%p:%p ",
+		    so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
+		    so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
+		/*
+		 * Stop on an output error or once the buffer is full.
+		 * Adding an unchecked return value to `n' could push the
+		 * write pointer past the buffer and wrap `room'.  The
+		 * buffer was zeroed above and snprintf() terminates what
+		 * it writes, so the string stays NUL-terminated.
+		 */
+		if (len < 0 || (size_t)len >= room)
+			break;
+		n += (size_t)len;
+	}
+	return (lock_history_str);
+}
+
+/*
+ * Lock a socket.
+ *
+ * When the protocol supplies pr_lock, that routine does all the work and
+ * its return value is passed back.  Otherwise the domain mutex is taken,
+ * so_usecount is incremented if `refcount' is non-zero, and the caller's
+ * return address is stored in the socket's lock history ring; 0 is
+ * returned in that case.
+ */
+int
+socket_lock(struct socket *so, int refcount)
+{
+	void *caller = __builtin_return_address(0);
+
+	if (so->so_proto->pr_lock)
+		return ((*so->so_proto->pr_lock)(so, refcount, caller));
+
+#ifdef MORE_LOCKING_DEBUG
+	lck_mtx_assert(so->so_proto->pr_domain->dom_mtx,
+	    LCK_MTX_ASSERT_NOTOWNED);
+#endif
+	lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
+	if (refcount)
+		so->so_usecount++;
+
+	/* remember who locked us, for solockhistory_nr() */
+	so->lock_lr[so->next_lock_lr] = caller;
+	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
+
+	return (0);
+}
+
+/*
+ * Unlock a socket.
+ *
+ * When the protocol supplies pr_unlock, that routine does all the work
+ * and its return value is passed back.  Otherwise the caller's return
+ * address is stored in the unlock history ring, and if `refcount' is
+ * non-zero so_usecount is decremented (panicking if it was not positive)
+ * with sofreelastref() called when it reaches zero; finally the domain
+ * mutex is dropped and 0 is returned.
+ *
+ * Panics if `so' or its so_proto is NULL.
+ */
+int
+socket_unlock(struct socket *so, int refcount)
+{
+	int error = 0;
+	void *lr_saved;
+	lck_mtx_t *mutex_held;
+
+	lr_saved = __builtin_return_address(0);
+
+	/*
+	 * Validate `so' before the first dereference.  Checking it only
+	 * after reading so->so_proto would turn a NULL socket into a
+	 * fault instead of this panic.
+	 */
+	if (so == NULL || so->so_proto == NULL) {
+		panic("%s: null so_proto so=%p\n", __func__, so);
+		/* NOTREACHED */
+	}
+
+	if (so->so_proto->pr_unlock) {
+		error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
+	} else {
+		/* fetch the mutex first: `so' may be freed below */
+		mutex_held = so->so_proto->pr_domain->dom_mtx;
+#ifdef MORE_LOCKING_DEBUG
+		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
+#endif
+		so->unlock_lr[so->next_unlock_lr] = lr_saved;
+		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
+
+		if (refcount) {
+			if (so->so_usecount <= 0) {
+				panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
+				    "lrh=%s", __func__, so->so_usecount, so,
+				    SOCK_DOM(so), so->so_type,
+				    SOCK_PROTO(so), solockhistory_nr(so));
+				/* NOTREACHED */
+			}
+
+			so->so_usecount--;
+			if (so->so_usecount == 0)
+				sofreelastref(so, 1);
+		}
+		lck_mtx_unlock(mutex_held);
+	}
+
+	return (error);
+}
+
+/*
+ * Assert that the socket's mutex (from pr_getlock when the protocol
+ * provides it, otherwise the domain mutex) is owned, then hand the
+ * socket to sofreelastref().
+ *
+ * Caller contract: called with socket locked, will unlock socket.
+ */
+void
+sofree(struct socket *so)
+{
+	lck_mtx_t *mtx = (so->so_proto->pr_getlock != NULL) ?
+	    (*so->so_proto->pr_getlock)(so, 0) :
+	    so->so_proto->pr_domain->dom_mtx;
+
+	lck_mtx_assert(mtx, LCK_MTX_ASSERT_OWNED);
+	sofreelastref(so, 0);
+}
+
+/*
+ * Take one reference on the socket: lock it with the refcount argument
+ * set, then unlock it without that argument so the reference remains.
+ */
+void
+soreference(struct socket *so)
+{
+	(void) socket_lock(so, 1);	/* lock and take the reference */
+	(void) socket_unlock(so, 0);	/* unlock only, reference kept */
+}
+
+/*
+ * Drop one reference on the socket: lock it without taking a reference,
+ * then unlock it with the refcount argument set.
+ */
+void
+sodereference(struct socket *so)
+{
+	(void) socket_lock(so, 0);	/* lock only */
+	(void) socket_unlock(so, 1);	/* unlock and drop the reference */
+}
+
+/*
+ * Turn SOF_MULTIPAGES on (`set' true) or off (`set' false) for the
+ * socket; the flag controls whether jumbo clusters may be used.
+ * The caller must hold the socket lock.
+ */
+void
+somultipages(struct socket *so, boolean_t set)
+{
+	if (!set) {
+		so->so_flags &= ~SOF_MULTIPAGES;
+		return;
+	}
+	so->so_flags |= SOF_MULTIPAGES;
+}
+
+/*
+ * Check the socket's foreign address with inaddr_local() (PF_INET) or
+ * in6addr_local() (PF_INET6) and return that result.  Sockets of any
+ * other domain yield 0.
+ */
+int
+so_isdstlocal(struct socket *so)
+{
+	struct inpcb *inp = (struct inpcb *)so->so_pcb;
+
+	switch (SOCK_DOM(so)) {
+	case PF_INET:
+		return (inaddr_local(inp->inp_faddr));
+	case PF_INET6:
+		return (in6addr_local(&inp->in6p_faddr));
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Mark a socket as defunct.
+ *
+ * Sets SOF_DEFUNCT on the socket and SB_DROP on both socket buffers, and
+ * releases any data currently held in them.  A socket carrying
+ * SOF_NODEFUNCT is refused with EOPNOTSUPP when `noforce' is true;
+ * otherwise that flag is cleared and the socket is marked anyway.
+ * Calling this on a socket that is already SOF_DEFUNCT only verifies
+ * that SB_DROP is set on both buffers.  `p' and `level' are used for
+ * logging only.  Returns 0 unless refused.
+ */
+int
+sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
+{
+	struct sockbuf *rcv = &so->so_rcv;
+	struct sockbuf *snd = &so->so_snd;
+	int defunct = (so->so_flags & SOF_DEFUNCT);
+	int err = 0;
+
+	if (defunct) {
+		/* already marked: both buffers must be dropping data */
+		if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
+			panic("%s: SB_DROP not set", __func__);
+			/* NOTREACHED */
+		}
+	} else {
+		if (so->so_flags & SOF_NODEFUNCT) {
+			if (noforce) {
+				err = EOPNOTSUPP;
+				SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) "
+				    "so 0x%llx [%d,%d] is not eligible for defunct "
+				    "(%d)\n", __func__, proc_selfpid(), proc_pid(p),
+				    level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+				    SOCK_DOM(so), SOCK_TYPE(so), err));
+				return (err);
+			}
+			so->so_flags &= ~SOF_NODEFUNCT;
+			SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
+			    "[%d,%d] defunct by force\n", __func__, proc_selfpid(),
+			    proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+			    SOCK_DOM(so), SOCK_TYPE(so)));
+		}
+
+		so->so_flags |= SOF_DEFUNCT;
+
+		/* No more data may be appended to either buffer */
+		snd->sb_flags |= SB_DROP;
+		rcv->sb_flags |= SB_DROP;
+
+		/* Throw away whatever is buffered right now */
+		if (rcv->sb_cc != 0) {
+			rcv->sb_flags &= ~SB_SEL;
+			selthreadclear(&rcv->sb_sel);
+			sbrelease(rcv);
+		}
+		if (snd->sb_cc != 0) {
+			snd->sb_flags &= ~SB_SEL;
+			selthreadclear(&snd->sb_sel);
+			sbrelease(snd);
+		}
+	}
+
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%d,%d] %s "
+	    "defunct\n", __func__, proc_selfpid(), proc_pid(p), level,
+	    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so),
+	    defunct ? "is already" : "marked as"));
+
+	return (err);
+}
+
+/*
+ * Carry out the defunct transition on a socket already marked with
+ * SOF_DEFUNCT (panics otherwise).
+ *
+ * Wakes up and unlocks both socket buffers, shuts down both directions,
+ * disconnects, sets so_error to EBADF if no error is pending, releases
+ * buffered data and finally sets SS_DEFUNCT.  A socket that already has
+ * SS_DEFUNCT set is left untouched.  `p' and `level' are used for
+ * logging only.  Always returns 0.
+ */
+int
+sodefunct(struct proc *p, struct socket *so, int level)
+{
+	struct sockbuf *rcv, *snd;
+
+	if (!(so->so_flags & SOF_DEFUNCT)) {
+		panic("%s improperly called", __func__);
+		/* NOTREACHED */
+	}
+	if (so->so_state & SS_DEFUNCT)
+		goto done;
+
+	rcv = &so->so_rcv;
+	snd = &so->so_snd;
+
+	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
+		char s[MAX_IPv6_STR_LEN];
+		char d[MAX_IPv6_STR_LEN];
+		struct inpcb *inp = sotoinpcb(so);
+
+		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%s "
+		    "%s:%d -> %s:%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
+		    "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
+		    proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+		    (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
+		    inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
+		    (void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr),
+		    s, sizeof (s)), ntohs(inp->in6p_lport),
+		    inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
+		    (void *)&inp->inp_faddr.s_addr : (void *)&inp->in6p_faddr,
+		    d, sizeof (d)), ntohs(inp->in6p_fport),
+		    (uint32_t)rcv->sb_sel.si_flags,
+		    (uint32_t)snd->sb_sel.si_flags,
+		    rcv->sb_flags, snd->sb_flags));
+	} else {
+		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
+		    "[%d,%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
+		    "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
+		    proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+		    SOCK_DOM(so), SOCK_TYPE(so), (uint32_t)rcv->sb_sel.si_flags,
+		    (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
+		    snd->sb_flags));
+	}
+
+	/*
+	 * Unwedge threads blocked on sbwait() and sb_lock().
+	 */
+	sbwakeup(rcv);
+	sbwakeup(snd);
+
+	if (rcv->sb_flags & SB_LOCK)
+		sbunlock(rcv, TRUE);	/* keep socket locked */
+	if (snd->sb_flags & SB_LOCK)
+		sbunlock(snd, TRUE);	/* keep socket locked */
+
+	/*
+	 * Flush the buffers and disconnect.  We explicitly call shutdown
+	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
+	 * states are set for the socket.  This would also flush out data
+	 * hanging off the receive list of this socket.
+	 */
+	(void) soshutdownlock(so, SHUT_RD);
+	(void) soshutdownlock(so, SHUT_WR);
+	(void) sodisconnectlocked(so);
+
+	/*
+	 * Explicitly handle connectionless-protocol disconnection
+	 * and release any remaining data in the socket buffers.
+	 *
+	 * SS_ISDISCONNECTED is a so_state bit, so it is tested against
+	 * so_state; testing so_flags would compare it with an unrelated
+	 * SOF_* bit.
+	 */
+	if (!(so->so_state & SS_ISDISCONNECTED))
+		(void) soisdisconnected(so);
+
+	if (so->so_error == 0)
+		so->so_error = EBADF;
+
+	if (rcv->sb_cc != 0) {
+		rcv->sb_flags &= ~SB_SEL;
+		selthreadclear(&rcv->sb_sel);
+		sbrelease(rcv);
+	}
+	if (snd->sb_cc != 0) {
+		snd->sb_flags &= ~SB_SEL;
+		selthreadclear(&snd->sb_sel);
+		sbrelease(snd);
+	}
+	so->so_state |= SS_DEFUNCT;
+
+done:
+	return (0);
+}
+
+/*
+ * Set (optval non-zero) or clear (optval zero) INP_RECV_ANYIF on the
+ * inpcb of a PF_INET socket, or of a PF_INET6 socket when built with
+ * INET6.  Sockets of other domains are left alone.  Always returns 0.
+ */
+__private_extern__ int
+so_set_recv_anyif(struct socket *so, int optval)
+{
+	boolean_t isinet = (SOCK_DOM(so) == PF_INET);
+
+#if INET6
+	if (SOCK_DOM(so) == PF_INET6)
+		isinet = TRUE;
+#endif /* INET6 */
+
+	if (isinet) {
+		if (optval)
+			sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
+		else
+			sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
+	}
+
+	return (0);
+}
+
+/*
+ * Report whether INP_RECV_ANYIF is set on the inpcb of a PF_INET socket,
+ * or of a PF_INET6 socket when built with INET6.  Returns 1 if set, 0 if
+ * clear or if the socket belongs to another domain.
+ */
+__private_extern__ int
+so_get_recv_anyif(struct socket *so)
+{
+	boolean_t isinet = (SOCK_DOM(so) == PF_INET);
+
+#if INET6
+	if (SOCK_DOM(so) == PF_INET6)
+		isinet = TRUE;
+#endif /* INET6 */
+
+	if (!isinet)
+		return (0);
+
+	return ((sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0);
+}
+
+/*
+ * Add deny-type restrictions to a socket.
+ *
+ * Only SO_RESTRICT_DENY_IN, SO_RESTRICT_DENY_OUT and
+ * SO_RESTRICT_DENY_CELLULAR are honoured from `vals', and they are only
+ * ever OR-ed in: restrictions act as trapdoors that stay set for the
+ * lifetime of the socket, so a framework can issue them on behalf of an
+ * application without them being undone.
+ *
+ * Socket-level restrictions take precedence over protocol-level ones:
+ * SO_RESTRICT_DENY_CELLULAR outranks INP_NO_IFT_CELLULAR, which the
+ * PROC_UUID_NO_CELLULAR policy affects only for sockets on which
+ * SO_RESTRICT_DENY_CELLULAR has not been issued.
+ *
+ * Always returns 0.
+ */
+int
+so_set_restrictions(struct socket *so, uint32_t vals)
+{
+	int had_nocell, has_nocell;
+	boolean_t isinet;
+
+	had_nocell = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
+	so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
+	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR));
+	has_nocell = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
+
+	/* only a change in deny-cellular needs further work */
+	if (has_nocell == had_nocell)
+		return (0);
+
+	/* restrictions can be added but never removed */
+	VERIFY((has_nocell - had_nocell) > 0);
+
+	isinet = (SOCK_DOM(so) == PF_INET);
+#if INET6
+	if (SOCK_DOM(so) == PF_INET6)
+		isinet = TRUE;
+#endif /* INET6 */
+
+	/* deny cellular was just turned on: propagate it to the INPCB */
+	if (isinet)
+		inp_set_nocellular(sotoinpcb(so));
+
+	return (0);
+}
+
+/*
+ * Return the deny-type restriction bits (in, out, cellular) currently
+ * set on the socket; any other bits in so_restrictions are masked off.
+ */
+uint32_t
+so_get_restrictions(struct socket *so)
+{
+	uint32_t deny_bits;
+
+	deny_bits = so->so_restrictions & (SO_RESTRICT_DENY_IN |
+	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR);
+
+	return (deny_bits);
+}
+
+/*
+ * Allocate a zero-filled sockaddr_entry from se_zone.  M_WAITOK selects
+ * zalloc(); any other `how' selects zalloc_noblock().  Returns NULL when
+ * the allocation fails.
+ */
+struct sockaddr_entry *
+sockaddrentry_alloc(int how)
+{
+	struct sockaddr_entry *entry;
+
+	if (how == M_WAITOK)
+		entry = zalloc(se_zone);
+	else
+		entry = zalloc_noblock(se_zone);
+
+	if (entry == NULL)
+		return (NULL);
+
+	bzero(entry, se_zone_size);
+	return (entry);
+}
+
+/*
+ * Release a sockaddr_entry: free the attached address, if any, and then
+ * return the entry itself to se_zone.
+ */
+void
+sockaddrentry_free(struct sockaddr_entry *se)
+{
+	/* an entry may exist without an address attached */
+	if (se->se_addr != NULL) {
+		FREE(se->se_addr, M_SONAME);
+		se->se_addr = NULL;
+	}
+
+	zfree(se_zone, se);
+}
+
+/*
+ * Create a new sockaddr_entry holding a private copy of the address in
+ * `src_se'.  Returns NULL if either the entry or the address copy cannot
+ * be allocated.  src_se->se_addr is dereferenced unconditionally, so the
+ * caller must pass an entry that has an address.
+ */
+struct sockaddr_entry *
+sockaddrentry_dup(const struct sockaddr_entry *src_se, int how)
+{
+	struct sockaddr_entry *copy;
+	int len;
+
+	copy = sockaddrentry_alloc(how);
+	if (copy == NULL)
+		return (NULL);
+
+	len = src_se->se_addr->sa_len;
+	MALLOC(copy->se_addr, struct sockaddr *,
+	    len, M_SONAME, how | M_ZERO);
+	if (copy->se_addr == NULL) {
+		/* no memory for the address: give the entry back too */
+		sockaddrentry_free(copy);
+		return (NULL);
+	}
+
+	bcopy(src_se->se_addr, copy->se_addr, len);
+	return (copy);
+}
+
+/*
+ * Allocate an empty sockaddr_list from sl_zone: zero-filled, with its
+ * tail queue head initialised.  M_WAITOK selects zalloc(); any other
+ * `how' selects zalloc_noblock().  Returns NULL on allocation failure.
+ */
+struct sockaddr_list *
+sockaddrlist_alloc(int how)
+{
+	struct sockaddr_list *list;
+
+	if (how == M_WAITOK)
+		list = zalloc(sl_zone);
+	else
+		list = zalloc_noblock(sl_zone);
+
+	if (list == NULL)
+		return (NULL);
+
+	bzero(list, sl_zone_size);
+	TAILQ_INIT(&list->sl_head);
+	return (list);
+}
+
+/*
+ * Destroy a sockaddr_list: unlink and free every entry on it, verify
+ * that it ended up empty, then return the list to sl_zone.
+ */
+void
+sockaddrlist_free(struct sockaddr_list *sl)
+{
+	struct sockaddr_entry *head;
+
+	/* drain from the front until nothing is left */
+	while ((head = TAILQ_FIRST(&sl->sl_head)) != NULL) {
+		sockaddrlist_remove(sl, head);
+		sockaddrentry_free(head);
+	}
+
+	VERIFY(sl->sl_cnt == 0 && TAILQ_EMPTY(&sl->sl_head));
+	zfree(sl_zone, sl);
+}
+
+/*
+ * Append `se' to the tail of `sl', mark it SEF_ATTACHED and bump the
+ * list count.  The entry must not already be attached; the count is
+ * verified not to have wrapped to zero.
+ */
+void
+sockaddrlist_insert(struct sockaddr_list *sl, struct sockaddr_entry *se)
+{
+	VERIFY(!(se->se_flags & SEF_ATTACHED));
+
+	TAILQ_INSERT_TAIL(&sl->sl_head, se, se_link);
+	se->se_flags |= SEF_ATTACHED;
+
+	sl->sl_cnt++;
+	VERIFY(sl->sl_cnt != 0);
+}
+
+/*
+ * Unlink `se' from `sl', clear its SEF_ATTACHED mark and drop the list
+ * count.  The entry must be attached and the count must be non-zero.
+ */
+void
+sockaddrlist_remove(struct sockaddr_list *sl, struct sockaddr_entry *se)
+{
+	VERIFY(se->se_flags & SEF_ATTACHED);
+	se->se_flags &= ~SEF_ATTACHED;
+
+	VERIFY(sl->sl_cnt != 0);
+	TAILQ_REMOVE(&sl->sl_head, se, se_link);
+	sl->sl_cnt--;
+}
+
+/*
+ * Build a deep copy of `src_sl': a new list whose entries are duplicates
+ * of the source entries, in the same order.  Returns NULL (after freeing
+ * the partial copy) if any allocation fails.
+ *
+ * NOTE(review): source entries without an address are skipped, yet the
+ * final VERIFY requires both lists to have the same count, so a source
+ * list containing such an entry would trip the assertion.
+ */
+struct sockaddr_list *
+sockaddrlist_dup(const struct sockaddr_list *src_sl, int how)
+{
+	struct sockaddr_list *copy;
+	struct sockaddr_entry *cur;
+
+	copy = sockaddrlist_alloc(how);
+	if (copy == NULL)
+		return (NULL);
+
+	TAILQ_FOREACH(cur, &src_sl->sl_head, se_link) {
+		struct sockaddr_entry *dup;
+
+		if (cur->se_addr == NULL)
+			continue;
+
+		dup = sockaddrentry_dup(cur, how);
+		if (dup == NULL) {
+			sockaddrlist_free(copy);
+			return (NULL);
+		}
+
+		sockaddrlist_insert(copy, dup);
+	}
+	VERIFY(src_sl->sl_cnt == copy->sl_cnt);
+
+	return (copy);
+}
+
+/*
+ * Delegate a socket to the process identified by `epid', on behalf of
+ * the issuing process `p'.
+ *
+ * Returns 0 on success, EINVAL for pid 0, EACCES when the request is not
+ * permitted, and ESRCH when no process with that pid is found.  The
+ * outcome is logged when net_io_policy_log is set.
+ */
+int
+so_set_effective_pid(struct socket *so, int epid, struct proc *p)
+{
+	struct proc *ep = PROC_NULL;
+	int error = 0;
+
+	/* pid 0 is reserved for kernel */
+	if (epid == 0) {
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * The delegate association of an in-kernel socket may only be
+	 * changed by a socket option issued from within the kernel.
+	 */
+	if (so->last_pid == 0 && p != kernproc) {
+		error = EACCES;
+		goto done;
+	}
+
+	/*
+	 * The privilege check is skipped only when epid matches both the
+	 * recorded owner of the socket and the issuing process; every
+	 * other request needs PRIV_NET_PRIVILEGED_SOCKET_DELEGATE.
+	 * NOTE(review): an either-match rule may have been intended;
+	 * confirm before relaxing this test.
+	 */
+	if (!(epid == so->last_pid && epid == proc_pid(p)) &&
+	    priv_check_cred(kauth_cred_get(),
+	    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0) != 0) {
+		error = EACCES;
+		goto done;
+	}
+
+	/* Look up the process behind the effective pid */
+	ep = proc_find(epid);
+	if (ep == PROC_NULL) {
+		error = ESRCH;
+		goto done;
+	}
+
+	/*
+	 * Delegating a socket to the issuing process itself clears the
+	 * delegate association.  The passed-in proc is compared rather
+	 * than proc_selfpid(), because the issuer may be kernproc.  As
+	 * pid 0 is rejected above, a delegated in-kernel socket stays
+	 * delegated for its lifetime.
+	 */
+	if (epid != proc_pid(p)) {
+		so->so_flags |= SOF_DELEGATED;
+		so->e_upid = proc_uniqueid(ep);
+		so->e_pid = proc_pid(ep);
+		proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid));
+	} else {
+		so->so_flags &= ~SOF_DELEGATED;
+		so->e_upid = 0;
+		so->e_pid = 0;
+		uuid_clear(so->e_uuid);
+	}
+
+done:
+	if (net_io_policy_log) {
+		if (error == 0) {
+			uuid_string_t buf;
+
+			uuid_unparse(so->e_uuid, buf);
+			log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
+			    "euuid %s%s\n", __func__, proc_name_address(p),
+			    proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+			    SOCK_DOM(so), SOCK_TYPE(so), so->e_pid,
+			    proc_name_address(ep), buf,
+			    ((so->so_flags & SOF_DELEGATED) ?
+			    " [delegated]" : ""));
+		} else {
+			log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
+			    "ERROR (%d)\n", __func__, proc_name_address(p),
+			    proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+			    SOCK_DOM(so), SOCK_TYPE(so), epid,
+			    (ep == PROC_NULL) ? "PROC_NULL" :
+			    proc_name_address(ep), error);
+		}
+	}
+
+	/* drop the reference taken by proc_find() */
+	if (ep != PROC_NULL)
+		proc_rele(ep);
+
+	return (error);
+}
+
+/*
+ * Delegate a socket to the executable identified by `euuid', on behalf
+ * of the issuing process `p'.
+ *
+ * Returns 0 on success, EINVAL for an all-zero UUID and EACCES when the
+ * request is not permitted.  The outcome is logged when
+ * net_io_policy_log is set.
+ */
+int
+so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p)
+{
+	uuid_string_t buf;
+	uuid_t self_uuid;
+	int error = 0;
+
+	/* UUID must not be all-zeroes (reserved for kernel) */
+	if (uuid_is_null(euuid)) {
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * The delegate association of an in-kernel socket may only be
+	 * changed by a socket option issued from within the kernel.
+	 */
+	if (so->last_pid == 0 && p != kernproc) {
+		error = EACCES;
+		goto done;
+	}
+
+	/* UUID of the process issuing the request */
+	proc_getexecutableuuid(p, self_uuid, sizeof (self_uuid));
+
+	/*
+	 * The privilege check is skipped only when euuid matches both the
+	 * recorded owner's UUID and the issuing process's UUID; every
+	 * other request needs PRIV_NET_PRIVILEGED_SOCKET_DELEGATE.
+	 */
+	if (!(uuid_compare(euuid, so->last_uuid) == 0 &&
+	    uuid_compare(euuid, self_uuid) == 0) &&
+	    priv_check_cred(kauth_cred_get(),
+	    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0) != 0) {
+		error = EACCES;
+		goto done;
+	}
+
+	/*
+	 * Delegating a socket to the issuing process itself clears the
+	 * delegate association.  The UUID of the passed-in proc is
+	 * compared rather than that of the current process, because the
+	 * issuer may be kernproc.  As an all-zero UUID is rejected above,
+	 * a delegated in-kernel socket stays delegated for its lifetime.
+	 */
+	if (uuid_compare(euuid, self_uuid) != 0) {
+		so->so_flags |= SOF_DELEGATED;
+		/*
+		 * Only the UUID is known here, not a process ID, so the
+		 * socket's real {pid,upid} are inherited.
+		 */
+		so->e_upid = so->last_upid;
+		so->e_pid = so->last_pid;
+		uuid_copy(so->e_uuid, euuid);
+	} else {
+		so->so_flags &= ~SOF_DELEGATED;
+		so->e_upid = 0;
+		so->e_pid = 0;
+		uuid_clear(so->e_uuid);
+	}
+
+done:
+	if (net_io_policy_log) {
+		if (error == 0) {
+			uuid_unparse(so->e_uuid, buf);
+			log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
+			    "euuid %s%s\n", __func__, proc_name_address(p),
+			    proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+			    SOCK_DOM(so), SOCK_TYPE(so), so->e_pid, buf,
+			    ((so->so_flags & SOF_DELEGATED) ?
+			    " [delegated]" : ""));
+		} else {
+			/*
+			 * NOTE(review): this failure is logged at LOG_DEBUG,
+			 * whereas so_set_effective_pid() logs its failures
+			 * at LOG_ERR; confirm which is intended.
+			 */
+			uuid_unparse(euuid, buf);
+			log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
+			    "ERROR (%d)\n", __func__, proc_name_address(p),
+			    proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+			    SOCK_DOM(so), SOCK_TYPE(so), buf, error);
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Post a net-policy kernel event with code `ev_code'.
+ *
+ * The payload always begins with a netpolicy_event_data structure, but
+ * the caller may pass a longer, event-specific structure; `ev_datalen'
+ * is the full payload length and must cover at least the base structure.
+ */
+void
+netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
+    uint32_t ev_datalen)
+{
+	struct kev_msg msg;
+
+	VERIFY(ev_data != NULL && ev_datalen >= sizeof (*ev_data));
+
+	bzero(&msg, sizeof (msg));
+
+	/* event identity */
+	msg.vendor_code = KEV_VENDOR_APPLE;
+	msg.kev_class = KEV_NETWORK_CLASS;
+	msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
+	msg.event_code = ev_code;
+
+	/* single data vector carrying the caller's payload */
+	msg.dv[0].data_length = ev_datalen;
+	msg.dv[0].data_ptr = ev_data;
+
+	kev_post_msg(&msg);
+}