+
+/*
+ * Attach a knote to the socket behind kn's file.
+ *
+ * The knote's filter selects the filter ops and the klist it is attached to:
+ *   EVFILT_READ  -> receive buffer note list
+ *   EVFILT_WRITE -> send buffer note list
+ *   EVFILT_SOCK  -> socket-level klist
+ * When KNOTE_ATTACH() reports non-zero, the matching KNOTE flag is set on
+ * the buffer (SB_KNOTE) or on the socket (SOF_KNOTE).
+ *
+ * The socket lock is taken (with a use-count reference) for the duration of
+ * the call and dropped on every path.
+ *
+ * Returns 0 on success, 1 when the MAC check denies the request or the
+ * filter type is not one of the three handled here.
+ */
+int
+soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
+    __unused struct proc *p)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	struct klist *notes;
+	int rejected = 1;
+
+	socket_lock(so, 1);
+
+#if CONFIG_MACF_SOCKET
+	if (mac_socket_check_kqfilter(proc_ucred(p), kn, so) != 0)
+		goto out;
+#endif /* CONFIG_MACF_SOCKET */
+
+	/* Pick the filter ops and the note list for this filter type. */
+	if (kn->kn_filter == EVFILT_READ) {
+		kn->kn_fop = &soread_filtops;
+		notes = &so->so_rcv.sb_sel.si_note;
+	} else if (kn->kn_filter == EVFILT_WRITE) {
+		kn->kn_fop = &sowrite_filtops;
+		notes = &so->so_snd.sb_sel.si_note;
+	} else if (kn->kn_filter == EVFILT_SOCK) {
+		kn->kn_fop = &sock_filtops;
+		notes = &so->so_klist;
+	} else {
+		goto out;
+	}
+
+	if (KNOTE_ATTACH(notes, kn)) {
+		/* Record on the owner of the list that it now carries knotes. */
+		if (kn->kn_filter == EVFILT_READ)
+			so->so_rcv.sb_flags |= SB_KNOTE;
+		else if (kn->kn_filter == EVFILT_WRITE)
+			so->so_snd.sb_flags |= SB_KNOTE;
+		else if (kn->kn_filter == EVFILT_SOCK)
+			so->so_flags |= SOF_KNOTE;
+		else
+			goto out;
+	}
+	rejected = 0;
+
+out:
+	socket_unlock(so, 1);
+	return (rejected);
+}
+
+/*
+ * Detach a read-filter knote from the socket's receive buffer note list.
+ * SB_KNOTE is cleared on the receive buffer when KNOTE_DETACH() returns
+ * non-zero.  Nothing is done if the buffer is not marked SB_KNOTE.
+ */
+static void
+filt_sordetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_rcv.sb_flags & SB_KNOTE) != 0 &&
+	    KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
+		so->so_rcv.sb_flags &= ~SB_KNOTE;
+	socket_unlock(so, 1);
+}
+
+/*
+ * EVFILT_READ filter routine for sockets.
+ *
+ * Updates kn->kn_data (and, in some cases, kn->kn_flags / kn->kn_fflags)
+ * from the current socket state and returns non-zero when the knote should
+ * fire.
+ *
+ * If the hint does not carry SO_FILT_HINT_LOCKED the socket lock is taken
+ * here and released before returning; otherwise the lock is left untouched
+ * (presumably the caller already holds it).
+ */
+/*ARGSUSED*/
+static int
+filt_soread(struct knote *kn, long hint)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	const int caller_locked = (hint & SO_FILT_HINT_LOCKED) != 0;
+	int64_t threshold;
+	int fired;
+
+	if (!caller_locked)
+		socket_lock(so, 1);
+
+	if (so->so_options & SO_ACCEPTCONN) {
+		/*
+		 * Radar 6615193: handle the listen case dynamically for the
+		 * kqueue read filter, so that listen() may be called after
+		 * EVFILT_READ has been registered.
+		 *
+		 * A listener reports its queue length and fires when the
+		 * completed-connection queue is non-empty.
+		 */
+		kn->kn_data = so->so_qlen;
+		fired = !TAILQ_EMPTY(&so->so_comp);
+		goto done;
+	}
+
+	/* Not a listener: report data bytes, excluding control bytes. */
+	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
+
+	if (so->so_oobmark) {
+		if (kn->kn_flags & EV_OOBAND) {
+			kn->kn_data -= so->so_oobmark;
+			fired = 1;
+			goto done;
+		}
+		kn->kn_data = so->so_oobmark;
+		kn->kn_flags |= EV_OOBAND;
+	} else if (so->so_state & SS_CANTRCVMORE) {
+		/* Receive side is shut: report EOF with the socket error. */
+		kn->kn_flags |= EV_EOF;
+		kn->kn_fflags = so->so_error;
+		fired = 1;
+		goto done;
+	}
+
+	if (so->so_state & SS_RCVATMARK) {
+		if (kn->kn_flags & EV_OOBAND) {
+			fired = 1;
+			goto done;
+		}
+		kn->kn_flags |= EV_OOBAND;
+	} else if (kn->kn_flags & EV_OOBAND) {
+		kn->kn_data = 0;
+		fired = 0;
+		goto done;
+	}
+
+	if (so->so_error) {	/* temporary udp error */
+		fired = 1;
+		goto done;
+	}
+
+	/*
+	 * Low-water mark: start from the buffer's own value; with NOTE_LOWAT
+	 * the caller-supplied value may raise it, capped at the high-water
+	 * mark.
+	 */
+	threshold = so->so_rcv.sb_lowat;
+	if (kn->kn_sfflags & NOTE_LOWAT) {
+		if (kn->kn_sdata > so->so_rcv.sb_hiwat)
+			threshold = so->so_rcv.sb_hiwat;
+		else if (kn->kn_sdata > threshold)
+			threshold = kn->kn_sdata;
+	}
+
+	if (!caller_locked)
+		socket_unlock(so, 1);
+
+	return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= threshold);
+
+done:
+	if (!caller_locked)
+		socket_unlock(so, 1);
+	return (fired);
+}
+
+/*
+ * Detach a write-filter knote from the socket's send buffer note list.
+ * SB_KNOTE is cleared on the send buffer when KNOTE_DETACH() returns
+ * non-zero.  Nothing is done if the buffer is not marked SB_KNOTE.
+ */
+static void
+filt_sowdetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_snd.sb_flags & SB_KNOTE) != 0 &&
+	    KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
+		so->so_snd.sb_flags &= ~SB_KNOTE;
+	socket_unlock(so, 1);
+}
+
+/*
+ * Returns 1 when the socket is a connected AF_INET/AF_INET6 socket whose
+ * inpcb satisfies INP_WAIT_FOR_IF_FEEDBACK(), and 0 otherwise.
+ */
+int
+so_wait_for_if_feedback(struct socket *so)
+{
+	int family = so->so_proto->pr_domain->dom_family;
+	struct inpcb *inp;
+
+	if (family != AF_INET && family != AF_INET6)
+		return (0);
+	if ((so->so_state & SS_ISCONNECTED) == 0)
+		return (0);
+
+	inp = sotoinpcb(so);
+	if (INP_WAIT_FOR_IF_FEEDBACK(inp))
+		return (1);
+	return (0);
+}
+
+/*
+ * EVFILT_WRITE filter routine for sockets.
+ *
+ * Stores the free space of the send buffer in kn->kn_data and returns
+ * non-zero when the knote should fire.
+ *
+ * If the hint does not carry SO_FILT_HINT_LOCKED the socket lock is taken
+ * here and released before returning; otherwise the lock is left untouched
+ * (presumably the caller already holds it).
+ */
+/*ARGSUSED*/
+static int
+filt_sowrite(struct knote *kn, long hint)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	const int caller_locked = (hint & SO_FILT_HINT_LOCKED) != 0;
+	int64_t threshold;
+	int ready = 0;
+
+	if (!caller_locked)
+		socket_lock(so, 1);
+
+	kn->kn_data = sbspace(&so->so_snd);
+
+	if (so->so_state & SS_CANTSENDMORE) {
+		/* Send side is shut: report EOF with the socket error. */
+		kn->kn_flags |= EV_EOF;
+		kn->kn_fflags = so->so_error;
+		ready = 1;
+	} else if (so->so_error) {	/* temporary udp error */
+		ready = 1;
+	} else if ((so->so_state & SS_ISCONNECTED) == 0 &&
+	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+		/* Protocol needs a connection and there is none yet. */
+		ready = 0;
+	} else {
+		/*
+		 * Low-water mark: start from the buffer's own value; with
+		 * NOTE_LOWAT the caller-supplied value may raise it, capped
+		 * at the high-water mark.
+		 */
+		threshold = so->so_snd.sb_lowat;
+		if (kn->kn_sfflags & NOTE_LOWAT) {
+			if (kn->kn_sdata > so->so_snd.sb_hiwat)
+				threshold = so->so_snd.sb_hiwat;
+			else if (kn->kn_sdata > threshold)
+				threshold = kn->kn_sdata;
+		}
+
+		if (kn->kn_data >= threshold) {
+			if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0)
+				ready = tcp_notsent_lowat_check(so);
+			else
+				ready = 1;
+		}
+
+		/* Interface feedback pending overrides writability. */
+		if (so_wait_for_if_feedback(so))
+			ready = 0;
+	}
+
+	if (!caller_locked)
+		socket_unlock(so, 1);
+	return(ready);
+}
+
+/*
+ * Detach an EVFILT_SOCK knote from the socket-level klist.
+ * SOF_KNOTE is cleared on the socket when KNOTE_DETACH() returns non-zero.
+ * Nothing is done if the socket is not marked SOF_KNOTE.
+ */
+static void
+filt_sockdetach(struct knote *kn)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+	socket_lock(so, 1);
+	if ((so->so_flags & SOF_KNOTE) != 0 &&
+	    KNOTE_DETACH(&so->so_klist, kn))
+		so->so_flags &= ~SOF_KNOTE;
+	socket_unlock(so, 1);
+}
+
+/*
+ * EVFILT_SOCK filter routine.
+ *
+ * Translates the event carried in the hint, plus the current socket state,
+ * into NOTE_* bits in kn->kn_fflags.  Only bits the knote subscribed to
+ * (kn->kn_sfflags) are reported.  Returns non-zero when any bit is set in
+ * kn->kn_fflags or when the socket has a pending error.
+ *
+ * If the hint does not carry SO_FILT_HINT_LOCKED the socket lock is taken
+ * here and released before returning.
+ */
+static int
+filt_sockev(struct knote *kn, long hint)
+{
+	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+	int fire = 0;
+	int took_lock = 0;
+
+	if ((hint & SO_FILT_HINT_LOCKED) == 0) {
+		socket_lock(so, 1);
+		took_lock = 1;
+	}
+
+	/* One-shot events delivered through the hint. */
+	switch (hint & SO_FILT_HINT_EV) {
+	case SO_FILT_HINT_CONNRESET:
+		if (kn->kn_sfflags & NOTE_CONNRESET)
+			kn->kn_fflags |= NOTE_CONNRESET;
+		break;
+	case SO_FILT_HINT_TIMEOUT:
+		if (kn->kn_sfflags & NOTE_TIMEOUT)
+			kn->kn_fflags |= NOTE_TIMEOUT;
+		break;
+	case SO_FILT_HINT_NOSRCADDR:
+		if (kn->kn_sfflags & NOTE_NOSRCADDR)
+			kn->kn_fflags |= NOTE_NOSRCADDR;
+		break;
+	case SO_FILT_HINT_IFDENIED:
+		if ((kn->kn_sfflags & NOTE_IFDENIED))
+			kn->kn_fflags |= NOTE_IFDENIED;
+		break;
+	case SO_FILT_HINT_KEEPALIVE:
+		if (kn->kn_sfflags & NOTE_KEEPALIVE)
+			kn->kn_fflags |= NOTE_KEEPALIVE;
+		break;
+	default:
+		break;
+	}
+
+	/* Level-triggered conditions derived from the socket state. */
+	if ((kn->kn_sfflags & NOTE_READCLOSED) &&
+	    (so->so_state & SS_CANTRCVMORE))
+		kn->kn_fflags |= NOTE_READCLOSED;
+
+	if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
+	    (so->so_state & SS_CANTSENDMORE))
+		kn->kn_fflags |= NOTE_WRITECLOSED;
+
+	/*
+	 * NOTE_SUSPEND and NOTE_RESUME are mutually exclusive in the
+	 * reported flags: each clears both before setting itself.
+	 */
+	if ((kn->kn_sfflags & NOTE_SUSPEND) &&
+	    ((hint & SO_FILT_HINT_SUSPEND) ||
+	    (so->so_flags & SOF_SUSPENDED))) {
+		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
+		kn->kn_fflags |= NOTE_SUSPEND;
+	}
+
+	if ((kn->kn_sfflags & NOTE_RESUME) &&
+	    ((hint & SO_FILT_HINT_RESUME) ||
+	    (so->so_flags & SOF_SUSPENDED) == 0)) {
+		kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
+		kn->kn_fflags |= NOTE_RESUME;
+	}
+
+	if (so->so_error == 0) {
+		/* No error: kn_data carries the connection-state bits. */
+		get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
+	} else {
+		/* Pending error: report it as EOF with the error in kn_data. */
+		fire = 1;
+		kn->kn_data = so->so_error;
+		kn->kn_flags |= EV_EOF;
+	}
+
+	if (kn->kn_fflags != 0)
+		fire = 1;
+
+	if (took_lock)
+		socket_unlock(so, 1);
+
+	return(fire);
+}
+
+/*
+ * Update the SOCKEV_* connection bits in *statep from the socket state.
+ *
+ * SOCKEV_CONNECTED is set or cleared to mirror SS_ISCONNECTED.
+ * SOCKEV_DISCONNECTED is set when SS_ISDISCONNECTED is set; it is never
+ * cleared here.  All other bits of *statep are preserved.
+ */
+void
+get_sockev_state(struct socket *so, u_int32_t *statep)
+{
+	u_int32_t bits = *statep;
+
+	bits &= ~(SOCKEV_CONNECTED);
+	if (so->so_state & SS_ISCONNECTED)
+		bits |= SOCKEV_CONNECTED;
+	if (so->so_state & SS_ISDISCONNECTED)
+		bits |= SOCKEV_DISCONNECTED;
+
+	*statep = bits;
+}
+
+/*
+ * Room for SO_LCKDBG_MAX lock/unlock pairs: per address, two characters of
+ * slack, 2 * sizeof (void *) hex digits and one separator, plus the
+ * terminating NUL.
+ */
+#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1)
+
+/*
+ * Format the socket's recorded lock/unlock return addresses as a string of
+ * "lock:unlock " pairs in hex, walking the history slots from offset
+ * SO_LCKDBG_MAX - 1 down to 0 relative to the next-slot indices.
+ *
+ * The result lives in a function-local static buffer that is overwritten on
+ * every call, so this routine is not reentrant.
+ */
+__private_extern__ const char * solockhistory_nr(struct socket *so)
+{
+	static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
+	size_t n = 0;
+	int i;
+
+	bzero(lock_history_str, sizeof(lock_history_str));
+
+	for (i = SO_LCKDBG_MAX; i-- > 0; ) {
+		void *locker = so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX];
+		void *unlocker = so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX];
+
+		n += snprintf(lock_history_str + n, sizeof(lock_history_str) - n,
+		    "%lx:%lx ", (uintptr_t) locker, (uintptr_t) unlocker);
+	}
+
+	return lock_history_str;
+}
+
+/*
+ * Lock a socket.
+ *
+ * If the protocol supplies a pr_lock hook, the work is delegated to it and
+ * its return value is passed back.  Otherwise the domain mutex is taken,
+ * the use count is bumped when refcount is non-zero, and the caller's
+ * return address is recorded in the socket's lock history ring.  That path
+ * always returns 0.
+ */
+int
+socket_lock(struct socket *so, int refcount)
+{
+	void *caller = __builtin_return_address(0);
+
+	if (so->so_proto->pr_lock)
+		return ((*so->so_proto->pr_lock)(so, refcount, caller));
+
+#ifdef MORE_LOCKING_DEBUG
+	lck_mtx_assert(so->so_proto->pr_domain->dom_mtx,
+	    LCK_MTX_ASSERT_NOTOWNED);
+#endif
+	lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
+	if (refcount)
+		so->so_usecount++;
+
+	/* Remember who locked us, for solockhistory_nr(). */
+	so->lock_lr[so->next_lock_lr] = caller;
+	so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
+
+	return (0);
+}
+
+/*
+ * Unlock a socket.
+ *
+ * If the protocol supplies a pr_unlock hook, the work is delegated to it
+ * and its return value is passed back.  Otherwise the caller's return
+ * address is recorded in the unlock history ring, the use count is dropped
+ * when refcount is non-zero (calling sofreelastref() when it reaches zero),
+ * and the domain mutex is released.  That path always returns 0.
+ *
+ * Panics if so or so->so_proto is NULL, or if a reference is being dropped
+ * while the use count is not positive.
+ */
+int
+socket_unlock(struct socket *so, int refcount)
+{
+	int error = 0;
+	void *lr_saved;
+	lck_mtx_t *mutex_held;
+
+	lr_saved = __builtin_return_address(0);
+
+	/*
+	 * Validate the socket before touching it.  The NULL test on so used
+	 * to come after so->so_proto had already been dereferenced, which
+	 * made it dead code.
+	 */
+	if (so == NULL || so->so_proto == NULL)
+		panic("socket_unlock null so_proto so=%p\n", so);
+
+	if (so->so_proto->pr_unlock) {
+		error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
+	} else {
+		/*
+		 * Capture the mutex up front so it can be released without
+		 * going through so again after sofreelastref() (presumably
+		 * because that call may release the socket).
+		 */
+		mutex_held = so->so_proto->pr_domain->dom_mtx;
+#ifdef MORE_LOCKING_DEBUG
+		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
+#endif
+		/* Remember who unlocked us, for solockhistory_nr(). */
+		so->unlock_lr[so->next_unlock_lr] = lr_saved;
+		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
+
+		if (refcount) {
+			if (so->so_usecount <= 0)
+				panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s",
+				    so->so_usecount, so, so->so_proto->pr_domain->dom_family,
+				    so->so_type, so->so_proto->pr_protocol,
+				    solockhistory_nr(so));
+
+			so->so_usecount--;
+			if (so->so_usecount == 0) {
+				sofreelastref(so, 1);
+			}
+		}
+		lck_mtx_unlock(mutex_held);
+	}
+
+	return (error);
+}
+
+/*
+ * Called with socket locked, will unlock socket.
+ *
+ * Asserts that the mutex protecting the socket (the protocol's own lock if
+ * it provides pr_getlock, the domain mutex otherwise) is owned, then hands
+ * the socket to sofreelastref().
+ */
+void
+sofree(struct socket *so)
+{
+	lck_mtx_t *mutex_held = (so->so_proto->pr_getlock != NULL) ?
+	    (*so->so_proto->pr_getlock)(so, 0) :
+	    so->so_proto->pr_domain->dom_mtx;
+
+	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
+
+	sofreelastref(so, 0);
+}
+
+/*
+ * Take one use-count reference on the socket: lock with a reference, then
+ * unlock without dropping it.
+ */
+void
+soreference(struct socket *so)
+{
+	(void) socket_lock(so, 1);	/* lock and take one reference */
+	(void) socket_unlock(so, 0);	/* unlock only, keep the reference */
+}
+
+/*
+ * Drop one use-count reference on the socket: lock without taking a
+ * reference, then unlock dropping one.
+ */
+void
+sodereference(struct socket *so)
+{
+	(void) socket_lock(so, 0);	/* lock only */
+	(void) socket_unlock(so, 1);	/* unlock and drop one reference */
+}
+
+/*
+ * Turn SOF_MULTIPAGES on (set is true) or off (set is false) for the
+ * socket, which enables or disables the possibility of using jumbo
+ * clusters.  The caller must hold the socket lock.
+ */
+void
+somultipages(struct socket *so, boolean_t set)
+{
+	if (!set) {
+		so->so_flags &= ~SOF_MULTIPAGES;
+		return;
+	}
+	so->so_flags |= SOF_MULTIPAGES;
+}
+
+/*
+ * Report whether the socket's foreign address is local, as decided by
+ * inaddr_local() for AF_INET and in6addr_local() for AF_INET6.
+ * Returns 0 for any other address family.
+ */
+int
+so_isdstlocal(struct socket *so)
+{
+	struct inpcb *inp = (struct inpcb *)so->so_pcb;
+
+	switch (so->so_proto->pr_domain->dom_family) {
+	case AF_INET:
+		return inaddr_local(inp->inp_faddr);
+	case AF_INET6:
+		return in6addr_local(&inp->in6p_faddr);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Mark a socket as defunct.
+ *
+ * Sets SOF_DEFUNCT on the socket and SB_DROP on both socket buffers so that
+ * no further data is appended to them.  If the socket is already marked
+ * defunct, only a consistency check is made (both buffers must carry
+ * SB_DROP, otherwise panic).
+ *
+ * A socket flagged SOF_NODEFUNCT is refused with EOPNOTSUPP when noforce is
+ * true; when noforce is false the flag is cleared and the socket is marked
+ * defunct anyway.
+ *
+ * p and level are used for logging only.  Returns 0 on success.
+ */
+int
+sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
+{
+	int err = 0;
+	int was_defunct = (so->so_flags & SOF_DEFUNCT);
+
+	if (was_defunct) {
+		if (!(so->so_snd.sb_flags & so->so_rcv.sb_flags & SB_DROP))
+			panic("%s: SB_DROP not set", __func__);
+	} else {
+		if (so->so_flags & SOF_NODEFUNCT) {
+			if (noforce) {
+				err = EOPNOTSUPP;
+				SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p "
+				    "[%d,%d] is not eligible for defunct (%d)\n",
+				    __func__, proc_selfpid(), proc_pid(p), level, so,
+				    INP_SOCKAF(so), INP_SOCKTYPE(so), err));
+				return (err);
+			}
+			so->so_flags &= ~SOF_NODEFUNCT;
+			SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] "
+			    "defunct by force\n", __func__, proc_selfpid(), proc_pid(p),
+			    level, so, INP_SOCKAF(so), INP_SOCKTYPE(so)));
+		}
+
+		so->so_flags |= SOF_DEFUNCT;
+		/* Prevent further data from being appended to the socket buffers */
+		so->so_snd.sb_flags |= SB_DROP;
+		so->so_rcv.sb_flags |= SB_DROP;
+	}
+
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s "
+	    "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, so,
+	    INP_SOCKAF(so), INP_SOCKTYPE(so),
+	    was_defunct ? "is already" : "marked as"));
+
+	return (err);
+}
+
+/*
+ * Tear down a socket that has already been marked defunct.
+ *
+ * The socket must carry SOF_DEFUNCT, otherwise this panics.  If SS_DEFUNCT
+ * is already set there is nothing to do.  Otherwise waiters on both socket
+ * buffers are woken, held buffer locks are released, both directions are
+ * shut down, the socket is disconnected, so_error is set to EBADF if no
+ * error is pending, remaining buffer data is released, and SS_DEFUNCT is
+ * set.
+ *
+ * p and level are used for logging only.  Always returns 0.
+ */
+int
+sodefunct(struct proc *p, struct socket *so, int level)
+{
+	struct sockbuf *rcv, *snd;
+
+	if (!(so->so_flags & SOF_DEFUNCT))
+		panic("%s improperly called", __func__);
+
+	if (so->so_state & SS_DEFUNCT)
+		goto done;
+
+	rcv = &so->so_rcv;
+	snd = &so->so_snd;
+
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now "
+	    "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
+	    __func__, proc_selfpid(), proc_pid(p), level, so,
+	    INP_SOCKAF(so), INP_SOCKTYPE(so),
+	    (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags,
+	    (uint16_t)rcv->sb_flags, (uint16_t)snd->sb_flags));
+
+	/*
+	 * Unwedge threads blocked on sbwait() and sb_lock().
+	 */
+	sbwakeup(rcv);
+	sbwakeup(snd);
+
+	if (rcv->sb_flags & SB_LOCK)
+		sbunlock(rcv, 1);
+	if (snd->sb_flags & SB_LOCK)
+		sbunlock(snd, 1);
+
+	/*
+	 * Flush the buffers and disconnect.  We explicitly call shutdown
+	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
+	 * states are set for the socket.  This would also flush out data
+	 * hanging off the receive list of this socket.
+	 */
+	(void) soshutdownlock(so, SHUT_RD);
+	(void) soshutdownlock(so, SHUT_WR);
+	(void) sodisconnectlocked(so);
+
+	/*
+	 * Explicitly handle connectionless-protocol disconnection
+	 * and release any remaining data in the socket buffers.
+	 *
+	 * SS_ISDISCONNECTED is a socket state bit, so it must be tested
+	 * against so_state (not so_flags, which holds SOF_* bits).
+	 */
+	if (!(so->so_state & SS_ISDISCONNECTED))
+		(void) soisdisconnected(so);
+
+	if (so->so_error == 0)
+		so->so_error = EBADF;
+
+	if (rcv->sb_cc != 0)
+		sbrelease(rcv);
+	if (snd->sb_cc != 0)
+		sbrelease(snd);
+
+	so->so_state |= SS_DEFUNCT;
+
+done:
+	return (0);
+}
+
+/*
+ * Set (optval non-zero) or clear (optval zero) INP_RECV_ANYIF on the
+ * socket's inpcb.
+ *
+ * Only AF_INET sockets are supported, plus AF_INET6 when built with INET6.
+ * Returns 0 on success and EPROTONOSUPPORT for any other address family.
+ */
+__private_extern__ int
+so_set_recv_anyif(struct socket *so, int optval)
+{
+	int family = INP_SOCKAF(so);
+	int is_inet;
+
+#if INET6
+	is_inet = (family == AF_INET || family == AF_INET6);
+#else
+	is_inet = (family == AF_INET);
+#endif /* !INET6 */
+
+	if (!is_inet)
+		return (EPROTONOSUPPORT);
+
+	if (optval)
+		sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
+	else
+		sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
+
+	return (0);
+}
+
+/*
+ * Report whether INP_RECV_ANYIF is set on the socket's inpcb.
+ *
+ * Returns 1 when the flag is set on an AF_INET socket (or AF_INET6 when
+ * built with INET6), and 0 otherwise, including for any other address
+ * family.
+ */
+__private_extern__ int
+so_get_recv_anyif(struct socket *so)
+{
+	int family = INP_SOCKAF(so);
+	int is_inet;
+
+#if INET6
+	is_inet = (family == AF_INET || family == AF_INET6);
+#else
+	is_inet = (family == AF_INET);
+#endif /* !INET6 */
+
+	if (!is_inet)
+		return (0);
+
+	return ((sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0);
+}