+ * May return ERESTART when packet is dropped by MAC policy check
+ */
+static int
+soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
+ int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
+{
+ /*
+ * Handle the MT_SONAME (address) mbuf at the head of the current
+ * receive record: optionally run the MAC receive check, optionally
+ * copy the address out through psa, then either step over the mbuf
+ * (MSG_PEEK) or remove it from the receive buffer.
+ *
+ * p            process compared against kernproc; its credential is
+ *              passed to the MAC check
+ * so           socket whose so_rcv buffer is manipulated
+ * psa          if non-NULL, receives a dup_sockaddr() copy of the address
+ * flags        MSG_PEEK and MSG_NEEDSA are examined here
+ * mp           in: first mbuf of the record; out: final value of m
+ * nextrecordp  in/out: the record following the current one
+ * canwait      passed through unchanged to dup_sockaddr()
+ *
+ * Returns 0 on success, ERESTART when the MAC check rejects the
+ * record, ENOTCONN when SOF_DEFUNCT is found set after re-locking,
+ * and EWOULDBLOCK when the address copy fails and MSG_NEEDSA is set.
+ */
+ int error = 0;
+ struct mbuf *m = *mp;
+ struct mbuf *nextrecord = *nextrecordp;
+
+ /* The record is expected to start with its address mbuf. */
+ KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
+#if CONFIG_MACF_SOCKET_SUBSET
+ /*
+ * Call the MAC framework for policy checking if we're in
+ * the user process context and the socket isn't connected.
+ */
+ if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
+ /* m0 remembers the record head while m walks the chain. */
+ struct mbuf *m0 = m;
+ /*
+ * Dequeue this record (temporarily) from the receive
+ * list since we're about to drop the socket's lock
+ * where a new record may arrive and be appended to
+ * the list. Upon MAC policy failure, the record
+ * will be freed. Otherwise, we'll add it back to
+ * the head of the list. We cannot rely on SB_LOCK
+ * because append operation uses the socket's lock.
+ */
+ do {
+ m->m_nextpkt = NULL;
+ sbfree(&so->so_rcv, m);
+ m = m->m_next;
+ } while (m != NULL);
+ m = m0;
+ /* The buffer now starts at whatever followed this record. */
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
+ socket_unlock(so, 0);
+
+ if (mac_socket_check_received(proc_ucred(p), so,
+ mtod(m, struct sockaddr *)) != 0) {
+ /*
+ * MAC policy failure; free this record and
+ * process the next record (or block until
+ * one is available). We have adjusted sb_cc
+ * and sb_mbcnt above so there is no need to
+ * call sbfree() again.
+ */
+ m_freem(m);
+ /*
+ * Clear SB_LOCK but don't unlock the socket.
+ * Process the next record or wait for one.
+ */
+ socket_lock(so, 0);
+ sbunlock(&so->so_rcv, TRUE); /* stay locked */
+ /*
+ * NOTE(review): m was just freed, yet it is still
+ * stored into *mp at "done". Callers presumably do
+ * not dereference *mp after ERESTART -- confirm.
+ */
+ error = ERESTART;
+ goto done;
+ }
+ socket_lock(so, 0);
+ /*
+ * If the socket has been defunct'd, drop it.
+ */
+ if (so->so_flags & SOF_DEFUNCT) {
+ /*
+ * The record is still off the receive list here, so
+ * it is freed rather than re-enqueued.
+ * NOTE(review): as on the ERESTART path, *mp ends up
+ * holding the freed pointer -- confirm callers ignore
+ * it on error.
+ */
+ m_freem(m);
+ error = ENOTCONN;
+ goto done;
+ }
+ /*
+ * Re-adjust the socket receive list and re-enqueue
+ * the record in front of any packets which may have
+ * been appended while we dropped the lock.
+ */
+ /*
+ * The loop charges every mbuf except the last and leaves m
+ * on the last mbuf of the record, which is charged
+ * separately right after the loop.
+ */
+ for (m = m0; m->m_next != NULL; m = m->m_next) {
+ sballoc(&so->so_rcv, m);
+ }
+ sballoc(&so->so_rcv, m);
+ /*
+ * Buffer is otherwise empty: this record becomes the last
+ * record and its final mbuf (m) the tail mbuf.
+ */
+ if (so->so_rcv.sb_mb == NULL) {
+ so->so_rcv.sb_lastrecord = m0;
+ so->so_rcv.sb_mbtail = m;
+ }
+ m = m0;
+ /* Link the record ahead of the current buffer head. */
+ nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
+ so->so_rcv.sb_mb = m;
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
+ }
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
+ /*
+ * Hand a copy of the address to the caller if one was requested.
+ * A failed copy is only an error when MSG_NEEDSA is set.
+ */
+ if (psa != NULL) {
+ *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
+ if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
+ error = EWOULDBLOCK;
+ goto done;
+ }
+ }
+ if (flags & MSG_PEEK) {
+ /* Peeking: leave the address mbuf queued, just step past it. */
+ m = m->m_next;
+ } else {
+ /* Consuming: uncharge the address mbuf before freeing it. */
+ sbfree(&so->so_rcv, m);
+ /*
+ * The address mbuf is the only mbuf of the record, yet the
+ * buffer still accounts for bytes: refuse to continue.
+ */
+ if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
+ panic("%s: about to create invalid socketbuf",
+ __func__);
+ /* NOTREACHED */
+ }
+ /*
+ * Presumably MFREE() frees m and stores its successor in
+ * sb_mb (the next statement reloads m from there) --
+ * confirm against the macro definition.
+ */
+ MFREE(m, so->so_rcv.sb_mb);
+ m = so->so_rcv.sb_mb;
+ if (m != NULL) {
+ /* The remainder of the record is the new record head. */
+ m->m_nextpkt = nextrecord;
+ } else {
+ /* Nothing left in this record; move on to the next one. */
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
+ }
+done:
+ /* Publish the updated cursor and next-record pointer on every path. */
+ *mp = m;
+ *nextrecordp = nextrecord;
+
+ return error;
+}
+
+/*
+ * Process one or more MT_CONTROL mbufs present before any data mbufs
+ * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
+ * just copy the data; if !MSG_PEEK, we unlink the control mbufs from
+ * the socket buffer and call into the protocol to perform
+ * externalization of SCM_RIGHTS messages.
+ *
+ * Parameters:
+ *	so		socket whose so_rcv buffer is processed
+ *	controlp	if non-NULL, link through which control mbufs
+ *			(copies under MSG_PEEK) are chained for the caller
+ *	flags		only MSG_PEEK is examined here
+ *	mp		in: first MT_CONTROL mbuf; out: final value of m
+ *	nextrecordp	in/out: the record following the current one
+ *
+ * Returns:	0		Success
+ *		ENOBUFS		m_copy() failed under MSG_PEEK
+ *		dom_externalize:???
+ */
+static int
+soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
+ struct mbuf **mp, struct mbuf **nextrecordp)
+{
+ int error = 0;
+ /*
+ * cm heads the private chain of unlinked control mbufs and cme is
+ * the link where the next one is appended. msgpcm records a slot
+ * in the caller's control chain for cleanup on ENOBUFS.
+ */
+ struct mbuf *cm = NULL, *cmn;
+ struct mbuf **cme = &cm;
+ struct sockbuf *sb_rcv = &so->so_rcv;
+ struct mbuf **msgpcm = NULL;
+ struct mbuf *m = *mp;
+ struct mbuf *nextrecord = *nextrecordp;
+ struct protosw *pr = so->so_proto;
+
+ /*
+ * Externalizing the control messages would require us to
+ * drop the socket's lock below. Once we re-acquire the
+ * lock, the mbuf chain might change. In order to preserve
+ * consistency, we unlink all control messages from the
+ * first mbuf chain in one shot and link them separately
+ * onto a different chain.
+ */
+ do {
+ if (flags & MSG_PEEK) {
+ if (controlp != NULL) {
+ /*
+ * NOTE(review): msgpcm is only set when the
+ * current slot is empty. If the caller ever
+ * passed a non-NULL *controlp, msgpcm would
+ * still be NULL at the m_freem() below.
+ * Presumably callers always start with
+ * *controlp == NULL -- confirm.
+ */
+ if (*controlp == NULL) {
+ msgpcm = controlp;
+ }
+ *controlp = m_copy(m, 0, m->m_len);
+
+ /*
+ * If we failed to allocate an mbuf,
+ * release any previously allocated
+ * mbufs for control data. Return
+ * an error. Keep the mbufs in the
+ * socket as this is using
+ * MSG_PEEK flag.
+ */
+ if (*controlp == NULL) {
+ m_freem(*msgpcm);
+ error = ENOBUFS;
+ goto done;
+ }
+ controlp = &(*controlp)->m_next;
+ }
+ /* Peeking: the original stays queued; just advance. */
+ m = m->m_next;
+ } else {
+ /*
+ * Detach m from the record, uncharge it from the
+ * buffer, append it to the private cm chain and
+ * continue with the new buffer head.
+ */
+ m->m_nextpkt = NULL;
+ sbfree(sb_rcv, m);
+ sb_rcv->sb_mb = m->m_next;
+ m->m_next = NULL;
+ *cme = m;
+ cme = &(*cme)->m_next;
+ m = sb_rcv->sb_mb;
+ }
+ } while (m != NULL && m->m_type == MT_CONTROL);
+
+ /*
+ * After unlinking, re-attach what is left of the record (or the
+ * next record, if nothing is left) to the record list.
+ */
+ if (!(flags & MSG_PEEK)) {
+ if (sb_rcv->sb_mb != NULL) {
+ sb_rcv->sb_mb->m_nextpkt = nextrecord;
+ } else {
+ sb_rcv->sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(sb_rcv);
+ }
+ /* No following record: the remainder (m) is the last record. */
+ if (nextrecord == NULL) {
+ sb_rcv->sb_lastrecord = m;
+ }
+ }
+
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
+
+ /*
+ * Walk the unlinked control mbufs one at a time.
+ * NOTE(review): error is reassigned on every iteration, so the
+ * value returned reflects only the last control mbuf processed;
+ * an earlier dom_externalize failure is overwritten -- confirm
+ * this is intended.
+ */
+ while (cm != NULL) {
+ int cmsg_type;
+
+ /* Isolate cm so it can be handed over or freed on its own. */
+ cmn = cm->m_next;
+ cm->m_next = NULL;
+ cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
+
+ /*
+ * Call the protocol to externalize SCM_RIGHTS message
+ * and return the modified message to the caller upon
+ * success. Otherwise, all other control messages are
+ * returned unmodified to the caller. Note that we
+ * only get into this loop if MSG_PEEK is not set.
+ */
+ if (pr->pr_domain->dom_externalize != NULL &&
+ cmsg_type == SCM_RIGHTS) {
+ /*
+ * Release socket lock: see 3903171. This
+ * would also allow more records to be appended
+ * to the socket buffer. We still have SB_LOCK
+ * set on it, so we can be sure that the head
+ * of the mbuf chain won't change.
+ */
+ socket_unlock(so, 0);
+ error = (*pr->pr_domain->dom_externalize)(cm);
+ socket_lock(so, 0);
+ } else {
+ error = 0;
+ }
+
+ /*
+ * Give the mbuf to the caller when it asked for control data
+ * and this message was processed without error; otherwise
+ * free it.
+ */
+ if (controlp != NULL && error == 0) {
+ *controlp = cm;
+ controlp = &(*controlp)->m_next;
+ } else {
+ (void) m_free(cm);
+ }
+ cm = cmn;
+ }
+ /*
+ * Update the value of nextrecord in case we received new
+ * records when the socket was unlocked above for
+ * externalizing SCM_RIGHTS.
+ *
+ * NOTE(review): under MSG_PEEK nothing above modifies sb_mb, so
+ * when m == NULL (record held only control mbufs) nextrecord is
+ * set to the head of the current record -- confirm callers
+ * expect that.
+ */
+ if (m != NULL) {
+ nextrecord = sb_rcv->sb_mb->m_nextpkt;
+ } else {
+ nextrecord = sb_rcv->sb_mb;
+ }
+
+done:
+ /* Publish the updated cursor and next-record pointer on every path. */
+ *mp = m;
+ *nextrecordp = nextrecord;
+
+ return error;
+}
+
+/*
+ * Decide whether a receive operation should block awaiting more data
+ * (subject to any timeout).
+ *
+ * An empty receive queue (m == NULL) always means "wait".  Otherwise
+ * waiting is only considered when every one of these holds:
+ *	- MSG_DONTWAIT is not set,
+ *	- the buffer holds fewer bytes than the caller requested,
+ *	- no further record is queued behind m, and
+ *	- the protocol is not PR_ATOMIC.
+ * When they all hold, we wait if either
+ *	1. the current count is less than the low water mark, or
+ *	2. MSG_WAITALL is set and the entire receive could be done at
+ *	   once by blocking (resid <= hiwat).
+ * If MSG_WAITALL is set but resid is larger than the receive buffer,
+ * the receive has to be done in sections, and thus risks returning
+ * a short count if a timeout or signal occurs after it starts.
+ */
+static boolean_t
+so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags)
+{
+	struct sockbuf *rcv = &so->so_rcv;
+
+	/* Nothing queued at all: the caller has to wait. */
+	if (m == NULL) {
+		return true;
+	}
+
+	/* Non-blocking request: never wait for more. */
+	if ((flags & MSG_DONTWAIT) != 0) {
+		return false;
+	}
+
+	/* The buffer already covers the request. */
+	if (!(rcv->sb_cc < uio_resid(uio))) {
+		return false;
+	}
+
+	/* Another record is already queued behind this one. */
+	if (m->m_nextpkt != NULL) {
+		return false;
+	}
+
+	/* Atomic protocols are excluded from the waiting rules below. */
+	if ((so->so_proto->pr_flags & PR_ATOMIC) != 0) {
+		return false;
+	}
+
+	/*
+	 * Below the low-water mark: wait until at least that much
+	 * data is present.
+	 */
+	if (rcv->sb_cc < rcv->sb_lowat) {
+		return true;
+	}
+
+	/*
+	 * MSG_WAITALL with a request that fits in the buffer: wait so
+	 * the whole receive can be done in one pass.
+	 */
+	if ((flags & MSG_WAITALL) && uio_resid(uio) <= rcv->sb_hiwat) {
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Implement receive operations on a socket.
+ * We depend on the way that records are added to the sockbuf
+ * by sbappend*. In particular, each record (mbufs linked through m_next)
+ * must begin with an address if the protocol so specifies,
+ * followed by an optional mbuf or mbufs containing ancillary data,
+ * and then zero or more mbufs of data.
+ * In order to avoid blocking network interrupts for the entire time here,
+ * we splx() while doing the actual copy to user space.
+ * Although the sockbuf is locked, new data may still be appended,
+ * and thus we must maintain consistency of the sockbuf during that time.
+ *
+ * The caller may receive the data as a single mbuf chain by supplying
+ * an mbuf **mp0 for use in returning the chain. The uio is then used
+ * only for the count in uio_resid.
+ *
+ * Returns: 0 Success
+ * ENOBUFS
+ * ENOTCONN
+ * EWOULDBLOCK
+ * uiomove:EFAULT
+ * sblock:EWOULDBLOCK
+ * sblock:EINTR