+ /*
+ * When SO_WANTOOBFLAG is set we try to get out-of-band data
+ * regardless of the flags argument. Here is the case where
+ * out-of-band data is not inline.
+ */
+ if ((flags & MSG_OOB) ||
+ ((so->so_options & SO_WANTOOBFLAG) != 0 &&
+ (so->so_options & SO_OOBINLINE) == 0 &&
+ (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
+ m = m_get(M_WAIT, MT_DATA);
+ if (m == NULL) {
+ socket_unlock(so, 1);
+ KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
+ ENOBUFS, 0, 0, 0, 0);
+ return (ENOBUFS);
+ }
+ error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
+ if (error)
+ goto bad;
+ socket_unlock(so, 0);
+ do {
+ error = uiomove(mtod(m, caddr_t),
+ imin(uio_resid(uio), m->m_len), uio);
+ m = m_free(m);
+ } while (uio_resid(uio) && error == 0 && m != NULL);
+ socket_lock(so, 0);
+bad:
+ if (m != NULL)
+ m_freem(m);
+
+ if ((so->so_options & SO_WANTOOBFLAG) != 0) {
+ if (error == EWOULDBLOCK || error == EINVAL) {
+ /*
+ * Let's try to get normal data:
+ * EWOULDBLOCK: out-of-band data not
+ * received yet. EINVAL: out-of-band data
+ * already read.
+ */
+ error = 0;
+ goto nooob;
+ } else if (error == 0 && flagsp != NULL) {
+ *flagsp |= MSG_OOB;
+ }
+ }
+ socket_unlock(so, 1);
+ if (en_tracing) {
+ KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+ VM_KERNEL_ADDRPERM(so), 0,
+ (int64_t)(orig_resid - uio_resid(uio)));
+ }
+ KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
+ 0, 0, 0, 0);
+
+ return (error);
+ }
+nooob:
+ if (mp != NULL)
+ *mp = NULL;
+
+ if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
+ (*pr->pr_usrreqs->pru_rcvd)(so, 0);
+ }
+
+ free_list = NULL;
+ delayed_copy_len = 0;
+restart:
+#ifdef MORE_LOCKING_DEBUG
+ if (so->so_usecount <= 1)
+ printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
+ (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
+#endif
+ /*
+ * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
+ * and if so just return to the caller. This could happen when
+ * soreceive() is called by a socket upcall function during the
+ * time the socket is freed. The socket buffer would have been
+ * locked across the upcall, therefore we cannot put this thread
+ * to sleep (else we will deadlock) or return EWOULDBLOCK (else
+ * we may livelock), because the lock on the socket buffer will
+ * only be released when the upcall routine returns to its caller.
+ * Because the socket has been officially closed, there can be
+ * no further read on it.
+ *
+ * A multipath subflow socket would have its SS_NOFDREF set by
+ * default, so check for SOF_MP_SUBFLOW socket flag; when the
+ * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
+ */
+ if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
+ (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
+ socket_unlock(so, 1);
+ return (0);
+ }
+
+ error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
+ if (error) {
+ socket_unlock(so, 1);
+ KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
+ 0, 0, 0, 0);
+ if (en_tracing) {
+ KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+ VM_KERNEL_ADDRPERM(so), 0,
+ (int64_t)(orig_resid - uio_resid(uio)));
+ }
+ return (error);
+ }
+
+ m = so->so_rcv.sb_mb;
+ /*
+ * If we have less data than requested, block awaiting more
+ * (subject to any timeout) if:
+ * 1. the current count is less than the low water mark, or
+ * 2. MSG_WAITALL is set, and it is possible to do the entire
+ * receive operation at once if we block (resid <= hiwat).
+ * 3. MSG_DONTWAIT is not set
+ * If MSG_WAITALL is set but resid is larger than the receive buffer,
+ * we have to do the receive in sections, and thus risk returning
+ * a short count if a timeout or signal occurs after we start.
+ */
+ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
+ so->so_rcv.sb_cc < uio_resid(uio)) &&
+ (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+ ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
+ m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
+ /*
+ * Panic if we notice inconsistencies in the socket's
+ * receive list; both sb_mb and sb_cc should correctly
+ * reflect the contents of the list, otherwise we may
+ * end up with false positives during select() or poll()
+ * which could put the application in a bad state.
+ */
+ SB_MB_CHECK(&so->so_rcv);
+
+ if (so->so_error) {
+ if (m != NULL)
+ goto dontblock;
+ error = so->so_error;
+ if ((flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ goto release;
+ }
+ if (so->so_state & SS_CANTRCVMORE) {
+#if CONTENT_FILTER
+ /*
+ * Deal with half closed connections
+ */
+ if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
+ cfil_sock_data_pending(&so->so_rcv) != 0)
+ CFIL_LOG(LOG_INFO,
+ "so %llx ignore SS_CANTRCVMORE",
+ (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
+ else
+#endif /* CONTENT_FILTER */
+ if (m != NULL)
+ goto dontblock;
+ else
+ goto release;
+ }
+ for (; m != NULL; m = m->m_next)
+ if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
+ m = so->so_rcv.sb_mb;
+ goto dontblock;
+ }
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+ (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+ error = ENOTCONN;
+ goto release;
+ }
+ if (uio_resid(uio) == 0)
+ goto release;
+
+ if ((so->so_state & SS_NBIO) ||
+ (flags & (MSG_DONTWAIT|MSG_NBIO))) {
+ error = EWOULDBLOCK;
+ goto release;
+ }
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
+ sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
+#if EVEN_MORE_LOCKING_DEBUG
+ if (socket_debug)
+ printf("Waiting for socket data\n");
+#endif
+
+ error = sbwait(&so->so_rcv);
+#if EVEN_MORE_LOCKING_DEBUG
+ if (socket_debug)
+ printf("SORECEIVE - sbwait returned %d\n", error);
+#endif
+ if (so->so_usecount < 1) {
+ panic("%s: after 2nd sblock so=%p ref=%d on socket\n",
+ __func__, so, so->so_usecount);
+ /* NOTREACHED */
+ }
+ if (error) {
+ socket_unlock(so, 1);
+ KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
+ 0, 0, 0, 0);
+ if (en_tracing) {
+ KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+ VM_KERNEL_ADDRPERM(so), 0,
+ (int64_t)(orig_resid - uio_resid(uio)));
+ }
+ return (error);
+ }
+ goto restart;
+ }
+dontblock:
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
+ nextrecord = m->m_nextpkt;
+
+ if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
+ error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,
+ mp0 == NULL);
+ if (error == ERESTART)
+ goto restart;
+ else if (error != 0)
+ goto release;
+ orig_resid = 0;
+ }
+
+ /*
+ * Process one or more MT_CONTROL mbufs present before any data mbufs
+ * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
+ * just copy the data; if !MSG_PEEK, we call into the protocol to
+ * perform externalization.
+ */
+ if (m != NULL && m->m_type == MT_CONTROL) {
+ error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
+ if (error != 0)
+ goto release;
+ orig_resid = 0;
+ }
+
+ /*
+ * If the socket is a TCP socket with message delivery
+ * enabled, then create a control msg to deliver the
+ * relative TCP sequence number for this data. Waiting
+ * until this point will protect against failures to
+ * allocate an mbuf for control msgs.
+ */
+ if (so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP &&
+ (so->so_flags & SOF_ENABLE_MSGS) && controlp != NULL) {
+ struct mbuf *seq_cm;
+
+ seq_cm = sbcreatecontrol((caddr_t)&m->m_pkthdr.msg_seq,
+ sizeof (uint32_t), SCM_SEQNUM, SOL_SOCKET);
+ if (seq_cm == NULL) {
+ /* unable to allocate a control mbuf */
+ error = ENOBUFS;
+ goto release;
+ }
+ *controlp = seq_cm;
+ controlp = &seq_cm->m_next;
+ }
+
+ if (m != NULL) {
+ if (!(flags & MSG_PEEK)) {
+ /*
+ * We get here because m points to an mbuf following
+ * any MT_SONAME or MT_CONTROL mbufs which have been
+ * processed above. In any case, m should be pointing
+ * to the head of the mbuf chain, and the nextrecord
+ * should be either NULL or equal to m->m_nextpkt.
+ * See comments above about SB_LOCK.
+ */
+ if (m != so->so_rcv.sb_mb ||
+ m->m_nextpkt != nextrecord) {
+ panic("%s: post-control !sync so=%p m=%p "
+ "nextrecord=%p\n", __func__, so, m,
+ nextrecord);
+ /* NOTREACHED */
+ }
+ if (nextrecord == NULL)
+ so->so_rcv.sb_lastrecord = m;
+ }
+ type = m->m_type;
+ if (type == MT_OOBDATA)
+ flags |= MSG_OOB;
+ } else {
+ if (!(flags & MSG_PEEK)) {
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
+ }
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
+
+ moff = 0;
+ offset = 0;
+
+ if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
+ can_delay = 1;
+ else
+ can_delay = 0;
+
+ need_event = 0;
+
+ while (m != NULL &&
+ (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
+ if (m->m_type == MT_OOBDATA) {
+ if (type != MT_OOBDATA)
+ break;
+ } else if (type == MT_OOBDATA) {
+ break;
+ }
+ /*
+ * Make sure to always set MSG_OOB event when getting
+ * out of band data inline.
+ */
+ if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
+ (so->so_options & SO_OOBINLINE) != 0 &&
+ (so->so_state & SS_RCVATMARK) != 0) {
+ flags |= MSG_OOB;
+ }
+ so->so_state &= ~SS_RCVATMARK;
+ len = uio_resid(uio) - delayed_copy_len;
+ if (so->so_oobmark && len > so->so_oobmark - offset)
+ len = so->so_oobmark - offset;
+ if (len > m->m_len - moff)
+ len = m->m_len - moff;
+ /*
+ * If mp is set, just pass back the mbufs.
+ * Otherwise copy them out via the uio, then free.
+ * Sockbuf must be consistent here (points to current mbuf,
+ * it points to next record) when we drop priority;
+ * we must note any additions to the sockbuf when we
+ * block interrupts again.
+ */
+ if (mp == NULL) {
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
+ if (can_delay && len == m->m_len) {
+ /*
+ * only delay the copy if we're consuming the
+ * mbuf and we're NOT in MSG_PEEK mode
+ * and we have enough data to make it worthwhile
+ * to drop and retake the lock... can_delay
+ * reflects the state of the 2 latter
+ * constraints; moff should always be zero
+ * in these cases
+ */
+ delayed_copy_len += len;
+ } else {
+ if (delayed_copy_len) {
+ error = sodelayed_copy(so, uio,
+ &free_list, &delayed_copy_len);
+
+ if (error) {
+ goto release;
+ }
+ /*
+ * can only get here if MSG_PEEK is not
+ * set therefore, m should point at the
+ * head of the rcv queue; if it doesn't,
+ * it means something drastically
+ * changed while we were out from behind
+ * the lock in sodelayed_copy. perhaps
+ * a RST on the stream. in any event,
+ * the stream has been interrupted. it's
+ * probably best just to return whatever
+ * data we've moved and let the caller
+ * sort it out...
+ */
+ if (m != so->so_rcv.sb_mb) {
+ break;
+ }
+ }
+ socket_unlock(so, 0);
+ error = uiomove(mtod(m, caddr_t) + moff,
+ (int)len, uio);
+ socket_lock(so, 0);
+
+ if (error)
+ goto release;
+ }
+ } else {
+ uio_setresid(uio, (uio_resid(uio) - len));
+ }
+ if (len == m->m_len - moff) {
+ if (m->m_flags & M_EOR)
+ flags |= MSG_EOR;
+ if (flags & MSG_PEEK) {
+ m = m->m_next;
+ moff = 0;
+ } else {
+ nextrecord = m->m_nextpkt;
+ sbfree(&so->so_rcv, m);
+ m->m_nextpkt = NULL;
+
+ /*
+ * If this packet is an unordered packet
+ * (indicated by M_UNORDERED_DATA flag), remove
+ * the additional bytes added to the
+ * receive socket buffer size.
+ */
+ if ((so->so_flags & SOF_ENABLE_MSGS) &&