X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0a7de7458d150b5d4dffc935ba399be265ef0a1a..f427ee49d309d8fc33ebf3042c3a775f2f530ded:/bsd/kern/uipc_socket.c diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index 4d6e12a70..607af6d3c 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2018 Apple Inc. All rights reserved. + * Copyright (c) 1998-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include @@ -159,23 +160,23 @@ static lck_mtx_t *so_cache_mtx; #include -static int filt_sorattach(struct knote *kn, struct kevent_internal_s *kev); +static int filt_sorattach(struct knote *kn, struct kevent_qos_s *kev); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); -static int filt_sortouch(struct knote *kn, struct kevent_internal_s *kev); -static int filt_sorprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev); +static int filt_sortouch(struct knote *kn, struct kevent_qos_s *kev); +static int filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev); -static int filt_sowattach(struct knote *kn, struct kevent_internal_s *kev); +static int filt_sowattach(struct knote *kn, struct kevent_qos_s *kev); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); -static int filt_sowtouch(struct knote *kn, struct kevent_internal_s *kev); -static int filt_sowprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev); +static int filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev); +static int filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev); -static int filt_sockattach(struct knote *kn, struct kevent_internal_s *kev); +static int filt_sockattach(struct knote *kn, struct kevent_qos_s *kev); static void filt_sockdetach(struct knote *kn); static int filt_sockev(struct knote *kn, long hint); -static int filt_socktouch(struct knote *kn, struct kevent_internal_s *kev); -static int filt_sockprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev); +static int filt_socktouch(struct knote *kn, struct kevent_qos_s *kev); +static int filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev); static int sooptcopyin_timeval(struct sockopt *, struct timeval *); static int sooptcopyout_timeval(struct sockopt *, const struct timeval *); @@ -228,7 +229,7 @@ static unsigned long sodefunct_calls = 0; SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED, &sodefunct_calls, ""); -static int socket_zone = M_SOCKET; +ZONE_DECLARE(socket_zone, "socket", sizeof(struct socket), ZC_ZFREE_CLEARMEM); so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); @@ -337,7 +338,7 @@ vm_size_t so_cache_zone_element_size; static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, user_ssize_t *); -static void cached_sock_alloc(struct socket **, int); +static void cached_sock_alloc(struct socket **, zalloc_flags_t); static void cached_sock_free(struct socket *); /* @@ -432,10 +433,8 @@ socketinit(void) so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4 + get_inpcb_str_size() + 4 + get_tcp_str_size()); - so_cache_zone = zinit(so_cache_zone_element_size, - (120000 * so_cache_zone_element_size), 8192, "socache zone"); - zone_change(so_cache_zone, Z_CALLERACCT, FALSE); - 
zone_change(so_cache_zone, Z_NOENCRYPT, TRUE); + so_cache_zone = zone_create("socache zone", so_cache_zone_element_size, + ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT); bzero(&soextbkidlestat, sizeof(struct soextbkidlestat)); soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC; @@ -451,7 +450,7 @@ socketinit(void) } static void -cached_sock_alloc(struct socket **so, int waitok) +cached_sock_alloc(struct socket **so, zalloc_flags_t how) { caddr_t temp; uintptr_t offset; @@ -475,17 +474,7 @@ cached_sock_alloc(struct socket **so, int waitok) } else { lck_mtx_unlock(so_cache_mtx); - if (waitok) { - *so = (struct socket *)zalloc(so_cache_zone); - } else { - *so = (struct socket *)zalloc_noblock(so_cache_zone); - } - - if (*so == NULL) { - return; - } - - bzero((caddr_t)*so, sizeof(struct socket)); + *so = zalloc_flags(so_cache_zone, how | Z_ZERO); /* * Define offsets for extra structures into our @@ -550,6 +539,9 @@ so_update_last_owner_locked(struct socket *so, proc_t self) so->last_pid = proc_pid(self); proc_getexecutableuuid(self, so->last_uuid, sizeof(so->last_uuid)); + if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) { + (*so->so_proto->pr_update_last_owner)(so, self, NULL); + } } proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid)); } @@ -624,33 +616,21 @@ so_cache_timer(void) struct socket * soalloc(int waitok, int dom, int type) { + zalloc_flags_t how = waitok ? Z_WAITOK : Z_NOWAIT; struct socket *so; if ((dom == PF_INET) && (type == SOCK_STREAM)) { - cached_sock_alloc(&so, waitok); + cached_sock_alloc(&so, how); } else { - MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, - M_WAITOK); - if (so != NULL) { - bzero(so, sizeof(*so)); - } + so = zalloc_flags(socket_zone, how | Z_ZERO); } if (so != NULL) { so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt); - so->so_zone = socket_zone; /* * Increment the socket allocation statistics */ INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total); - -#if CONFIG_MACF_SOCKET - /* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. 
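The zone conversion above replaces the old zinit()/zone_change()/zalloc_noblock() sequence with the newer KPI that recurs throughout this diff. A minimal sketch of that pattern, using only the calls visible in these hunks (`demo_zone`, `demo_zone_init`, `demo_obj_alloc` and `struct demo_obj` are hypothetical names):

    struct demo_obj { int refcnt; };       /* illustrative payload type */

    static zone_t demo_zone;

    static void
    demo_zone_init(void)
    {
        /* One call sets up the zone; ZC_ZFREE_CLEARMEM and ZC_NOENCRYPT
         * replace the separate zone_change() tweaks the old code made. */
        demo_zone = zone_create("demo zone", sizeof(struct demo_obj),
            ZC_ZFREE_CLEARMEM | ZC_NOENCRYPT);
    }

    static struct demo_obj *
    demo_obj_alloc(void)
    {
        /* Z_WAITOK may block; Z_NOWAIT returns NULL under memory pressure.
         * Z_ZERO replaces the explicit bzero() the old call sites needed. */
        return zalloc_flags(demo_zone, Z_WAITOK | Z_ZERO);
    }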
*/ - if (mac_socket_label_init(so, !waitok) != 0) { - sodealloc(so); - return NULL; - } -#endif /* MAC_SOCKET */ } return so; @@ -663,6 +643,9 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, struct protosw *prp; struct socket *so; int error = 0; +#if defined(XNU_TARGET_OS_OSX) + pid_t rpid = -1; +#endif #if TCPDEBUG extern int tcpconsdebug; @@ -736,7 +719,7 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, break; } - if (flags & SOCF_ASYNC) { + if (flags & SOCF_MPTCP) { so->so_state |= SS_NBIO; } @@ -753,8 +736,30 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, so->e_pid = proc_pid(ep); proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid)); so->so_flags |= SOF_DELEGATED; +#if defined(XNU_TARGET_OS_OSX) + if (ep->p_responsible_pid != so->e_pid) { + rpid = ep->p_responsible_pid; + } +#endif + } + +#if defined(XNU_TARGET_OS_OSX) + if (rpid < 0 && p->p_responsible_pid != so->last_pid) { + rpid = p->p_responsible_pid; } + so->so_rpid = -1; + uuid_clear(so->so_ruuid); + if (rpid >= 0) { + proc_t rp = proc_find(rpid); + if (rp != PROC_NULL) { + proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid)); + so->so_rpid = rpid; + proc_rele(rp); + } + } +#endif + so->so_cred = kauth_cred_proc_ref(p); if (!suser(kauth_cred_get(), NULL)) { so->so_state |= SS_PRIV; @@ -766,10 +771,6 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, so->next_lock_lr = 0; so->next_unlock_lr = 0; -#if CONFIG_MACF_SOCKET - mac_socket_label_associate(kauth_cred_get(), so); -#endif /* MAC_SOCKET */ - /* * Attachment will create the per pcb lock if necessary and * increase refcount for creation, make sure it's done before @@ -791,8 +792,14 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, return error; } + /* + * Note: needs so_pcb to be set after pru_attach + */ + if (prp->pr_update_last_owner != NULL) { + (*prp->pr_update_last_owner)(so, p, ep); + } + atomic_add_32(&prp->pr_domain->dom_refs, 1); - TAILQ_INIT(&so->so_evlist); /* Attach socket filters for this protocol */ sflt_initsock(so); @@ -807,20 +814,19 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, * If this thread or task is marked to create backgrounded sockets, * mark the socket as background. */ - if (proc_get_effective_thread_policy(current_thread(), - TASK_POLICY_NEW_SOCKETS_BG)) { + if (!(flags & SOCF_MPTCP) && + proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) { socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND); so->so_background_thread = current_thread(); } switch (dom) { /* - * Don't mark Unix domain, system or multipath sockets as - * eligible for defunct by default. + * Don't mark Unix domain or system + * sockets as eligible for defunct by default. 
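The responsible-process bookkeeping added above always pairs proc_find() with proc_rele(). A minimal sketch of that lookup pattern, assuming only the KPIs already shown in the hunk (`resolve_pid_uuid` is a hypothetical helper name):

    static void
    resolve_pid_uuid(pid_t pid, uuid_t out_uuid)
    {
        proc_t p = proc_find(pid);  /* takes a reference; PROC_NULL if gone */

        if (p != PROC_NULL) {
            proc_getexecutableuuid(p, out_uuid, sizeof(uuid_t));
            proc_rele(p);           /* always drop the proc_find() reference */
        } else {
            uuid_clear(out_uuid);   /* pid raced away: report the null UUID */
        }
    }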
*/ case PF_LOCAL: case PF_SYSTEM: - case PF_MULTIPATH: so->so_flags |= SOF_NODEFUNCT; break; default: @@ -968,23 +974,12 @@ sodealloc(struct socket *so) cfil_sock_detach(so); #endif /* CONTENT_FILTER */ - /* Delete the state allocated for msg queues on a socket */ - if (so->so_flags & SOF_ENABLE_MSGS) { - FREE(so->so_msg_state, M_TEMP); - so->so_msg_state = NULL; - } - VERIFY(so->so_msg_state == NULL); - so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt); -#if CONFIG_MACF_SOCKET - mac_socket_label_destroy(so); -#endif /* MAC_SOCKET */ - if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) { cached_sock_free(so); } else { - FREE_ZONE(so, sizeof(*so), so->so_zone); + zfree(socket_zone, so); } } @@ -1487,7 +1482,6 @@ discard: } atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1); - evsofree(so); VERIFY(so->so_usecount > 0); so->so_usecount--; @@ -1717,6 +1711,9 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock) } else { error = (*so->so_proto->pr_usrreqs->pru_connect) (so, nam, p); + if (error != 0) { + so->so_state &= ~SS_ISCONNECTING; + } } } if (dolock) { @@ -1801,6 +1798,39 @@ soconnectxlocked(struct socket *so, struct sockaddr *src, (error = sodisconnectlocked(so)) != 0)) { error = EISCONN; } else { + if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) && + (flags & CONNECT_DATA_IDEMPOTENT)) { + so->so_flags1 |= SOF1_DATA_IDEMPOTENT; + + if (flags & CONNECT_DATA_AUTHENTICATED) { + so->so_flags1 |= SOF1_DATA_AUTHENTICATED; + } + } + + /* + * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data. + * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error) + * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data + * Case 3 allows user to combine write with connect even if they have + * no use for TFO (such as regular TCP, and UDP). + * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case) + */ + if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) && + ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) { + so->so_flags1 |= SOF1_PRECONNECT_DATA; + } + + /* + * If a user sets data idempotent and does not pass an uio, or + * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset + * SOF1_DATA_IDEMPOTENT. + */ + if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) && + (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) { + /* We should return EINVAL instead perhaps. */ + so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT; + } + /* * Run connect filter before calling protocol: * - non-blocking connect returns before completion; @@ -1816,6 +1846,12 @@ soconnectxlocked(struct socket *so, struct sockaddr *src, error = (*so->so_proto->pr_usrreqs->pru_connectx) (so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, auio, bytes_written); + if (error != 0) { + so->so_state &= ~SS_ISCONNECTING; + if (error != EINPROGRESS) { + so->so_flags1 &= ~SOF1_PRECONNECT_DATA; + } + } } } @@ -1906,8 +1942,7 @@ sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid) */ int sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, - int32_t clen, int32_t atomic, int flags, int *sblocked, - struct mbuf *control) + int32_t clen, int32_t atomic, int flags, int *sblocked) { int error = 0; int32_t space; @@ -1979,17 +2014,13 @@ defunct: !(so->so_flags1 & SOF1_PRECONNECT_DATA)) { return ENOTCONN; } - } else if (addr == 0 && !(flags & MSG_HOLD)) { + } else if (addr == 0) { return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? 
ENOTCONN : EDESTADDRREQ; } } - if (so->so_flags & SOF_ENABLE_MSGS) { - space = msgq_sbspace(so, control); - } else { - space = sbspace(&so->so_snd); - } + space = sbspace(&so->so_snd); if (flags & MSG_OOB) { space += 1024; @@ -2049,10 +2080,6 @@ defunct: * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. - * Experiment: - * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf - * MSG_SEND: go thru as for MSG_HOLD on current fragment, then - * point at the mbuf chain being constructed and go from there. * * Returns: 0 Success * EOPNOTSUPP @@ -2104,7 +2131,6 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, int atomic = sosendallatonce(so) || top; int sblocked = 0; struct proc *p = current_proc(); - struct mbuf *control_copy = NULL; uint16_t headroom = 0; boolean_t en_tracing = FALSE; @@ -2162,13 +2188,12 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * causes us to loop sending 0-length segments to the protocol. * * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets. - * But it will be used by sockets doing message delivery. * * Note: We limit resid to be a positive int value as we use * imin() to set bytes_to_copy -- radr://14558484 */ - if (resid < 0 || resid > INT_MAX || (so->so_type == SOCK_STREAM && - !(so->so_flags & SOF_ENABLE_MSGS) && (flags & MSG_EOR))) { + if (resid < 0 || resid > INT_MAX || + (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out_locked; } @@ -2188,17 +2213,13 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, do { error = sosendcheck(so, addr, resid, clen, atomic, flags, - &sblocked, control); + &sblocked); if (error) { goto out_locked; } mp = ⊤ - if (so->so_flags & SOF_ENABLE_MSGS) { - space = msgq_sbspace(so, control); - } else { - space = sbspace(&so->so_snd) - clen; - } + space = sbspace(&so->so_snd) - clen; space += ((flags & MSG_OOB) ? 1024 : 0); do { @@ -2373,12 +2394,14 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * reserving the socket headroom */ if (freelist == NULL) { - if (top == NULL) { - MGETHDR(freelist, - M_WAIT, MT_DATA); - } else { - MGET(freelist, - M_WAIT, MT_DATA); + if (SOCK_TYPE(so) != SOCK_STREAM || bytes_to_alloc <= MINCLSIZE) { + if (top == NULL) { + MGETHDR(freelist, + M_WAIT, MT_DATA); + } else { + MGET(freelist, + M_WAIT, MT_DATA); + } } if (freelist == NULL) { @@ -2446,29 +2469,6 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, } } - if (flags & (MSG_HOLD | MSG_SEND)) { - /* Enqueue for later, go away if HOLD */ - struct mbuf *mb1; - if (so->so_temp && (flags & MSG_FLUSH)) { - m_freem(so->so_temp); - so->so_temp = NULL; - } - if (so->so_temp) { - so->so_tail->m_next = top; - } else { - so->so_temp = top; - } - mb1 = top; - while (mb1->m_next) { - mb1 = mb1->m_next; - } - so->so_tail = mb1; - if (flags & MSG_HOLD) { - top = NULL; - goto out_locked; - } - top = so->so_temp; - } if (dontroute) { so->so_options |= SO_DONTROUTE; } @@ -2520,28 +2520,15 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, } #endif /* CONTENT_FILTER */ } - if (so->so_flags & SOF_ENABLE_MSGS) { - /* - * Make a copy of control mbuf, - * so that msg priority can be - * passed to subsequent mbufs. 
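The soconnectxlocked() cases enumerated above map onto the public connectx(2) flags. A userspace sketch of "Case 1" (CONNECT_RESUME_ON_READ_WRITE set, no data yet), assuming the Darwin connectx() interface from <sys/socket.h>; `tfo_connect` is a hypothetical helper:

    #include <string.h>
    #include <sys/socket.h>

    int
    tfo_connect(int s, const struct sockaddr *dst, socklen_t dstlen)
    {
        sa_endpoints_t sae;

        memset(&sae, 0, sizeof(sae));
        sae.sae_dstaddr = dst;
        sae.sae_dstaddrlen = dstlen;

        /* No iovec is passed (Case 1): the handshake is deferred until the
         * first read/write, and the payload may be replayed (idempotent). */
        return connectx(s, &sae, SAE_ASSOCID_ANY,
            CONNECT_RESUME_ON_READ_WRITE | CONNECT_DATA_IDEMPOTENT,
            NULL, 0, NULL, NULL);
    }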
- */ - control_copy = m_dup(control, M_NOWAIT); - } error = (*so->so_proto->pr_usrreqs->pru_send) (so, sendflags, top, addr, control, p); - if (flags & MSG_SEND) { - so->so_temp = NULL; - } - if (dontroute) { so->so_options &= ~SO_DONTROUTE; } clen = 0; - control = control_copy; - control_copy = NULL; + control = NULL; top = NULL; mp = ⊤ if (error) { @@ -2565,9 +2552,6 @@ out_locked: if (freelist != NULL) { m_freem_list(freelist); } - if (control_copy != NULL) { - m_freem(control_copy); - } soclearfastopen(so); @@ -2587,7 +2571,7 @@ out_locked: int sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags) { - struct mbuf *m0, *control_end; + struct mbuf *m0 = NULL, *control_end = NULL; socket_lock_assert_owned(so); @@ -2698,8 +2682,7 @@ sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags) (so->so_proto->pr_flags & PR_ATOMIC); OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd); - error = sosendcheck(so, NULL, resid, 0, atomic, flags, - &sblocked, NULL); + error = sosendcheck(so, NULL, resid, 0, atomic, flags, &sblocked); if (error) { goto release; } @@ -3206,6 +3189,51 @@ done: return error; } +/* + * If we have less data than requested, block awaiting more + * (subject to any timeout) if: + * 1. the current count is less than the low water mark, or + * 2. MSG_WAITALL is set, and it is possible to do the entire + * receive operation at once if we block (resid <= hiwat). + * 3. MSG_DONTWAIT is not set + * If MSG_WAITALL is set but resid is larger than the receive buffer, + * we have to do the receive in sections, and thus risk returning + * a short count if a timeout or signal occurs after we start. + */ +static boolean_t +so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags) +{ + struct protosw *pr = so->so_proto; + + /* No mbufs in the receive-queue? Wait! */ + if (m == NULL) { + return true; + } + + /* Not enough data in the receive socket-buffer - we may have to wait */ + if ((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio) && + m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0) { + /* + * Application did set the lowater-mark, so we should wait for + * this data to be present. + */ + if (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) { + return true; + } + + /* + * Application wants all the data - so let's try to do the + * receive-operation at once by waiting for everything to + * be there. + */ + if ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat) { + return true; + } + } + + return false; +} + /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf @@ -3256,7 +3284,6 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, user_ssize_t orig_resid = uio_resid(uio); user_ssize_t delayed_copy_len; int can_delay; - int need_event; struct proc *p = current_proc(); boolean_t en_tracing = FALSE; @@ -3465,22 +3492,7 @@ restart: } m = so->so_rcv.sb_mb; - /* - * If we have less data than requested, block awaiting more - * (subject to any timeout) if: - * 1. the current count is less than the low water mark, or - * 2. MSG_WAITALL is set, and it is possible to do the entire - * receive operation at once if we block (resid <= hiwat). - * 3. MSG_DONTWAIT is not set - * If MSG_WAITALL is set but resid is larger than the receive buffer, - * we have to do the receive in sections, and thus risk returning - * a short count if a timeout or signal occurs after we start. 
- */ - if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && - so->so_rcv.sb_cc < uio_resid(uio)) && - (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || - ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) && - m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { + if (so_should_wait(so, uio, m, flags)) { /* * Panic if we notice inconsistencies in the socket's * receive list; both sb_mb and sb_cc should correctly @@ -3547,7 +3559,24 @@ restart: } #endif - error = sbwait(&so->so_rcv); + /* + * Depending on the protocol (e.g. TCP), the following + * might cause the socket lock to be dropped and later + * be reacquired, and more data could have arrived and + * have been appended to the receive socket buffer by + * the time it returns. Therefore, we only sleep in + * sbwait() below if and only if the wait-condition is still + * true. + */ + if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) { + (*pr->pr_usrreqs->pru_rcvd)(so, flags); + } + + error = 0; + if (so_should_wait(so, uio, so->so_rcv.sb_mb, flags)) { + error = sbwait(&so->so_rcv); + } + #if EVEN_MORE_LOCKING_DEBUG if (socket_debug) { printf("SORECEIVE - sbwait returned %d\n", error); @@ -3602,28 +3631,6 @@ dontblock: orig_resid = 0; } - /* - * If the socket is a TCP socket with message delivery - * enabled, then create a control msg to deliver the - * relative TCP sequence number for this data. Waiting - * until this point will protect against failures to - * allocate an mbuf for control msgs. - */ - if (so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP && - (so->so_flags & SOF_ENABLE_MSGS) && controlp != NULL) { - struct mbuf *seq_cm; - - seq_cm = sbcreatecontrol((caddr_t)&m->m_pkthdr.msg_seq, - sizeof(uint32_t), SCM_SEQNUM, SOL_SOCKET); - if (seq_cm == NULL) { - /* unable to allocate a control mbuf */ - error = ENOBUFS; - goto release; - } - *controlp = seq_cm; - controlp = &seq_cm->m_next; - } - if (m != NULL) { if (!(flags & MSG_PEEK)) { /* @@ -3666,8 +3673,6 @@ dontblock: can_delay = 0; } - need_event = 0; - while (m != NULL && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { @@ -3765,28 +3770,6 @@ dontblock: sbfree(&so->so_rcv, m); m->m_nextpkt = NULL; - /* - * If this packet is an unordered packet - * (indicated by M_UNORDERED_DATA flag), remove - * the additional bytes added to the - * receive socket buffer size. - */ - if ((so->so_flags & SOF_ENABLE_MSGS) && - m->m_len && - (m->m_flags & M_UNORDERED_DATA) && - sbreserve(&so->so_rcv, - so->so_rcv.sb_hiwat - m->m_len)) { - if (so->so_msg_state->msg_uno_bytes > - m->m_len) { - so->so_msg_state-> - msg_uno_bytes -= m->m_len; - } else { - so->so_msg_state-> - msg_uno_bytes = 0; - } - m->m_flags &= ~M_UNORDERED_DATA; - } - if (mp != NULL) { *mp = m; mp = &m->m_next; @@ -3849,12 +3832,6 @@ dontblock: so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_state |= SS_RCVATMARK; - /* - * delay posting the actual event until - * after any delayed copy processing - * has finished - */ - need_event = 1; break; } } else { @@ -3895,9 +3872,7 @@ dontblock: * sbwait() below if and only if the socket buffer is * empty, in order to avoid a false sleep. 
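The new so_should_wait() helper above centralizes this wait decision so it can be re-evaluated after pru_rcvd() drops and reacquires the socket lock (see the next hunk). A stand-alone model of the same decision, with plain fields in place of the socket-buffer structures and the m_nextpkt detail folded into the atomic flag (all names here are illustrative):

    #include <stdbool.h>

    struct rcv_state {
        long cc;        /* bytes queued (sb_cc) */
        long lowat;     /* low-water mark (sb_lowat) */
        long hiwat;     /* high-water mark (sb_hiwat) */
        bool empty;     /* no mbufs queued at all */
        bool atomic;    /* PR_ATOMIC protocol, e.g. UDP */
    };

    static bool
    should_wait(const struct rcv_state *r, long resid, bool dontwait, bool waitall)
    {
        if (r->empty) {
            return true;                 /* nothing queued: must wait */
        }
        if (dontwait || r->atomic || r->cc >= resid) {
            return false;                /* cannot block, or enough data */
        }
        if (r->cc < r->lowat) {
            return true;                 /* below the low-water mark */
        }
        if (waitall && resid <= r->hiwat) {
            return true;                 /* whole request can fit: wait for it */
        }
        return false;
    }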
*/ - if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && - (((struct inpcb *)so->so_pcb)->inp_state != - INPCB_STATE_DEAD)) { + if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) { (*pr->pr_usrreqs->pru_rcvd)(so, flags); } @@ -4006,9 +3981,6 @@ dontblock: m_freem_list(free_list); free_list = NULL; } - if (need_event) { - postevent(so, 0, EV_OOB); - } if (orig_resid == uio_resid(uio) && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -4566,6 +4538,33 @@ out: return error; } +static int +so_statistics_event_to_nstat_event(int64_t *input_options, + uint64_t *nstat_event) +{ + int error = 0; + switch (*input_options) { + case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK: + *nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK; + break; + case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK: + *nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK; + break; +#if (DEBUG || DEVELOPMENT) + case SO_STATISTICS_EVENT_RESERVED_1: + *nstat_event = NSTAT_EVENT_SRC_RESERVED_1; + break; + case SO_STATISTICS_EVENT_RESERVED_2: + *nstat_event = NSTAT_EVENT_SRC_RESERVED_2; + break; +#endif /* (DEBUG || DEVELOPMENT) */ + default: + error = EINVAL; + break; + } + return error; +} + /* * Returns: 0 Success * EINVAL @@ -4629,7 +4628,6 @@ soshutdownlock_final(struct socket *so, int how) goto done; } sorflush(so); - postevent(so, 0, EV_RCLOSED); } if (how != SHUT_RD) { if ((so->so_state & SS_CANTSENDMORE) != 0) { @@ -4638,7 +4636,6 @@ soshutdownlock_final(struct socket *so, int how) goto done; } error = (*pr->pr_usrreqs->pru_shutdown)(so); - postevent(so, 0, EV_WCLOSED); } done: KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0); @@ -4906,14 +4903,15 @@ sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p) } int -soopt_cred_check(struct socket *so, int priv, boolean_t allow_root) +soopt_cred_check(struct socket *so, int priv, boolean_t allow_root, + boolean_t ignore_delegate) { kauth_cred_t cred = NULL; proc_t ep = PROC_NULL; uid_t uid; int error = 0; - if (so->so_flags & SOF_DELEGATED) { + if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) { ep = proc_find(so->e_pid); if (ep) { cred = kauth_cred_proc_ref(ep); @@ -4960,11 +4958,9 @@ int sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) { int error, optval; + int64_t long_optval; struct linger l; struct timeval tv; -#if CONFIG_MACF_SOCKET - struct mac extmac; -#endif /* MAC_SOCKET */ if (sopt->sopt_dir != SOPT_SET) { sopt->sopt_dir = SOPT_SET; @@ -5240,7 +5236,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) } if (optval != 0) { error = soopt_cred_check(so, - PRIV_NET_RESTRICTED_AWDL, false); + PRIV_NET_RESTRICTED_AWDL, false, false); if (error == 0) { inp_set_awdl_unrestricted( sotoinpcb(so)); @@ -5262,7 +5258,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) if (optval != 0 && inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) { error = soopt_cred_check(so, - PRIV_NET_RESTRICTED_INTCOPROC, false); + PRIV_NET_RESTRICTED_INTCOPROC, false, false); if (error == 0) { inp_set_intcoproc_allowed( sotoinpcb(so)); @@ -5273,17 +5269,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) break; case SO_LABEL: -#if CONFIG_MACF_SOCKET - if ((error = sooptcopyin(sopt, &extmac, sizeof(extmac), - sizeof(extmac))) != 0) { - goto out; - } - - error = mac_setsockopt_label(proc_ucred(sopt->sopt_p), - so, &extmac); -#else error = EOPNOTSUPP; -#endif /* MAC_SOCKET */ break; case SO_UPCALLCLOSEWAIT: @@ -5524,7 +5510,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int 
dolock) break; } - error = so_set_effective_pid(so, optval, sopt->sopt_p); + error = so_set_effective_pid(so, optval, sopt->sopt_p, true); break; case SO_DELEGATED_UUID: { @@ -5535,7 +5521,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) break; } - error = so_set_effective_uuid(so, euuid, sopt->sopt_p); + error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true); break; } @@ -5544,7 +5530,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) error = necp_set_socket_attributes(so, sopt); break; - case SO_NECP_CLIENTUUID: + case SO_NECP_CLIENTUUID: { if (SOCK_DOM(so) == PF_MULTIPATH) { /* Handled by MPTCP itself */ break; @@ -5572,7 +5558,8 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) goto out; } - error = necp_client_register_socket_flow(so->last_pid, + pid_t current_pid = proc_pid(current_proc()); + error = necp_client_register_socket_flow(current_pid, inp->necp_client_uuid, inp); if (error != 0) { uuid_clear(inp->necp_client_uuid); @@ -5580,12 +5567,48 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) } if (inp->inp_lport != 0) { - // There is bound local port, so this is not + // There is a bound local port, so this is not // a fresh socket. Assign to the client. - necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp); + necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp); + } + + break; + } + case SO_NECP_LISTENUUID: { + if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) { + error = EINVAL; + goto out; + } + + struct inpcb *inp = sotoinpcb(so); + if (!uuid_is_null(inp->necp_client_uuid)) { + error = EINVAL; + goto out; + } + + error = sooptcopyin(sopt, &inp->necp_client_uuid, + sizeof(uuid_t), sizeof(uuid_t)); + if (error != 0) { + goto out; } + if (uuid_is_null(inp->necp_client_uuid)) { + error = EINVAL; + goto out; + } + + error = necp_client_register_socket_listener(proc_pid(current_proc()), + inp->necp_client_uuid, inp); + if (error != 0) { + uuid_clear(inp->necp_client_uuid); + goto out; + } + + // Mark that the port registration is held by NECP + inp->inp_flags2 |= INP2_EXTERNAL_PORT; + break; + } #endif /* NECP */ case SO_EXTENDED_BK_IDLE: @@ -5613,6 +5636,21 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) } break; + case SO_STATISTICS_EVENT: + error = sooptcopyin(sopt, &long_optval, + sizeof(long_optval), sizeof(long_optval)); + if (error != 0) { + goto out; + } + u_int64_t nstat_event = 0; + error = so_statistics_event_to_nstat_event( + &long_optval, &nstat_event); + if (error != 0) { + goto out; + } + nstat_pcb_event(sotoinpcb(so), nstat_event); + break; + case SO_NET_SERVICE_TYPE: { error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); @@ -5641,6 +5679,37 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) } break; + case SO_MPKL_SEND_INFO: { + struct so_mpkl_send_info so_mpkl_send_info; + + error = sooptcopyin(sopt, &so_mpkl_send_info, + sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info)); + if (error != 0) { + goto out; + } + uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid); + so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto; + + if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) { + so->so_flags1 &= ~SOF1_MPKL_SEND_INFO; + } else { + so->so_flags1 |= SOF1_MPKL_SEND_INFO; + } + break; + } + case SO_WANT_KEV_SOCKET_CLOSED: { + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error != 0) { + goto out; + } + if 
(optval == 0) { + so->so_flags1 &= ~SOF1_WANT_KEV_SOCK_CLOSED; + } else { + so->so_flags1 |= SOF1_WANT_KEV_SOCK_CLOSED; + } + break; + } default: error = ENOPROTOOPT; break; @@ -5734,9 +5803,6 @@ sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) int error, optval; struct linger l; struct timeval tv; -#if CONFIG_MACF_SOCKET - struct mac extmac; -#endif /* MAC_SOCKET */ if (sopt->sopt_dir != SOPT_GET) { sopt->sopt_dir = SOPT_GET; @@ -5837,17 +5903,13 @@ integer: m1 = so->so_rcv.sb_mb; while (m1 != NULL) { - if (m1->m_type == MT_DATA || - m1->m_type == MT_HEADER || - m1->m_type == MT_OOBDATA) { - cnt += 1; - } + cnt += 1; m1 = m1->m_nextpkt; } optval = cnt; goto integer; } else { - error = EINVAL; + error = ENOPROTOOPT; break; } @@ -5937,33 +5999,11 @@ integer: break; case SO_LABEL: -#if CONFIG_MACF_SOCKET - if ((error = sooptcopyin(sopt, &extmac, sizeof(extmac), - sizeof(extmac))) != 0 || - (error = mac_socket_label_get(proc_ucred( - sopt->sopt_p), so, &extmac)) != 0) { - break; - } - - error = sooptcopyout(sopt, &extmac, sizeof(extmac)); -#else error = EOPNOTSUPP; -#endif /* MAC_SOCKET */ break; case SO_PEERLABEL: -#if CONFIG_MACF_SOCKET - if ((error = sooptcopyin(sopt, &extmac, sizeof(extmac), - sizeof(extmac))) != 0 || - (error = mac_socketpeer_label_get(proc_ucred( - sopt->sopt_p), so, &extmac)) != 0) { - break; - } - - error = sooptcopyout(sopt, &extmac, sizeof(extmac)); -#else error = EOPNOTSUPP; -#endif /* MAC_SOCKET */ break; #ifdef __APPLE_API_PRIVATE @@ -5995,11 +6035,6 @@ integer: optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS); goto integer; - case SO_TRAFFIC_CLASS_STATS: - error = sooptcopyout(sopt, &so->so_tc_stats, - sizeof(so->so_tc_stats)); - break; - #if (DEVELOPMENT || DEBUG) case SO_TRAFFIC_CLASS_DBG: error = sogetopt_tcdbg(so, sopt); @@ -6050,8 +6085,7 @@ integer: error = necp_get_socket_attributes(so, sopt); break; - case SO_NECP_CLIENTUUID: - { + case SO_NECP_CLIENTUUID: { uuid_t *ncu; if (SOCK_DOM(so) == PF_MULTIPATH) { @@ -6066,6 +6100,25 @@ integer: error = sooptcopyout(sopt, ncu, sizeof(uuid_t)); break; } + + case SO_NECP_LISTENUUID: { + uuid_t *nlu; + + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { + if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) { + nlu = &sotoinpcb(so)->necp_client_uuid; + } else { + error = ENOENT; + goto out; + } + } else { + error = EINVAL; + goto out; + } + + error = sooptcopyout(sopt, nlu, sizeof(uuid_t)); + break; + } #endif /* NECP */ #if CONTENT_FILTER @@ -6099,6 +6152,15 @@ integer: optval = so_get_netsvc_marking_level(so); goto integer; + case SO_MPKL_SEND_INFO: { + struct so_mpkl_send_info so_mpkl_send_info; + + uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid); + so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto; + error = sooptcopyout(sopt, &so_mpkl_send_info, + sizeof(struct so_mpkl_send_info)); + break; + } default: error = ENOPROTOOPT; break; @@ -6312,30 +6374,15 @@ sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql) } int -soo_kqfilter(struct fileproc *fp, struct knote *kn, - struct kevent_internal_s *kev, vfs_context_t ctx) +soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev) { -#pragma unused(fp) -#if !CONFIG_MACF_SOCKET -#pragma unused(ctx) -#endif /* MAC_SOCKET */ - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)fp->fp_glob->fg_data; int result; socket_lock(so, 1); so_update_last_owner_locked(so, PROC_NULL); so_update_policy(so); -#if CONFIG_MACF_SOCKET - if 
(mac_socket_check_kqfilter(proc_ucred(vfs_context_proc(ctx)), - kn, so) != 0) { - socket_unlock(so, 1); - kn->kn_flags = EV_ERROR; - kn->kn_data = EPERM; - return 0; - } -#endif /* MAC_SOCKET */ - switch (kn->kn_filter) { case EVFILT_READ: kn->kn_filtid = EVFILTID_SOREAD; @@ -6351,8 +6398,7 @@ soo_kqfilter(struct fileproc *fp, struct knote *kn, break; default: socket_unlock(so, 1); - kn->kn_flags = EV_ERROR; - kn->kn_data = EINVAL; + knote_set_error(kn, EINVAL); return 0; } @@ -6368,21 +6414,21 @@ soo_kqfilter(struct fileproc *fp, struct knote *kn, } static int -filt_soread_common(struct knote *kn, struct socket *so) +filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so) { - if (so->so_options & SO_ACCEPTCONN) { - int is_not_empty; + int retval = 0; + int64_t data = 0; + if (so->so_options & SO_ACCEPTCONN) { /* * Radar 6615193 handle the listen case dynamically * for kqueue read filter. This allows to call listen() * after registering the kqueue EVFILT_READ. */ - kn->kn_data = so->so_qlen; - is_not_empty = !TAILQ_EMPTY(&so->so_comp); - - return is_not_empty; + retval = !TAILQ_EMPTY(&so->so_comp); + data = so->so_qlen; + goto out; } /* socket isn't a listener */ @@ -6391,13 +6437,14 @@ filt_soread_common(struct knote *kn, struct socket *so) * the bytes of protocol data. We therefore exclude any * control bytes. */ - kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; + data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; if (kn->kn_sfflags & NOTE_OOB) { if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { kn->kn_fflags |= NOTE_OOB; - kn->kn_data -= so->so_oobmark; - return 1; + data -= so->so_oobmark; + retval = 1; + goto out; } } @@ -6408,11 +6455,13 @@ filt_soread_common(struct knote *kn, struct socket *so) ) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; - return 1; + retval = 1; + goto out; } if (so->so_error) { /* temporary udp error */ - return 1; + retval = 1; + goto out; } int64_t lowwat = so->so_rcv.sb_lowat; @@ -6430,21 +6479,23 @@ filt_soread_common(struct knote *kn, struct socket *so) } /* - * The order below is important. Since NOTE_LOWAT - * overrides sb_lowat, check for NOTE_LOWAT case - * first. + * While the `data` field is the amount of data to read, + * 0-sized packets need to wake up the kqueue, see 58140856, + * so we need to take control bytes into account too. 
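From userspace, the low-water handling above is reachable through the public kqueue interface: NOTE_LOWAT in fflags overrides the socket's SO_RCVLOWAT for this knote only, clipped to sb_hiwat as the hunk shows. A small sketch (`arm_read_lowat` is a hypothetical helper):

    #include <sys/event.h>

    /* Register an EVFILT_READ knote that fires once at least `lowat`
     * bytes are readable on `sock`; returns the kevent() result. */
    int
    arm_read_lowat(int kq, int sock, int64_t lowat)
    {
        struct kevent kev;

        EV_SET(&kev, sock, EVFILT_READ, EV_ADD, NOTE_LOWAT, lowat, NULL);
        return kevent(kq, &kev, 1, NULL, 0, NULL);
    }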
*/ - if (kn->kn_sfflags & NOTE_LOWAT) { - return kn->kn_data >= lowwat; - } + retval = (so->so_rcv.sb_cc >= lowwat); - return so->so_rcv.sb_cc >= lowwat; +out: + if (retval && kev) { + knote_fill_kevent(kn, kev, data); + } + return retval; } static int -filt_sorattach(struct knote *kn, __unused struct kevent_internal_s *kev) +filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; /* socket locked */ @@ -6456,22 +6507,22 @@ filt_sorattach(struct knote *kn, __unused struct kevent_internal_s *kev) if (kn->kn_filter == EVFILT_READ && kn->kn_flags & EV_OOBAND) { kn->kn_flags &= ~EV_OOBAND; - kn->kn_hookid = EV_OOBAND; + kn->kn_hook32 = EV_OOBAND; } else { - kn->kn_hookid = 0; + kn->kn_hook32 = 0; } if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) { so->so_rcv.sb_flags |= SB_KNOTE; } /* indicate if event is already fired */ - return filt_soread_common(kn, so); + return filt_soread_common(kn, NULL, so); } static void filt_sordetach(struct knote *kn) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; socket_lock(so, 1); if (so->so_rcv.sb_flags & SB_KNOTE) { @@ -6486,14 +6537,14 @@ filt_sordetach(struct knote *kn) static int filt_soread(struct knote *kn, long hint) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int retval; if ((hint & SO_FILT_HINT_LOCKED) == 0) { socket_lock(so, 1); } - retval = filt_soread_common(kn, so); + retval = filt_soread_common(kn, NULL, so); if ((hint & SO_FILT_HINT_LOCKED) == 0) { socket_unlock(so, 1); @@ -6503,9 +6554,9 @@ filt_soread(struct knote *kn, long hint) } static int -filt_sortouch(struct knote *kn, struct kevent_internal_s *kev) +filt_sortouch(struct knote *kn, struct kevent_qos_s *kev) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int retval; socket_lock(so, 1); @@ -6515,7 +6566,7 @@ filt_sortouch(struct knote *kn, struct kevent_internal_s *kev) kn->kn_sdata = kev->data; /* determine if changes result in fired events */ - retval = filt_soread_common(kn, so); + retval = filt_soread_common(kn, NULL, so); socket_unlock(so, 1); @@ -6523,21 +6574,13 @@ filt_sortouch(struct knote *kn, struct kevent_internal_s *kev) } static int -filt_sorprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev) +filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev) { -#pragma unused(data) - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int retval; socket_lock(so, 1); - retval = filt_soread_common(kn, so); - if (retval) { - *kev = kn->kn_kevent; - if (kn->kn_flags & EV_CLEAR) { - kn->kn_fflags = 0; - kn->kn_data = 0; - } - } + retval = filt_soread_common(kn, kev, so); socket_unlock(so, 1); return retval; @@ -6557,26 +6600,35 @@ so_wait_for_if_feedback(struct socket *so) } static int -filt_sowrite_common(struct knote *kn, struct socket *so) +filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so) { int ret = 0; + int64_t data = sbspace(&so->so_snd); - kn->kn_data = sbspace(&so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; - return 1; + ret = 1; + goto out; } + if 
(so->so_error) { /* temporary udp error */ - return 1; + ret = 1; + goto out; } + if (!socanwrite(so)) { - return 0; + ret = 0; + goto out; } + if (so->so_flags1 & SOF1_PRECONNECT_DATA) { - return 1; + ret = 1; + goto out; } + int64_t lowwat = so->so_snd.sb_lowat; + if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_sdata > so->so_snd.sb_hiwat) { lowwat = so->so_snd.sb_hiwat; @@ -6584,7 +6636,8 @@ filt_sowrite_common(struct knote *kn, struct socket *so) lowwat = kn->kn_sdata; } } - if (kn->kn_data >= lowwat) { + + if (data >= lowwat) { if ((so->so_flags & SOF_NOTSENT_LOWAT) #if (DEBUG || DEVELOPMENT) && so_notsent_lowat_check == 1 @@ -6602,7 +6655,8 @@ filt_sowrite_common(struct knote *kn, struct socket *so) } #endif else { - return 1; + ret = 1; + goto out; } } else { ret = 1; @@ -6611,13 +6665,18 @@ filt_sowrite_common(struct knote *kn, struct socket *so) if (so_wait_for_if_feedback(so)) { ret = 0; } + +out: + if (ret && kev) { + knote_fill_kevent(kn, kev, data); + } return ret; } static int -filt_sowattach(struct knote *kn, __unused struct kevent_internal_s *kev) +filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; /* socket locked */ if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) { @@ -6625,13 +6684,13 @@ filt_sowattach(struct knote *kn, __unused struct kevent_internal_s *kev) } /* determine if its already fired */ - return filt_sowrite_common(kn, so); + return filt_sowrite_common(kn, NULL, so); } static void filt_sowdetach(struct knote *kn) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; socket_lock(so, 1); if (so->so_snd.sb_flags & SB_KNOTE) { @@ -6646,14 +6705,14 @@ filt_sowdetach(struct knote *kn) static int filt_sowrite(struct knote *kn, long hint) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int ret; if ((hint & SO_FILT_HINT_LOCKED) == 0) { socket_lock(so, 1); } - ret = filt_sowrite_common(kn, so); + ret = filt_sowrite_common(kn, NULL, so); if ((hint & SO_FILT_HINT_LOCKED) == 0) { socket_unlock(so, 1); @@ -6663,9 +6722,9 @@ filt_sowrite(struct knote *kn, long hint) } static int -filt_sowtouch(struct knote *kn, struct kevent_internal_s *kev) +filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int ret; socket_lock(so, 1); @@ -6675,7 +6734,7 @@ filt_sowtouch(struct knote *kn, struct kevent_internal_s *kev) kn->kn_sdata = kev->data; /* determine if these changes result in a triggered event */ - ret = filt_sowrite_common(kn, so); + ret = filt_sowrite_common(kn, NULL, so); socket_unlock(so, 1); @@ -6683,29 +6742,24 @@ filt_sowtouch(struct knote *kn, struct kevent_internal_s *kev) } static int -filt_sowprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev) +filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev) { -#pragma unused(data) - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int ret; socket_lock(so, 1); - ret = filt_sowrite_common(kn, so); - if (ret) { - *kev = kn->kn_kevent; - if (kn->kn_flags & EV_CLEAR) { - kn->kn_fflags = 0; - kn->kn_data = 0; - } - } + ret = filt_sowrite_common(kn, 
kev, so); socket_unlock(so, 1); + return ret; } static int -filt_sockev_common(struct knote *kn, struct socket *so, long ev_hint) +filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev, + struct socket *so, long ev_hint) { int ret = 0; + int64_t data = 0; uint32_t level_trigger = 0; if (ev_hint & SO_FILT_HINT_CONNRESET) { @@ -6770,7 +6824,7 @@ filt_sockev_common(struct knote *kn, struct socket *so, long ev_hint) kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME); /* If resume event was delivered before, reset it */ - kn->kn_hookid &= ~NOTE_RESUME; + kn->kn_hook32 &= ~NOTE_RESUME; kn->kn_fflags |= NOTE_SUSPEND; level_trigger |= NOTE_SUSPEND; @@ -6781,7 +6835,7 @@ filt_sockev_common(struct knote *kn, struct socket *so, long ev_hint) kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME); /* If suspend event was delivered before, reset it */ - kn->kn_hookid &= ~NOTE_SUSPEND; + kn->kn_hook32 &= ~NOTE_SUSPEND; kn->kn_fflags |= NOTE_RESUME; level_trigger |= NOTE_RESUME; @@ -6789,10 +6843,12 @@ filt_sockev_common(struct knote *kn, struct socket *so, long ev_hint) if (so->so_error != 0) { ret = 1; - kn->kn_data = so->so_error; + data = so->so_error; kn->kn_flags |= EV_EOF; } else { - get_sockev_state(so, (u_int32_t *)&(kn->kn_data)); + u_int32_t data32 = 0; + get_sockev_state(so, &data32); + data = data32; } /* Reset any events that are not requested on this knote */ @@ -6800,7 +6856,7 @@ filt_sockev_common(struct knote *kn, struct socket *so, long ev_hint) level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK); /* Find the level triggered events that are already delivered */ - level_trigger &= kn->kn_hookid; + level_trigger &= kn->kn_hook32; level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK; /* Do not deliver level triggered events more than once */ @@ -6808,28 +6864,54 @@ ret = 1; } + if (ret && kev) { + /* + * Store the state of the events being delivered. This + * state can be used to deliver level triggered events + * at least once and still avoid waking up the application + * multiple times as long as the event is active. 
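The kn_hook32 bookkeeping introduced above implements "deliver a level-triggered flag once, then suppress it while it stays asserted". A compact model of that technique, with illustrative names in place of the knote fields:

    #include <stdbool.h>
    #include <stdint.h>

    struct ev_state {
        uint32_t active;        /* flags currently asserted (kn_fflags) */
        uint32_t delivered;     /* level-triggered flags already sent (kn_hook32) */
    };

    static bool
    should_deliver(struct ev_state *ev, uint32_t level_mask)
    {
        /* Suppress only the level-triggered flags that went out before. */
        if ((ev->active & ~(ev->delivered & level_mask)) == 0) {
            return false;       /* nothing new: don't wake the application */
        }
        ev->delivered |= (ev->active & level_mask);     /* remember delivery */
        return true;
    }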
+ */ + if (kn->kn_fflags != 0) { + kn->kn_hook32 |= (kn->kn_fflags & + EVFILT_SOCK_LEVEL_TRIGGER_MASK); + } + + /* + * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver + * only one of them and remember the one that was + * delivered last. + */ + if (kn->kn_fflags & NOTE_SUSPEND) { + kn->kn_hook32 &= ~NOTE_RESUME; + } + if (kn->kn_fflags & NOTE_RESUME) { + kn->kn_hook32 &= ~NOTE_SUSPEND; + } + + knote_fill_kevent(kn, kev, data); + } return ret; } static int -filt_sockattach(struct knote *kn, __unused struct kevent_internal_s *kev) +filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; /* socket locked */ - kn->kn_hookid = 0; + kn->kn_hook32 = 0; if (KNOTE_ATTACH(&so->so_klist, kn)) { so->so_flags |= SOF_KNOTE; } /* determine if event already fired */ - return filt_sockev_common(kn, so, 0); + return filt_sockev_common(kn, NULL, so, 0); } static void filt_sockdetach(struct knote *kn) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; socket_lock(so, 1); if ((so->so_flags & SOF_KNOTE) != 0) { @@ -6844,7 +6926,7 @@ static int filt_sockev(struct knote *kn, long hint) { int ret = 0, locked = 0; - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; long ev_hint = (hint & SO_FILT_HINT_EV); if ((hint & SO_FILT_HINT_LOCKED) == 0) { @@ -6852,7 +6934,7 @@ filt_sockev(struct knote *kn, long hint) locked = 1; } - ret = filt_sockev_common(kn, so, ev_hint); + ret = filt_sockev_common(kn, NULL, so, ev_hint); if (locked) { socket_unlock(so, 1); @@ -6869,16 +6951,16 @@ filt_sockev(struct knote *kn, long hint) static int filt_socktouch( struct knote *kn, - struct kevent_internal_s *kev) + struct kevent_qos_s *kev) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; uint32_t changed_flags; int ret; socket_lock(so, 1); /* save off the [result] data and fflags */ - changed_flags = (kn->kn_sfflags ^ kn->kn_hookid); + changed_flags = (kn->kn_sfflags ^ kn->kn_hook32); /* save off the new input fflags and data */ kn->kn_sfflags = kev->fflags; @@ -6896,11 +6978,10 @@ filt_socktouch( * delivered, if any of those events are not requested * anymore the state related to them can be reset */ - kn->kn_hookid &= - ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK); + kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK); /* determine if we have events to deliver */ - ret = filt_sockev_common(kn, so, 0); + ret = filt_sockev_common(kn, NULL, so, 0); socket_unlock(so, 1); @@ -6911,50 +6992,14 @@ filt_socktouch( * filt_sockprocess - query event fired state and return data */ static int -filt_sockprocess( - struct knote *kn, - struct filt_process_s *data, - struct kevent_internal_s *kev) +filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev) { -#pragma unused(data) - - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + struct socket *so = (struct socket *)kn->kn_fp->fp_glob->fg_data; int ret = 0; socket_lock(so, 1); - ret = filt_sockev_common(kn, so, 0); - if (ret) { - *kev = kn->kn_kevent; - - /* - * Store the state of the events being delivered. 
This - * state can be used to deliver level triggered events - * ateast once and still avoid waking up the application - * multiple times as long as the event is active. - */ - if (kn->kn_fflags != 0) { - kn->kn_hookid |= (kn->kn_fflags & - EVFILT_SOCK_LEVEL_TRIGGER_MASK); - } - - /* - * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver - * only one of them and remember the last one that was - * delivered last - */ - if (kn->kn_fflags & NOTE_SUSPEND) { - kn->kn_hookid &= ~NOTE_RESUME; - } - if (kn->kn_fflags & NOTE_RESUME) { - kn->kn_hookid &= ~NOTE_SUSPEND; - } - - if (kn->kn_flags & EV_CLEAR) { - kn->kn_data = 0; - kn->kn_fflags = 0; - } - } + ret = filt_sockev_common(kn, kev, so, 0); socket_unlock(so, 1); @@ -6995,7 +7040,7 @@ solockhistory_nr(struct socket *so) bzero(lock_history_str, sizeof(lock_history_str)); for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) { - n += snprintf(lock_history_str + n, + n += scnprintf(lock_history_str + n, SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ", so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX], so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]); @@ -7003,6 +7048,16 @@ solockhistory_nr(struct socket *so) return lock_history_str; } +lck_mtx_t * +socket_getlock(struct socket *so, int flags) +{ + if (so->so_proto->pr_getlock != NULL) { + return (*so->so_proto->pr_getlock)(so, flags); + } else { + return so->so_proto->pr_domain->dom_mtx; + } +} + void socket_lock(struct socket *so, int refcount) { @@ -7062,12 +7117,12 @@ socket_unlock(struct socket *so, int refcount) lr_saved = __builtin_return_address(0); - if (so->so_proto == NULL) { + if (so == NULL || so->so_proto == NULL) { panic("%s: null so_proto so=%p\n", __func__, so); /* NOTREACHED */ } - if (so && so->so_proto->pr_unlock) { + if (so->so_proto->pr_unlock) { (*so->so_proto->pr_unlock)(so, refcount, lr_saved); } else { mutex_held = so->so_proto->pr_domain->dom_mtx; @@ -7431,8 +7486,7 @@ so_set_extended_bk_idle(struct socket *so, int optval) soresume(current_proc(), so, 1); } else { struct proc *p = current_proc(); - int i; - struct filedesc *fdp; + struct fileproc *fp; int count = 0; /* @@ -7442,19 +7496,14 @@ so_set_extended_bk_idle(struct socket *so, int optval) socket_unlock(so, 0); proc_fdlock(p); - - fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { - struct fileproc *fp = fdp->fd_ofiles[i]; + fdt_foreach(fp, p) { struct socket *so2; - if (fp == NULL || - (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) { + if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) { continue; } - so2 = (struct socket *)fp->f_fglob->fg_data; + so2 = (struct socket *)fp->fp_glob->fg_data; if (so != so2 && so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) { count++; @@ -7558,23 +7607,16 @@ void resume_proc_sockets(proc_t p) { if (p->p_ladvflag & P_LXBKIDLEINPROG) { - struct filedesc *fdp; - int i; + struct fileproc *fp; + struct socket *so; proc_fdlock(p); - fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { - struct fileproc *fp; - struct socket *so; - - fp = fdp->fd_ofiles[i]; - if (fp == NULL || - (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) { + fdt_foreach(fp, p) { + if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) { continue; } - so = (struct socket *)fp->f_fglob->fg_data; + so = (struct socket *)fp->fp_glob->fg_data; (void) soresume(p, so, 0); } proc_fdunlock(p); @@ -7588,11 +7630,7 @@ so_set_recv_anyif(struct socket *so, int optval) { int ret = 0; -#if INET6 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { 
-#else - if (SOCK_DOM(so) == PF_INET) { -#endif /* !INET6 */ if (optval) { sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF; } else { @@ -7609,11 +7647,7 @@ so_get_recv_anyif(struct socket *so) { int ret = 0; -#if INET6 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { -#else - if (SOCK_DOM(so) == PF_INET) { -#endif /* !INET6 */ ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0; } @@ -7625,6 +7659,7 @@ so_set_restrictions(struct socket *so, uint32_t vals) { int nocell_old, nocell_new; int noexpensive_old, noexpensive_new; + int noconstrained_old, noconstrained_new; /* * Deny-type restrictions are trapdoors; once set they cannot be @@ -7641,22 +7676,21 @@ so_set_restrictions(struct socket *so, uint32_t vals) */ nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE); + noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED); so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN | SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR | - SO_RESTRICT_DENY_EXPENSIVE)); + SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED)); nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE); + noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED); /* we can only set, not clear restrictions */ if ((nocell_new - nocell_old) == 0 && - (noexpensive_new - noexpensive_old) == 0) { + (noexpensive_new - noexpensive_old) == 0 && + (noconstrained_new - noconstrained_old) == 0) { return 0; } -#if INET6 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { -#else - if (SOCK_DOM(so) == PF_INET) { -#endif /* !INET6 */ if (nocell_new - nocell_old != 0) { /* * if deny cellular is now set, do what's needed @@ -7667,6 +7701,9 @@ so_set_restrictions(struct socket *so, uint32_t vals) if (noexpensive_new - noexpensive_old != 0) { inp_set_noexpensive(sotoinpcb(so)); } + if (noconstrained_new - noconstrained_old != 0) { + inp_set_noconstrained(sotoinpcb(so)); + } } if (SOCK_DOM(so) == PF_MULTIPATH) { @@ -7685,7 +7722,7 @@ so_get_restrictions(struct socket *so) } int -so_set_effective_pid(struct socket *so, int epid, struct proc *p) +so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred) { struct proc *ep = PROC_NULL; int error = 0; @@ -7712,7 +7749,7 @@ so_set_effective_pid(struct socket *so, int epid, struct proc *p) * the process's own pid, then proceed. Otherwise ensure * that the issuing process has the necessary privileges. 
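A simplified sketch of that delegation gate (the real check below also consults so->last_pid and is skipped when the new check_cred parameter is false; `may_delegate_pid` is a hypothetical name):

    static int
    may_delegate_pid(pid_t epid, proc_t p)
    {
        if (epid == proc_pid(p)) {
            return 0;       /* claiming our own pid needs no privilege */
        }
        /* Claiming any other pid requires the delegate privilege. */
        if (priv_check_cred(kauth_cred_get(),
            PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0) != 0) {
            return EACCES;
        }
        return 0;
    }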
*/ - if (epid != so->last_pid || epid != proc_pid(p)) { + if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) { if ((error = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) { error = EACCES; @@ -7746,6 +7783,23 @@ so_set_effective_pid(struct socket *so, int epid, struct proc *p) so->e_upid = proc_uniqueid(ep); so->e_pid = proc_pid(ep); proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid)); + +#if defined(XNU_TARGET_OS_OSX) + if (ep->p_responsible_pid != so->e_pid) { + proc_t rp = proc_find(ep->p_responsible_pid); + if (rp != PROC_NULL) { + proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid)); + so->so_rpid = ep->p_responsible_pid; + proc_rele(rp); + } else { + uuid_clear(so->so_ruuid); + so->so_rpid = -1; + } + } +#endif + } + if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) { + (*so->so_proto->pr_update_last_owner)(so, NULL, ep); } done: if (error == 0 && net_io_policy_log) { @@ -7784,7 +7838,7 @@ done: } int -so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p) +so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p, boolean_t check_cred) { uuid_string_t buf; uuid_t uuid; @@ -7815,8 +7869,9 @@ so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p) * the process's own uuid, then proceed. Otherwise ensure * that the issuing process has the necessary privileges. */ - if (uuid_compare(euuid, so->last_uuid) != 0 || - uuid_compare(euuid, uuid) != 0) { + if (check_cred && + (uuid_compare(euuid, so->last_uuid) != 0 || + uuid_compare(euuid, uuid) != 0)) { if ((error = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) { error = EACCES; @@ -7851,7 +7906,13 @@ so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p) so->e_pid = so->last_pid; uuid_copy(so->e_uuid, euuid); } - + /* + * The following will clear the effective process name as it's the same + * as the real process + */ + if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) { + (*so->so_proto->pr_update_last_owner)(so, NULL, NULL); + } done: if (error == 0 && net_io_policy_log) { uuid_unparse(so->e_uuid, buf); @@ -7927,10 +7988,13 @@ socket_post_kev_msg(uint32_t ev_code, void socket_post_kev_msg_closed(struct socket *so) { - struct kev_socket_closed ev; + struct kev_socket_closed ev = {}; struct sockaddr *socksa = NULL, *peersa = NULL; int err; - bzero(&ev, sizeof(ev)); + + if ((so->so_flags1 & SOF1_WANT_KEV_SOCK_CLOSED) == 0) { + return; + } err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa); if (err == 0) { err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,