X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/15129b1c8dbb3650c63b70adb1cad9af601c6c17..743345f9a4b36f7e2f9ba37691e70c50baecb56e:/bsd/netinet/in_pcb.c diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index 973abc9ac..c20961c22 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,6 +77,8 @@ #include #include #include +#include +#include #include #include @@ -90,6 +92,7 @@ #include #include #include +#include #include #include @@ -100,21 +103,22 @@ #include #endif /* INET6 */ -#if IPSEC -#include -#include -#endif /* IPSEC */ - #include #include #include #include -#if FLOW_DIVERT -#include +#include + +#if NECP +#include #endif +#include +#include +#include + static lck_grp_t *inpcb_lock_grp; static lck_attr_t *inpcb_lock_attr; static lck_grp_attr_t *inpcb_lock_grp_attr; @@ -127,22 +131,33 @@ static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ static boolean_t inpcb_fast_timer_on = FALSE; -static void inpcb_sched_timeout(struct timeval *); -static void inpcb_timeout(void *); -int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ +static boolean_t intcoproc_unrestricted = FALSE; + +extern char *proc_best_name(proc_t); + +/* + * If the total number of gc reqs is above a threshold, schedule + * garbage collect timer sooner + */ +static boolean_t inpcb_toomany_gcreq = FALSE; + +#define INPCB_GCREQ_THRESHOLD 50000 + +static thread_call_t inpcb_thread_call, inpcb_fast_thread_call; +static void inpcb_sched_timeout(void); +static void inpcb_sched_lazy_timeout(void); +static void _inpcb_sched_timeout(unsigned int); +static void inpcb_timeout(void *, void *); +const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ extern int tvtohz(struct timeval *); #if CONFIG_PROC_UUID_POLICY static void inp_update_cellular_policy(struct inpcb *, boolean_t); -#if FLOW_DIVERT -static void inp_update_flow_divert_policy(struct inpcb *, boolean_t); -#endif /* FLOW_DIVERT */ +#if NECP +static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t); +#endif /* NECP */ #endif /* !CONFIG_PROC_UUID_POLICY */ -#if IPSEC -extern int ipsec_bypass; -#endif /* IPSEC */ - #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) @@ -203,6 +218,11 @@ SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); +static uint32_t apn_fallbk_debug = 0; +#define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0) + +static boolean_t apn_fallbk_enabled = FALSE; + extern int udp_use_randomport; extern int tcp_use_randomport; @@ -269,6 +289,12 @@ in_pcbinit(void) inpcb_lock_attr = lck_attr_alloc_init(); lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); + inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout, + NULL, THREAD_CALL_PRIORITY_KERNEL); + inpcb_fast_thread_call = thread_call_allocate_with_priority( + inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL); + if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) + 
panic("unable to alloc the inpcb thread call"); /* * Initialize data structures required to deliver @@ -279,18 +305,26 @@ in_pcbinit(void) RB_INIT(&inp_fc_tree); bzero(&key_inp, sizeof(key_inp)); lck_mtx_unlock(&inp_fc_lck); + + PE_parse_boot_argn("intcoproc_unrestricted", &intcoproc_unrestricted, + sizeof (intcoproc_unrestricted)); } #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) static void -inpcb_timeout(void *arg) +inpcb_timeout(void *arg0, void *arg1) { -#pragma unused(arg) +#pragma unused(arg0) struct inpcbinfo *ipi; boolean_t t, gc; struct intimercount gccnt, tmcnt; - struct timeval leeway; + boolean_t toomany_gc = FALSE; + + if (arg1 != NULL) { + VERIFY(arg1 == &inpcb_toomany_gcreq); + toomany_gc = *(boolean_t *)arg1; + } /* * Update coarse-grained networking timestamp (in sec.); the idea @@ -299,11 +333,12 @@ inpcb_timeout(void *arg) */ net_update_uptime(); + bzero(&gccnt, sizeof(gccnt)); + bzero(&tmcnt, sizeof(tmcnt)); + lck_mtx_lock_spin(&inpcb_timeout_lock); gc = inpcb_garbage_collecting; inpcb_garbage_collecting = FALSE; - bzero(&gccnt, sizeof(gccnt)); - bzero(&tmcnt, sizeof(tmcnt)); t = inpcb_ticking; inpcb_ticking = FALSE; @@ -351,73 +386,110 @@ inpcb_timeout(void *arg) inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); /* re-arm the timer if there's work to do */ - inpcb_timeout_run--; - VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); + if (toomany_gc) { + inpcb_toomany_gcreq = FALSE; + } else { + inpcb_timeout_run--; + VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); + } - bzero(&leeway, sizeof(leeway)); - leeway.tv_sec = inpcb_timeout_lazy; if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) - inpcb_sched_timeout(NULL); + inpcb_sched_timeout(); else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) /* be lazy when idle with little activity */ - inpcb_sched_timeout(&leeway); + inpcb_sched_lazy_timeout(); else - inpcb_sched_timeout(NULL); + inpcb_sched_timeout(); lck_mtx_unlock(&inpcb_timeout_lock); } static void -inpcb_sched_timeout(struct timeval *leeway) +inpcb_sched_timeout(void) { - lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); + _inpcb_sched_timeout(0); +} +static void +inpcb_sched_lazy_timeout(void) +{ + _inpcb_sched_timeout(inpcb_timeout_lazy); +} + +static void +_inpcb_sched_timeout(unsigned int offset) +{ + uint64_t deadline, leeway; + + clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline); + lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); if (inpcb_timeout_run == 0 && - (inpcb_garbage_collecting || inpcb_ticking)) { + (inpcb_garbage_collecting || inpcb_ticking)) { lck_mtx_convert_spin(&inpcb_timeout_lock); inpcb_timeout_run++; - if (leeway == NULL) { + if (offset == 0) { inpcb_fast_timer_on = TRUE; - timeout(inpcb_timeout, NULL, hz); + thread_call_enter_delayed(inpcb_thread_call, + deadline); } else { inpcb_fast_timer_on = FALSE; - timeout_with_leeway(inpcb_timeout, NULL, hz, - tvtohz(leeway)); + clock_interval_to_absolutetime_interval(offset, + NSEC_PER_SEC, &leeway); + thread_call_enter_delayed_with_leeway( + inpcb_thread_call, NULL, deadline, leeway, + THREAD_CALL_DELAY_LEEWAY); } } else if (inpcb_timeout_run == 1 && - leeway == NULL && !inpcb_fast_timer_on) { + offset == 0 && !inpcb_fast_timer_on) { /* * Since the request was for a fast timer but the * scheduled timer is a lazy timer, try to schedule - * another instance of fast timer also + * another instance of fast timer also. 
*/ lck_mtx_convert_spin(&inpcb_timeout_lock); inpcb_timeout_run++; inpcb_fast_timer_on = TRUE; - timeout(inpcb_timeout, NULL, hz); + thread_call_enter_delayed(inpcb_fast_thread_call, deadline); } } void inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) { - struct timeval leeway; + u_int32_t gccnt; + uint64_t deadline; + lck_mtx_lock_spin(&inpcb_timeout_lock); inpcb_garbage_collecting = TRUE; + gccnt = ipi->ipi_gc_req.intimer_nodelay + + ipi->ipi_gc_req.intimer_fast; + + if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) { + inpcb_toomany_gcreq = TRUE; + + /* + * There are toomany pcbs waiting to be garbage collected, + * schedule a much faster timeout in addition to + * the caller's request + */ + lck_mtx_convert_spin(&inpcb_timeout_lock); + clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline); + thread_call_enter1_delayed(inpcb_thread_call, + &inpcb_toomany_gcreq, deadline); + } + switch (type) { case INPCB_TIMER_NODELAY: atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); - inpcb_sched_timeout(NULL); + inpcb_sched_timeout(); break; case INPCB_TIMER_FAST: atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); - inpcb_sched_timeout(NULL); + inpcb_sched_timeout(); break; default: atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); - leeway.tv_sec = inpcb_timeout_lazy; - leeway.tv_usec = 0; - inpcb_sched_timeout(&leeway); + inpcb_sched_lazy_timeout(); break; } lck_mtx_unlock(&inpcb_timeout_lock); @@ -426,23 +498,21 @@ inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) void inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) { - struct timeval leeway; + lck_mtx_lock_spin(&inpcb_timeout_lock); inpcb_ticking = TRUE; switch (type) { case INPCB_TIMER_NODELAY: atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); - inpcb_sched_timeout(NULL); + inpcb_sched_timeout(); break; case INPCB_TIMER_FAST: atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); - inpcb_sched_timeout(NULL); + inpcb_sched_timeout(); break; default: atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); - leeway.tv_sec = inpcb_timeout_lazy; - leeway.tv_usec = 0; - inpcb_sched_timeout(&leeway); + inpcb_sched_lazy_timeout(); break; } lck_mtx_unlock(&inpcb_timeout_lock); @@ -491,7 +561,6 @@ in_pcbinfo_detach(struct inpcbinfo *ipi) * Returns: 0 Success * ENOBUFS * ENOMEM - * ipsec_init_policy:??? 
[IPSEC] */ int in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) @@ -503,7 +572,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) int mac_error; #endif /* CONFIG_MACF_NET */ - if (!so->cached_in_sock_layer) { + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); @@ -521,7 +590,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) #if CONFIG_MACF_NET mac_error = mac_inpcb_label_init(inp, M_WAITOK); if (mac_error != 0) { - if (!so->cached_in_sock_layer) + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) zfree(pcbinfo->ipi_zone, inp); return (mac_error); } @@ -554,6 +623,15 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) /* NOTREACHED */ } + /* make sure inp_Wstat is always 64-bit aligned */ + inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, + sizeof (u_int64_t)); + if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + + sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { + panic("%s: insufficient space to align inp_Wstat", __func__); + /* NOTREACHED */ + } + so->so_pcb = (caddr_t)inp; if (so->so_proto->pr_flags & PR_PCBLOCK) { @@ -561,7 +639,6 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) pcbinfo->ipi_lock_attr); } - #if INET6 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) inp->inp_flags |= IN6P_IPV6_V6ONLY; @@ -569,6 +646,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif /* INET6 */ + if (intcoproc_unrestricted) + inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; (void) inp_update_policy(inp); @@ -639,7 +718,7 @@ in_pcb_conflict_post_msg(u_int16_t port) ev_msg.dv[0].data_ptr = &in_portinuse; ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); ev_msg.dv[1].data_length = 0; - kev_post_msg(&ev_msg); + dlil_post_complete_msg(NULL, &ev_msg); } /* @@ -664,7 +743,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) u_short lport = 0, rand_port = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, randomport, conflict = 0; + boolean_t anonport = FALSE; kauth_cred_t cred; + struct in_addr laddr; + struct ifnet *outif = NULL; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! 
*/ return (EADDRNOTAVAIL); @@ -672,10 +754,13 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) return (EINVAL); if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) wild = 1; + + bzero(&laddr, sizeof(laddr)); + socket_unlock(so, 0); /* keep reference on socket */ lck_rw_lock_exclusive(pcbinfo->ipi_lock); + if (nam != NULL) { - struct ifnet *outif = NULL; if (nam->sa_len != sizeof (struct sockaddr_in)) { lck_rw_done(pcbinfo->ipi_lock); @@ -739,7 +824,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) struct inpcb *t; uid_t u; - /* GROSS */ if (ntohs(lport) < IPPORT_RESERVED) { cred = kauth_cred_proc_ref(p); error = priv_check_cred(cred, @@ -802,8 +886,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) } } } - inp->inp_laddr = SIN(nam)->sin_addr; - inp->inp_last_outifp = outif; + laddr = SIN(nam)->sin_addr; } if (lport == 0) { u_short first, last; @@ -814,15 +897,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) udp_use_randomport); /* - * TODO: - * - * The following should be moved into its own routine and - * thus can be shared with in6_pcbsetport(); the latter - * currently duplicates the logic. + * Even though this looks similar to the code in + * in6_pcbsetport, the v6 vs v4 checks are different. */ - - inp->inp_flags |= INP_ANONPORT; - + anonport = TRUE; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; @@ -871,8 +949,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) if (count-- < 0) { /* completely used? */ lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - inp->inp_laddr.s_addr = INADDR_ANY; - inp->inp_last_outifp = NULL; return (EADDRNOTAVAIL); } --*lastport; @@ -880,7 +956,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local_and_cleanup(pcbinfo, - inp->inp_laddr, lport, wild)); + ((laddr.s_addr != INADDR_ANY) ? laddr : + inp->inp_laddr), lport, wild)); } else { /* * counting up @@ -896,8 +973,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) if (count-- < 0) { /* completely used? */ lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - inp->inp_laddr.s_addr = INADDR_ANY; - inp->inp_last_outifp = NULL; return (EADDRNOTAVAIL); } ++*lastport; @@ -905,15 +980,42 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local_and_cleanup(pcbinfo, - inp->inp_laddr, lport, wild)); + ((laddr.s_addr != INADDR_ANY) ? laddr : + inp->inp_laddr), lport, wild)); } } socket_lock(so, 0); + + /* + * We unlocked socket's protocol lock for a long time. + * The socket might have been dropped/defuncted. + * Checking if world has changed since. 
+ */ + if (inp->inp_state == INPCB_STATE_DEAD) { + lck_rw_done(pcbinfo->ipi_lock); + return (ECONNABORTED); + } + + if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { + lck_rw_done(pcbinfo->ipi_lock); + return (EINVAL); + } + + if (laddr.s_addr != INADDR_ANY) { + inp->inp_laddr = laddr; + inp->inp_last_outifp = outif; + } inp->inp_lport = lport; + if (anonport) + inp->inp_flags |= INP_ANONPORT; + if (in_pcbinshash(inp, 1) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; - inp->inp_lport = 0; inp->inp_last_outifp = NULL; + + inp->inp_lport = 0; + if (anonport) + inp->inp_flags &= ~INP_ANONPORT; lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } @@ -922,6 +1024,161 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) return (0); } +#define APN_FALLBACK_IP_FILTER(a) \ + (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \ + IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \ + IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \ + IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \ + IN_PRIVATE(ntohl((a)->sin_addr.s_addr))) + +#define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */ +static uint64_t last_apn_fallback = 0; + +static boolean_t +apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4) +{ + uint64_t timenow; + struct sockaddr_storage lookup_default_addr; + struct rtentry *rt = NULL; + + VERIFY(proc != NULL); + + if (apn_fallbk_enabled == FALSE) + return FALSE; + + if (proc == kernproc) + return FALSE; + + if (so && (so->so_options & SO_NOAPNFALLBK)) + return FALSE; + + timenow = net_uptime(); + if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) { + apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n")); + return FALSE; + } + + if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) + return FALSE; + + /* Check if we have unscoped IPv6 default route through cellular */ + bzero(&lookup_default_addr, sizeof(lookup_default_addr)); + lookup_default_addr.ss_family = AF_INET6; + lookup_default_addr.ss_len = sizeof(struct sockaddr_in6); + + rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); + if (NULL == rt) { + apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " + "unscoped default IPv6 route.\n")); + return FALSE; + } + + if (!IFNET_IS_CELLULAR(rt->rt_ifp)) { + rtfree(rt); + apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " + "unscoped default IPv6 route through cellular interface.\n")); + return FALSE; + } + + /* + * We have a default IPv6 route, ensure that + * we do not have IPv4 default route before triggering + * the event + */ + rtfree(rt); + rt = NULL; + + bzero(&lookup_default_addr, sizeof(lookup_default_addr)); + lookup_default_addr.ss_family = AF_INET; + lookup_default_addr.ss_len = sizeof(struct sockaddr_in); + + rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); + + if (rt) { + rtfree(rt); + rt = NULL; + apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped " + "IPv4 default route!\n")); + return FALSE; + } + + { + /* + * We disable APN fallback if the binary is not a third-party app. + * Note that platform daemons use their process name as a + * bundle ID so we filter out bundle IDs without dots. + */ + const char *bundle_id = cs_identity_get(proc); + if (bundle_id == NULL || + bundle_id[0] == '\0' || + strchr(bundle_id, '.') == NULL || + strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) { + apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-" + "party bundle ID \"%s\"!\n", (bundle_id ? 
bundle_id : "NULL"))); + return FALSE; + } + } + + { + /* + * The Apple App Store IPv6 requirement started on + * June 1st, 2016 at 12:00:00 AM PDT. + * We disable APN fallback if the binary is more recent than that. + * We check both atime and birthtime since birthtime is not always supported. + */ + static const long ipv6_start_date = 1464764400L; + vfs_context_t context; + struct stat64 sb; + int vn_stat_error; + + bzero(&sb, sizeof(struct stat64)); + context = vfs_context_create(NULL); + vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context); + (void)vfs_context_rele(context); + + if (vn_stat_error != 0 || + sb.st_atimespec.tv_sec >= ipv6_start_date || + sb.st_birthtimespec.tv_sec >= ipv6_start_date) { + apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary " + "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n", + vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec, + sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec)); + return FALSE; + } + } + return TRUE; +} + +static void +apn_fallback_trigger(proc_t proc) +{ + pid_t pid = 0; + struct kev_msg ev_msg; + struct kev_netevent_apnfallbk_data apnfallbk_data; + + last_apn_fallback = net_uptime(); + pid = proc_pid(proc); + uuid_t application_uuid; + uuid_clear(application_uuid); + proc_getexecutableuuid(proc, application_uuid, + sizeof(application_uuid)); + + bzero(&ev_msg, sizeof (struct kev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS; + ev_msg.event_code = KEV_NETEVENT_APNFALLBACK; + + bzero(&apnfallbk_data, sizeof(apnfallbk_data)); + apnfallbk_data.epid = pid; + uuid_copy(apnfallbk_data.euuid, application_uuid); + + ev_msg.dv[0].data_ptr = &apnfallbk_data; + ev_msg.dv[0].data_length = sizeof(apnfallbk_data); + kev_post_msg(&ev_msg); + apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n")); +} + /* * Transform old in_pcbconnect() into an inner subroutine for new * in_pcbconnect(); do some validity-checking on the remote address @@ -944,13 +1201,13 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) */ int in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, - unsigned int ifscope, struct ifnet **outif) + unsigned int ifscope, struct ifnet **outif, int raw) { - boolean_t nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR); struct route *ro = &inp->inp_route; struct in_ifaddr *ia = NULL; struct sockaddr_in sin; int error = 0; + boolean_t restricted = FALSE; if (outif != NULL) *outif = NULL; @@ -958,7 +1215,7 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, return (EINVAL); if (SIN(nam)->sin_family != AF_INET) return (EAFNOSUPPORT); - if (SIN(nam)->sin_port == 0) + if (raw == 0 && SIN(nam)->sin_port == 0) return (EADDRNOTAVAIL); /* @@ -968,8 +1225,8 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ - if (SIN(nam)->sin_addr.s_addr == INADDR_ANY || - SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) { + if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY || + SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) { lck_rw_lock_shared(in_ifaddr_rwlock); if (!TAILQ_EMPTY(&in_ifaddrhead)) { ia = TAILQ_FIRST(&in_ifaddrhead); @@ -1041,11 +1298,18 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, * interface to take the source address from. 
*/ if (ro->ro_rt == NULL) { + proc_t proc = current_proc(); + VERIFY(ia == NULL); ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); if (ia == NULL) ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); error = ((ia == NULL) ? ENETUNREACH : 0); + + if (apn_fallback_required(proc, inp->inp_socket, + (void *)nam)) + apn_fallback_trigger(proc); + goto done; } RT_LOCK_ASSERT_HELD(ro->ro_rt); @@ -1059,11 +1323,13 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, * If the route points to a cellular interface and the * caller forbids our using interfaces of such type, * pretend that there is no route. + * Apply the same logic for expensive interfaces. */ - if (nocell && IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) { + if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) { RT_UNLOCK(ro->ro_rt); ROUTE_RELEASE(ro); error = EHOSTUNREACH; + restricted = TRUE; } else { /* Become a regular mutex */ RT_CONVERT_LOCK(ro->ro_rt); @@ -1140,11 +1406,13 @@ done: * If the source address belongs to a cellular interface * and the socket forbids our using interfaces of such * type, pretend that there is no source address. + * Apply the same logic for expensive interfaces. */ IFA_LOCK_SPIN(&ia->ia_ifa); - if (nocell && IFNET_IS_CELLULAR(ia->ia_ifa.ifa_ifp)) { + if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) { IFA_UNLOCK(&ia->ia_ifa); error = EHOSTUNREACH; + restricted = TRUE; } else if (error == 0) { *laddr = ia->ia_addr.sin_addr; if (outif != NULL) { @@ -1170,7 +1438,7 @@ done: ia = NULL; } - if (nocell && error == EHOSTUNREACH) { + if (restricted && error == EHOSTUNREACH) { soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); } @@ -1196,25 +1464,26 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; struct inpcb *pcb; int error; + struct socket *so = inp->inp_socket; /* * Call inner routine, to assign local interface address. */ - if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0) + if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) return (error); - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : laddr, inp->inp_lport, 0, NULL); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); /* * Check if the socket is still in a valid state. When we unlock this * embryonic socket, it can get aborted if another thread is closing * the listener (radar 7947600). */ - if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) + if ((so->so_flags & SOF_ABORTED) != 0) return (ECONNREFUSED); if (pcb != NULL) { @@ -1232,27 +1501,39 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, * Lock inversion issue, mostly with udp * multicast packets. */ - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); } inp->inp_laddr = laddr; /* no reference needed */ inp->inp_last_outifp = (outif != NULL) ? *outif : NULL; inp->inp_flags |= INP_INADDR_ANY; } else { + /* + * Usage of IP_PKTINFO, without local port already + * speficified will cause kernel to panic, + * see rdar://problem/18508185. + * For now returning error to avoid a kernel panic + * This routines can be refactored and handle this better + * in future. 
+ */ + if (inp->inp_lport == 0) + return (EINVAL); if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { /* * Lock inversion issue, mostly with udp * multicast packets. */ - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); } } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_invalidate_cache(inp); in_pcbrehash(inp); lck_rw_done(inp->inp_pcbinfo->ipi_lock); return (0); @@ -1263,6 +1544,9 @@ in_pcbdisconnect(struct inpcb *inp) { struct socket *so = inp->inp_socket; + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_cache(inp); + inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; @@ -1302,6 +1586,20 @@ in_pcbdetach(struct inpcb *inp) } #endif /* IPSEC */ + /* + * Let NetworkStatistics know this PCB is going away + * before we detach it. + */ + if (nstat_collect && + (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) + nstat_pcb_detach(inp); + + /* Free memory buffer held for generating keep alives */ + if (inp->inp_keepalive_data != NULL) { + FREE(inp->inp_keepalive_data, M_TEMP); + inp->inp_keepalive_data = NULL; + } + /* mark socket state as dead */ if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", @@ -1320,14 +1618,21 @@ in_pcbdetach(struct inpcb *inp) ROUTE_RELEASE(&inp->inp_route); imo = inp->inp_moptions; inp->inp_moptions = NULL; - if (imo != NULL) - IMO_REMREF(imo); sofreelastref(so, 0); inp->inp_state = INPCB_STATE_DEAD; /* makes sure we're not called twice from so_close */ so->so_flags |= SOF_PCBCLEARING; inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); + + /* + * See inp_join_group() for why we need to unlock + */ + if (imo != NULL) { + socket_unlock(so, 0); + IMO_REMREF(imo); + socket_lock(so, 0); + } } } @@ -1392,13 +1697,16 @@ in_pcbdispose(struct inpcb *inp) #if CONFIG_MACF_NET mac_inpcb_label_destroy(inp); #endif /* CONFIG_MACF_NET */ +#if NECP + necp_inpcb_dispose(inp); +#endif /* NECP */ /* * In case there a route cached after a detach (possible * in the tcp case), make sure that it is freed before * we deallocate the structure. */ ROUTE_RELEASE(&inp->inp_route); - if (!so->cached_in_sock_layer) { + if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { zfree(ipi->ipi_zone, inp); } sodealloc(so); @@ -1450,8 +1758,11 @@ in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) sin->sin_family = AF_INET; sin->sin_len = sizeof (*sin); - if ((inp = sotoinpcb(so)) == NULL || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); sin->sin_port = inp->inp_lport; @@ -1498,8 +1809,11 @@ in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) sin->sin_family = AF_INET; sin->sin_len = sizeof (*sin); - if ((inp = sotoinpcb(so)) == NULL || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { return (inp == NULL ? 
EINVAL : EPROTOTYPE); } @@ -1545,18 +1859,11 @@ in_losing(struct inpcb *inp) { boolean_t release = FALSE; struct rtentry *rt; - struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt) != NULL) { struct in_ifaddr *ia = NULL; - bzero((caddr_t)&info, sizeof (info)); RT_LOCK(rt); - info.rti_info[RTAX_DST] = - (struct sockaddr *)&inp->inp_route.ro_dst; - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) { /* * Prevent another thread from modifying rt_key, @@ -1746,11 +2053,7 @@ in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1786,11 +2089,7 @@ in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == INADDR_ANY && @@ -1873,11 +2172,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1914,11 +2209,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == INADDR_ANY && @@ -1972,7 +2263,13 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, } /* - * Insert PCB onto various hash lists. + * @brief Insert PCB onto various hash lists. + * + * @param inp Pointer to internet protocol control block + * @param locked Implies if ipi_lock (protecting pcb list) + * is already locked or not. + * + * @return int error on failure and 0 on success */ int in_pcbinshash(struct inpcb *inp, int locked) @@ -1992,17 +2289,23 @@ in_pcbinshash(struct inpcb *inp, int locked) socket_unlock(inp->inp_socket, 0); lck_rw_lock_exclusive(pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); - if (inp->inp_state == INPCB_STATE_DEAD) { - /* - * The socket got dropped when - * it was unlocked - */ - lck_rw_done(pcbinfo->ipi_lock); - return (ECONNABORTED); - } } } + /* + * This routine or its caller may have given up + * socket's protocol lock briefly. + * During that time the socket may have been dropped. + * Safe-guarding against that. + */ + if (inp->inp_state == INPCB_STATE_DEAD) { + if (!locked) { + lck_rw_done(pcbinfo->ipi_lock); + } + return (ECONNABORTED); + } + + #if INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; @@ -2026,8 +2329,6 @@ in_pcbinshash(struct inpcb *inp, int locked) break; } - VERIFY(inp->inp_state != INPCB_STATE_DEAD); - /* * If none exists, malloc one and tack it on. 
*/ @@ -2043,11 +2344,21 @@ in_pcbinshash(struct inpcb *inp, int locked) LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } + + VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); + inp->inp_flags2 |= INP2_INHASHLIST; + if (!locked) lck_rw_done(pcbinfo->ipi_lock); + +#if NECP + // This call catches the original setting of the local address + inp_update_necp_policy(inp, NULL, NULL, 0); +#endif /* NECP */ + return (0); } @@ -2074,8 +2385,19 @@ in_pcbrehash(struct inpcb *inp) inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; - LIST_REMOVE(inp, inp_hash); + if (inp->inp_flags2 & INP2_INHASHLIST) { + LIST_REMOVE(inp, inp_hash); + inp->inp_flags2 &= ~INP2_INHASHLIST; + } + + VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); LIST_INSERT_HEAD(head, inp, inp_hash); + inp->inp_flags2 |= INP2_INHASHLIST; + +#if NECP + // This call catches updates to the remote addresses + inp_update_necp_policy(inp, NULL, NULL, 0); +#endif /* NECP */ } /* @@ -2087,16 +2409,31 @@ in_pcbremlists(struct inpcb *inp) { inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; - if (inp->inp_lport) { + /* + * Check if it's in hashlist -- an inp is placed in hashlist when + * it's local port gets assigned. So it should also be present + * in the port list. + */ + if (inp->inp_flags2 & INP2_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; + VERIFY(phd != NULL && inp->inp_lport > 0); + LIST_REMOVE(inp, inp_hash); + inp->inp_hash.le_next = NULL; + inp->inp_hash.le_prev = NULL; + LIST_REMOVE(inp, inp_portlist); - if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) { + inp->inp_portlist.le_next = NULL; + inp->inp_portlist.le_prev = NULL; + if (LIST_EMPTY(&phd->phd_pcblist)) { LIST_REMOVE(phd, phd_hash); FREE(phd, M_PCB); } + inp->inp_phd = NULL; + inp->inp_flags2 &= ~INP2_INHASHLIST; } + VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); if (inp->inp_flags2 & INP2_TIMEWAIT) { /* Remove from time-wait queue */ @@ -2164,7 +2501,6 @@ stopusing: OSCompareAndSwap(origwant, newwant, wantcnt); } return (WNT_STOPUSING); - break; case WNT_ACQUIRE: /* @@ -2181,7 +2517,6 @@ stopusing: newwant = origwant + 1; } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); return (WNT_ACQUIRE); - break; case WNT_RELEASE: /* @@ -2218,7 +2553,6 @@ stopusing: if (locked == 0) socket_unlock(pcb->inp_socket, 1); return (WNT_RELEASE); - break; default: panic("%s: so=%p not a valid state =%x\n", __func__, @@ -2338,7 +2672,7 @@ inp_route_copyin(struct inpcb *inp, struct route *src) } /* - * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option. + * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option. */ int inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) @@ -2414,25 +2748,82 @@ inp_clear_nocellular(struct inpcb *inp) } } -#if FLOW_DIVERT +void +inp_set_noexpensive(struct inpcb *inp) +{ + inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +void +inp_set_awdl_unrestricted(struct inpcb *inp) +{ + inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +boolean_t +inp_get_awdl_unrestricted(struct inpcb *inp) +{ + return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? 
TRUE : FALSE; +} + +void +inp_clear_awdl_unrestricted(struct inpcb *inp) +{ + inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +void +inp_set_intcoproc_allowed(struct inpcb *inp) +{ + inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +boolean_t +inp_get_intcoproc_allowed(struct inpcb *inp) +{ + return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE; +} + +void +inp_clear_intcoproc_allowed(struct inpcb *inp) +{ + inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +#if NECP /* - * Called when PROC_UUID_FLOW_DIVERT is set. + * Called when PROC_UUID_NECP_APP_POLICY is set. */ void -inp_set_flow_divert(struct inpcb *inp) +inp_set_want_app_policy(struct inpcb *inp) { - inp->inp_flags2 |= INP2_WANT_FLOW_DIVERT; + inp->inp_flags2 |= INP2_WANT_APP_POLICY; } /* - * Called when PROC_UUID_FLOW_DIVERT is cleared. + * Called when PROC_UUID_NECP_APP_POLICY is cleared. */ void -inp_clear_flow_divert(struct inpcb *inp) +inp_clear_want_app_policy(struct inpcb *inp) { - inp->inp_flags2 &= ~INP2_WANT_FLOW_DIVERT; + inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; } -#endif /* FLOW_DIVERT */ +#endif /* NECP */ /* * Calculate flow hash for an inp, used by an interface to identify a @@ -2561,6 +2952,9 @@ inp_fc_feedback(struct inpcb *inp) return; } + if (inp->inp_sndinprog_cnt > 0) + inp->inp_flags |= INP_FC_FEEDBACK; + /* * Return if the connection is not in flow-controlled state. * This can happen if the connection experienced @@ -2592,9 +2986,6 @@ inp_reset_fc_state(struct inpcb *inp) soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); } - if (inp->inp_sndinprog_cnt > 0) - inp->inp_flags |= INP_FC_FEEDBACK; - /* Give a write wakeup to unblock the socket */ if (needwakeup) sowwakeup(so); @@ -2690,13 +3081,19 @@ inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) struct socket *so = inp->inp_socket; soprocinfo->spi_pid = so->last_pid; + if (so->last_pid != 0) + uuid_copy(soprocinfo->spi_uuid, so->last_uuid); /* * When not delegated, the effective pid is the same as the real pid */ - if (so->so_flags & SOF_DELEGATED) + if (so->so_flags & SOF_DELEGATED) { + soprocinfo->spi_delegated = 1; soprocinfo->spi_epid = so->e_pid; - else + uuid_copy(soprocinfo->spi_euuid, so->e_uuid); + } else { + soprocinfo->spi_delegated = 0; soprocinfo->spi_epid = so->last_pid; + } } int @@ -2736,13 +3133,13 @@ inp_update_cellular_policy(struct inpcb *inp, boolean_t set) VERIFY(so != NULL); VERIFY(inp->inp_state != INPCB_STATE_DEAD); - before = (inp->inp_flags & INP_NO_IFT_CELLULAR); + before = INP_NO_CELLULAR(inp); if (set) { inp_set_nocellular(inp); } else { inp_clear_nocellular(inp); } - after = (inp->inp_flags & INP_NO_IFT_CELLULAR); + after = INP_NO_CELLULAR(inp); if (net_io_policy_log && (before != after)) { static const char *ok = "OK"; static const char *nok = "NOACCESS"; @@ -2771,9 +3168,9 @@ inp_update_cellular_policy(struct inpcb *inp, boolean_t set) } } -#if FLOW_DIVERT +#if NECP static void -inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) +inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set) { struct socket *so = inp->inp_socket; int before, after; @@ -2781,17 +3178,13 @@ inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) VERIFY(so != NULL); VERIFY(inp->inp_state != INPCB_STATE_DEAD); - if (set && !(inp->inp_flags2 & 
INP2_WANT_FLOW_DIVERT)) { - set = !flow_divert_is_dns_service(so); - } - - before = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); + before = (inp->inp_flags2 & INP2_WANT_APP_POLICY); if (set) { - inp_set_flow_divert(inp); + inp_set_want_app_policy(inp); } else { - inp_clear_flow_divert(inp); + inp_clear_want_app_policy(inp); } - after = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); + after = (inp->inp_flags2 & INP2_WANT_APP_POLICY); if (net_io_policy_log && (before != after)) { static const char *wanted = "WANTED"; static const char *unwanted = "UNWANTED"; @@ -2816,9 +3209,24 @@ inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) ((before < after) ? wanted : unwanted)); } } -#endif /* FLOW_DIVERT */ +#endif /* NECP */ #endif /* !CONFIG_PROC_UUID_POLICY */ +#if NECP +void +inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface) +{ + necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface); + if (necp_socket_should_rescope(inp) && + inp->inp_lport == 0 && + inp->inp_laddr.s_addr == INADDR_ANY && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + // If we should rescope, and the socket is not yet bound + inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL); + } +} +#endif /* NECP */ + int inp_update_policy(struct inpcb *inp) { @@ -2863,14 +3271,14 @@ inp_update_policy(struct inpcb *inp) } else if (!(pflags & PROC_UUID_NO_CELLULAR)) { inp_update_cellular_policy(inp, FALSE); } -#if FLOW_DIVERT - /* update flow divert policy for this socket */ - if (err == 0 && (pflags & PROC_UUID_FLOW_DIVERT)) { - inp_update_flow_divert_policy(inp, TRUE); - } else if (!(pflags & PROC_UUID_FLOW_DIVERT)) { - inp_update_flow_divert_policy(inp, FALSE); +#if NECP + /* update necp want app policy for this socket */ + if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) { + inp_update_necp_want_app_policy(inp, TRUE); + } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) { + inp_update_necp_want_app_policy(inp, FALSE); } -#endif /* FLOW_DIVERT */ +#endif /* NECP */ } return ((err == ENOENT) ? 0 : err); @@ -2880,15 +3288,40 @@ inp_update_policy(struct inpcb *inp) #endif /* !CONFIG_PROC_UUID_POLICY */ } -boolean_t -inp_restricted(struct inpcb *inp, struct ifnet *ifp) +static unsigned int log_restricted; +SYSCTL_DECL(_net_inet); +SYSCTL_INT(_net_inet, OID_AUTO, log_restricted, + CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0, + "Log network restrictions"); +/* + * Called when we need to enforce policy restrictions in the input path. + * + * Returns TRUE if we're not allowed to receive data, otherwise FALSE. + */ +static boolean_t +_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) { VERIFY(inp != NULL); + /* + * Inbound restrictions. 
+ */ if (!sorestrictrecv) return (FALSE); - if (ifp == NULL || !(ifp->if_eflags & IFEF_RESTRICTED_RECV)) + if (ifp == NULL) + return (FALSE); + + if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) + return (TRUE); + + if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) + return (TRUE); + + if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) + return (TRUE); + + if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) return (FALSE); if (inp->inp_flags & INP_RECV_ANYIF) @@ -2897,5 +3330,160 @@ inp_restricted(struct inpcb *inp, struct ifnet *ifp) if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) return (FALSE); + if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) + return (TRUE); + return (TRUE); } + +boolean_t +inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) +{ + boolean_t ret; + + ret = _inp_restricted_recv(inp, ifp); + if (ret == TRUE && log_restricted) { + printf("pid %d (%s) is unable to receive packets on %s\n", + current_proc()->p_pid, proc_best_name(current_proc()), + ifp->if_xname); + } + return (ret); +} + +/* + * Called when we need to enforce policy restrictions in the output path. + * + * Returns TRUE if we're not allowed to send data out, otherwise FALSE. + */ +static boolean_t +_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) +{ + VERIFY(inp != NULL); + + /* + * Outbound restrictions. + */ + if (!sorestrictsend) + return (FALSE); + + if (ifp == NULL) + return (FALSE); + + if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) + return (TRUE); + + if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) + return (TRUE); + + if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) + return (TRUE); + + if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) + return (TRUE); + + return (FALSE); +} + +boolean_t +inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) +{ + boolean_t ret; + + ret = _inp_restricted_send(inp, ifp); + if (ret == TRUE && log_restricted) { + printf("pid %d (%s) is unable to transmit packets on %s\n", + current_proc()->p_pid, proc_best_name(current_proc()), + ifp->if_xname); + } + return (ret); +} + +inline void +inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack) +{ + struct ifnet *ifp = inp->inp_last_outifp; + struct socket *so = inp->inp_socket; + if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) && + (ifp->if_type == IFT_CELLULAR || + ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) { + int32_t unsent; + + so->so_snd.sb_flags |= SB_SNDBYTE_CNT; + + /* + * There can be data outstanding before the connection + * becomes established -- TFO case + */ + if (so->so_snd.sb_cc > 0) + inp_incr_sndbytes_total(so, so->so_snd.sb_cc); + + unsent = inp_get_sndbytes_allunsent(so, th_ack); + if (unsent > 0) + inp_incr_sndbytes_unsent(so, unsent); + } +} + +inline void +inp_incr_sndbytes_total(struct socket *so, int32_t len) +{ + struct inpcb *inp = (struct inpcb *)so->so_pcb; + struct ifnet *ifp = inp->inp_last_outifp; + + if (ifp != NULL) { + VERIFY(ifp->if_sndbyte_total >= 0); + OSAddAtomic64(len, &ifp->if_sndbyte_total); + } +} + +inline void +inp_decr_sndbytes_total(struct socket *so, int32_t len) +{ + struct inpcb *inp = (struct inpcb *)so->so_pcb; + struct ifnet *ifp = inp->inp_last_outifp; + + if (ifp != NULL) { + VERIFY(ifp->if_sndbyte_total >= len); + OSAddAtomic64(-len, &ifp->if_sndbyte_total); + } +} + +inline void +inp_incr_sndbytes_unsent(struct socket *so, int32_t len) +{ + struct inpcb *inp = (struct inpcb *)so->so_pcb; + struct ifnet *ifp = inp->inp_last_outifp; + + if (ifp != NULL) { + 
VERIFY(ifp->if_sndbyte_unsent >= 0); + OSAddAtomic64(len, &ifp->if_sndbyte_unsent); + } +} + +inline void +inp_decr_sndbytes_unsent(struct socket *so, int32_t len) +{ + struct inpcb *inp = (struct inpcb *)so->so_pcb; + struct ifnet *ifp = inp->inp_last_outifp; + + if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) + return; + + if (ifp != NULL) { + if (ifp->if_sndbyte_unsent >= len) + OSAddAtomic64(-len, &ifp->if_sndbyte_unsent); + else + ifp->if_sndbyte_unsent = 0; + } +} + +inline void +inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack) +{ + int32_t len; + + if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) + return; + + len = inp_get_sndbytes_allunsent(so, th_ack); + inp_decr_sndbytes_unsent(so, len); +}
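
The byte-accounting helpers introduced at the end of this change (inp_count_sndbytes, inp_incr_sndbytes_total, inp_incr_sndbytes_unsent, inp_decr_sndbytes_total, inp_decr_sndbytes_unsent, inp_decr_sndbytes_allunsent) maintain per-interface counters of queued and not-yet-transmitted send-buffer bytes, and inp_count_sndbytes arms them only for cellular and Wi-Fi interfaces. The fragment below is an illustrative sketch only, not part of this diff: it shows one plausible way a transport could pair the increment and decrement calls. The function name and the assumed call sites are hypothetical.

/*
 * Illustrative sketch (not part of this change): one plausible pairing of
 * the interface send-byte accounting helpers defined above. The function
 * name and call-site assumptions are hypothetical; locking, SB_SNDBYTE_CNT
 * gating and error handling are omitted.
 */
static void
example_sndbyte_accounting(struct socket *so, int32_t queued_len,
    int32_t sent_len, int32_t acked_len)
{
	/* New data queued on so_snd counts as both total and still-unsent. */
	inp_incr_sndbytes_total(so, queued_len);
	inp_incr_sndbytes_unsent(so, queued_len);

	/* Once a segment is handed to the interface it is no longer unsent. */
	inp_decr_sndbytes_unsent(so, sent_len);

	/* When acknowledged data is dropped from so_snd, it leaves the total. */
	inp_decr_sndbytes_total(so, acked_len);
}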