X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fe8ab488e9161c46dd9885d58fc52996dc0249ff..eee3565979933af707c711411001ba11fe406a3c:/bsd/net/dlil.c?ds=sidebyside

diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 28d307d13..84fb0898f 100644
--- a/bsd/net/dlil.c
+++ b/bsd/net/dlil.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2016 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -84,6 +84,7 @@
 #include
 #include
 #include
+#include
 #endif /* INET */

 #if INET6
@@ -117,20 +118,24 @@
 #endif /* PF_ALTQ */
 #include

-#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
-#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
-#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
-#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
-#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
+#if NECP
+#include
+#endif /* NECP */

-#define MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
-#define MAX_LINKADDR		4 /* LONGWORDS */
-#define M_NKE			M_IFADDR
+#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
+#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
+#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
+#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
+#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
+
+#define MAX_FRAME_TYPE_SIZE	4 /* LONGWORDS */
+#define MAX_LINKADDR		4 /* LONGWORDS */
+#define M_NKE			M_IFADDR

 #if 1
-#define DLIL_PRINTF	printf
+#define DLIL_PRINTF	printf
 #else
-#define DLIL_PRINTF	kprintf
+#define DLIL_PRINTF	kprintf
 #endif

 #define IF_DATA_REQUIRE_ALIGNED_64(f)	\
@@ -292,33 +297,27 @@ static struct zone *dlif_proto_zone;	/* zone for if_proto */
 #define DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
 #define DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

-static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
-static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
+static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
+static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
 static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */
 #define DLIF_TCPSTAT_ZONE_MAX	1	/* maximum elements in zone */
 #define DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

-static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
+static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
 static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */
 #define DLIF_UDPSTAT_ZONE_MAX	1	/* maximum elements in zone */
 #define DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

-/*
- * Updating this variable should be done by first acquiring the global
- * radix node head (rnh_lock), in tandem with settting/clearing the
- * PR_AGGDRAIN for routedomain.
- */
-u_int32_t ifnet_aggressive_drainers;
 static u_int32_t net_rtref;

 static struct dlil_main_threading_info dlil_main_input_thread_info;
 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
     (struct dlil_threading_info *)&dlil_main_input_thread_info;

-static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
+static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
+    bool update_generation);
 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
 static void dlil_if_trace(struct dlil_ifnet *, int);
 static void if_proto_ref(struct if_proto *);
@@ -356,6 +355,9 @@ static errno_t ifproto_media_send_arp(struct ifnet *, u_short,

 static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
 static void ifp_if_start(struct ifnet *);
+static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
+    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
+    boolean_t poll, struct thread *tp);
 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
     struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
@@ -425,6 +427,10 @@ static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
+static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
+
+struct chain_len_stats tx_chain_len_stats;
+static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

 /* The following are protected by dlil_ifnet_lock */
 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
@@ -472,12 +478,12 @@ static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */
 #define IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
 #define IFNET_FC_ZONE_MAX	32

-extern void bpfdetach(struct ifnet*);
+extern void bpfdetach(struct ifnet *);
 extern void proto_input_run(void);

-extern uint32_t udp_count_opportunistic(unsigned int ifindex,
+extern uint32_t udp_count_opportunistic(unsigned int ifindex,
     u_int32_t flags);
-extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
+extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
     u_int32_t flags);

 __private_extern__ void link_rtrequest(int, struct rtentry *,
     struct sockaddr *);
@@ -516,7 +522,7 @@ SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
     sysctl_sndq_maxlen, "I", "Default transmit queue max length");

 #define IF_RCVQ_MINLEN	32
-#define IF_RCVQ_MAXLEN	256
+#define IF_RCVQ_MAXLEN	256
 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
@@ -592,12 +598,12 @@ SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,

 static u_int32_t cur_dlil_input_threads = 0;
 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
-    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads , 0,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
     "Current number of DLIL input threads");

 #if IFNET_INPUT_SANITY_CHK
 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
-    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check , 0,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
     "Turn on sanity checking in DLIL input");
 #endif /* IFNET_INPUT_SANITY_CHK */

@@ -621,6 +627,16 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
     CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
     "enable hardware cksum debugging");
+u_int32_t ifnet_start_delayed = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
+    "number of times start was delayed");
+
+u_int32_t ifnet_delay_start_disabled = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
+    "number of times start delay was disabled");
+
 #define HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
 #define HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
@@ -696,6 +712,23 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
     CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
     "enable receive hardware checksum offload");

+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
+    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
+    sysctl_tx_chain_len_stats, "S", "");
+
+uint32_t tx_chain_len_count = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
+
+SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
+    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
+
+#if (DEVELOPMENT || DEBUG)
+static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
+SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
+    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
+#endif /* DEVELOPMENT || DEBUG */
+
 unsigned int net_rxpoll = 1;
 unsigned int net_affinity = 1;
 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
@@ -752,7 +785,7 @@ proto_hash_value(u_int32_t protocol_family)
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
-	switch(protocol_family) {
+	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
@@ -837,7 +870,7 @@ if_proto_free(struct if_proto *proto)

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
		    (struct net_event_data *)&ev_pr_data,
-		    sizeof(struct kev_dl_proto_data));
+		    sizeof (struct kev_dl_proto_data));
	}
	zfree(dlif_proto_zone, proto);
 }
@@ -892,6 +925,26 @@ ifnet_lock_done(struct ifnet *ifp)
	lck_rw_done(&ifp->if_lock);
 }

+#if INET
+__private_extern__ void
+if_inetdata_lock_shared(struct ifnet *ifp)
+{
+	lck_rw_lock_shared(&ifp->if_inetdata_lock);
+}
+
+__private_extern__ void
+if_inetdata_lock_exclusive(struct ifnet *ifp)
+{
+	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
+}
+
+__private_extern__ void
+if_inetdata_lock_done(struct ifnet *ifp)
+{
+	lck_rw_done(&ifp->if_inetdata_lock);
+}
+#endif
+
 #if INET6
 __private_extern__ void
 if_inet6data_lock_shared(struct ifnet *ifp)
@@ -930,11 +983,17 @@ ifnet_head_done(void)
	lck_rw_done(&ifnet_head_lock);
 }

+__private_extern__ void
+ifnet_head_assert_exclusive(void)
+{
+	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
+}
+
 /*
  * Caller must already be holding ifnet lock.
  */
 static int
-dlil_ifp_proto_count(struct ifnet * ifp)
+dlil_ifp_proto_count(struct ifnet *ifp)
 {
	int i, count = 0;
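[Editor's note: a minimal sketch, not part of the diff. It illustrates the reader-side pattern the new if_inetdata_lock_* accessors above are built for, mirroring the existing if_inet6data convention. "struct my_inet_ifstate" and its "counter" field are hypothetical; the real layout of if_inetdata is owned by the INET layer.]

static int
my_inet_ifstate_peek(struct ifnet *ifp, int *out)
{
	struct my_inet_ifstate *st;	/* hypothetical per-ifnet AF_INET state */
	int err = 0;

	if_inetdata_lock_shared(ifp);	/* read side: many concurrent readers */
	st = (struct my_inet_ifstate *)ifp->if_inetdata;
	if (st != NULL)
		*out = st->counter;
	else
		err = ENOENT;
	if_inetdata_lock_done(ifp);	/* single unlock path for both modes */
	return (err);
}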
@@ -975,18 +1034,23 @@ dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,

	if (event_data == NULL) {
		event_data = &ev_data;
-		event_data_len = sizeof(struct net_event_data);
+		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
-	event_data->if_unit = (u_int32_t) ifp->if_unit;
+	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

-	dlil_event_internal(ifp, &ev_msg);
+	/* Don't update interface generation for quality and RRC state changes */
+	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
+	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
+	    event_code != KEV_DL_RRC_STATE_CHANGED));
+
+	dlil_event_internal(ifp, &ev_msg, update_generation);
 }

 __private_extern__ int
@@ -1049,6 +1113,23 @@ dlil_alloc_local_stats(struct ifnet *ifp)
		ret = 0;
	}

+	if (ifp->if_ipv4_stat == NULL) {
+		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
+		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
+		if (ifp->if_ipv4_stat == NULL) {
+			ret = ENOMEM;
+			goto end;
+		}
+	}
+
+	if (ifp->if_ipv6_stat == NULL) {
+		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
+		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
+		if (ifp->if_ipv6_stat == NULL) {
+			ret = ENOMEM;
+			goto end;
+		}
+	}
 end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
@@ -1063,6 +1144,14 @@ end:
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
+		if (ifp->if_ipv4_stat != NULL) {
+			FREE(ifp->if_ipv4_stat, M_TEMP);
+			ifp->if_ipv4_stat = NULL;
+		}
+		if (ifp->if_ipv6_stat != NULL) {
+			FREE(ifp->if_ipv6_stat, M_TEMP);
+			ifp->if_ipv6_stat = NULL;
+		}
	}

	return (ret);
@@ -1238,7 +1327,7 @@ dlil_init(void)
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
-	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
@@ -1254,7 +1343,7 @@ dlil_init(void)
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
-	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
@@ -1338,6 +1427,7 @@ dlil_init(void)
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
+	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
@@ -1438,6 +1528,7 @@ dlil_init(void)
	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
+	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();
@@ -1496,6 +1587,9 @@ dlil_init(void)
	/* Initialize the pktap virtual interface */
	pktap_init();

+	/* Initialize the service class to dscp map */
+	net_qos_map_init();
+
 #if DEBUG
	/* Run self-tests */
	dlil_verify_sum16();
@@ -1584,10 +1678,15 @@ dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
-	filter->filt_input = if_filter->iff_input;
-	filter->filt_output = if_filter->iff_output;
-	filter->filt_event = if_filter->iff_event;
-	filter->filt_ioctl = if_filter->iff_ioctl;
+	/*
+	 * Do not install filter callbacks for internal coproc interface
+	 */
+	if (!IFNET_IS_INTCOPROC(ifp)) {
+		filter->filt_input = if_filter->iff_input;
+		filter->filt_output = if_filter->iff_output;
+		filter->filt_event = if_filter->iff_event;
+		filter->filt_ioctl = if_filter->iff_ioctl;
+	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
@@ -1687,9 +1786,6 @@ destroy:
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

-	/* Free the filter */
-	zfree(dlif_filt_zone, filter);
-
	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
@@ -1698,11 +1794,16 @@ destroy:
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
+
+	/* Free the filter */
+	zfree(dlif_filt_zone, filter);
+	filter = NULL;
 done:
-	if (retval != 0) {
+	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}
+
	return (retval);
 }
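[Editor's note: a sketch of the consumer side that dlil_attach_filter() serves — a minimal interface filter registered through the public NKE KPI. On an internal coprocessor interface, the hardened code above silently installs no callbacks except iff_detached. The filter name is hypothetical.]

#include <net/kpi_interfacefilter.h>

static errno_t
my_iff_output(void *cookie, ifnet_t ifp, protocol_family_t pf, mbuf_t *m)
{
#pragma unused(cookie, ifp, pf, m)
	return (0);		/* pass the packet through unmodified */
}

static void
my_iff_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)
	/* last callback ever invoked; tear down cookie state here */
}

static errno_t
my_filter_attach(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter flt = {
		.iff_name     = "com.example.myfilter",	/* hypothetical */
		.iff_output   = my_iff_output,
		.iff_detached = my_iff_detached,
	};
	return (iflt_attach(ifp, &flt, ref));
}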
@@ -1724,6 +1825,7 @@ dlil_detach_filter(interface_filter_t filter)
  *    c) protocol registrations
  *    d) packet injections
  */
+__attribute__((noreturn))
 static void
 dlil_main_input_thread_func(void *v, wait_result_t w)
 {
@@ -1760,21 +1862,21 @@ dlil_main_input_thread_func(void *v, wait_result_t w)

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
-		m = _getq_all(&inp->rcvq_pkts);
+		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
-		m_loop = _getq_all(&inpm->lo_rcvq_pkts);
+		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
-		* NOTE warning %%% attention !!!!
-		* We should think about putting some thread starvation
-		* safeguards if we deal with long chains of packets.
-		*/
+		 * NOTE warning %%% attention !!!!
+		 * We should think about putting some thread starvation
+		 * safeguards if we deal with long chains of packets.
+		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);
@@ -1798,9 +1900,15 @@ static void
 dlil_input_thread_func(void *v, wait_result_t w)
 {
 #pragma unused(w)
+	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

+	/* Construct the name for this thread, and then apply it. */
+	bzero(thread_name, sizeof(thread_name));
+	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
+	thread_set_thread_name(inp->input_thr, thread_name);
+
	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
@@ -1834,7 +1942,7 @@ dlil_input_thread_func(void *v, wait_result_t w)

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
-		m = _getq_all(&inp->rcvq_pkts);
+		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);
@@ -1855,10 +1963,10 @@ dlil_input_thread_func(void *v, wait_result_t w)
		lck_mtx_unlock(&inp->input_lck);

		/*
-		* NOTE warning %%% attention !!!!
-		* We should think about putting some thread starvation
-		* safeguards if we deal with long chains of packets.
-		*/
+		 * NOTE warning %%% attention !!!!
+		 * We should think about putting some thread starvation
+		 * safeguards if we deal with long chains of packets.
+		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
@@ -1940,7 +2048,7 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
-		m = _getq_all(&inp->rcvq_pkts);
+		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
@@ -2101,10 +2209,10 @@ skip:
		}

		/*
-		* NOTE warning %%% attention !!!!
-		* We should think about putting some thread starvation
-		* safeguards if we deal with long chains of packets.
-		*/
+		 * NOTE warning %%% attention !!!!
+		 * We should think about putting some thread starvation
+		 * safeguards if we deal with long chains of packets.
+		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}
@@ -2272,10 +2380,11 @@ static errno_t
 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
     const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
 {
-	struct thread *tp = current_thread();
-	struct mbuf *last;
-	struct dlil_threading_info *inp;
+	ifnet_input_handler_func handler_func;
+	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
+	struct mbuf *last;
+	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
@@ -2298,6 +2407,9 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
		return (EINVAL);
	}

+	handler_func = ifp->if_input_handler;
+	VERIFY(handler_func != NULL);
+
	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
@@ -2355,6 +2467,66 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
		    s->packets_in, m_cnt);
	}

+	if (s == NULL) {
+		bzero(&_s, sizeof (_s));
+		s = &_s;
+	} else {
+		_s = *s;
+	}
+	_s.packets_in = m_cnt;
+	_s.bytes_in = m_size;
+
+	err = (*handler_func)(ifp, m_head, m_tail, s, poll, current_thread());
+
+	if (ifp != lo_ifp) {
+		/* Release the IO refcnt */
+		ifnet_decr_iorefcnt(ifp);
+	}
+
+	return (err);
+}
+
+errno_t
+ifnet_set_input_handler(struct ifnet *ifp, ifnet_input_handler_func fn)
+{
+	return (atomic_test_set_ptr(&ifp->if_input_handler,
+	    dlil_input_handler, fn) ? 0 : EBUSY);
+}
+
+void
+ifnet_reset_input_handler(struct ifnet *ifp)
+{
+	atomic_set_ptr(&ifp->if_input_handler, dlil_input_handler);
+}
+
+errno_t
+ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_handler_func fn)
+{
+	return (atomic_test_set_ptr(&ifp->if_output_handler,
+	    dlil_output_handler, fn) ? 0 : EBUSY);
+}
+
+void
+ifnet_reset_output_handler(struct ifnet *ifp)
+{
+	atomic_set_ptr(&ifp->if_output_handler, dlil_output_handler);
+}
+
+errno_t
+dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
+{
+	return (ifp->if_output(ifp, m));
+}
+
+errno_t
+dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
+    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
+    boolean_t poll, struct thread *tp)
+{
+	struct dlil_threading_info *inp;
+	u_int32_t m_cnt = s->packets_in;
+	u_int32_t m_size = s->bytes_in;
+
	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;
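[Editor's note: a sketch, not part of the diff. These setter/reset functions are private to xnu, so this only illustrates the atomic-handoff semantics visible above: the second caller loses the compare-and-set against the default dlil_input_handler and gets EBUSY until the first caller resets it.]

static errno_t
my_input_intercept(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	/* inspect or steer the chain, then hand it back to DLIL */
	return (dlil_input_handler(ifp, m_head, m_tail, s, poll, tp));
}

static void
my_takeover(struct ifnet *ifp)
{
	if (ifnet_set_input_handler(ifp, my_input_intercept) == EBUSY)
		printf("input handler already overridden\n");
	/* ... later, restore the default path ... */
	ifnet_reset_input_handler(ifp);
}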
@@ -2364,7 +2536,7 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
-	if (inp != dlil_main_input_thread && inp->net_affinity &&
+	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;
@@ -2392,7 +2564,7 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

-	/*
+	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
@@ -2430,17 +2602,15 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
	}
 #endif /* IFNET_INPUT_SANITY_CHK */

-	if (s != NULL) {
-		dlil_input_stats_add(s, inp, poll);
-		/*
-		 * If we're using the main input thread, synchronize the
-		 * stats now since we have the interface context.  All
-		 * other cases involving dedicated input threads will
-		 * have their stats synchronized there.
-		 */
-		if (inp == dlil_main_input_thread)
-			dlil_input_stats_sync(ifp, inp);
-	}
+	dlil_input_stats_add(s, inp, poll);
+	/*
+	 * If we're using the main input thread, synchronize the
+	 * stats now since we have the interface context.  All
+	 * other cases involving dedicated input threads will
+	 * have their stats synchronized there.
+	 */
+	if (inp == dlil_main_input_thread)
+		dlil_input_stats_sync(ifp, inp);

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
@@ -2449,11 +2619,6 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
	}
	lck_mtx_unlock(&inp->input_lck);

-	if (ifp != lo_ifp) {
-		/* Release the IO refcnt */
-		ifnet_decr_iorefcnt(ifp);
-	}
-
	return (0);
 }
@@ -2476,7 +2641,10 @@ ifnet_start_common(struct ifnet *ifp, int resetfc)
		return;
	}
	ifp->if_start_req++;
-	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
+	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
+	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
+	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
+	    ifp->if_start_delayed == 0)) {
		wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
@@ -2494,8 +2662,15 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
 #pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
+	char thread_name[MAXTHREADNAMESIZE];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
+	struct timespec delay_start_ts;
+
+	/* Construct the name for this thread, and then apply it. */
+	bzero(thread_name, sizeof(thread_name));
+	snprintf(thread_name, sizeof(thread_name), "ifnet_start_%s", ifp->if_xname);
+	thread_set_thread_name(ifp->if_start_thread, thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
@@ -2530,8 +2705,10 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
-		(void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
-		    (PZERO - 1) | PSPIN, ifname, ts);
+		if (ifp->if_start_thread != NULL)
+			(void) msleep(&ifp->if_start_thread,
+			    &ifp->if_start_lock,
+			    (PZERO - 1) | PSPIN, ifname, ts);

		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
@@ -2553,20 +2730,51 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
		}

		ifp->if_start_active = 1;
+
		for (;;) {
			u_int32_t req = ifp->if_start_req;
-
+			if (!IFCQ_IS_EMPTY(ifq) &&
+			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
+			    ifp->if_start_delayed == 0 &&
+			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
+			    (ifp->if_eflags & IFEF_DELAY_START)) {
+				ifp->if_start_delayed = 1;
+				ifnet_start_delayed++;
+				break;
+			} else {
+				ifp->if_start_delayed = 0;
+			}
			lck_mtx_unlock(&ifp->if_start_lock);
+
+			/*
+			 * If no longer attached, don't call start because ifp
+			 * is being destroyed; else hold an IO refcnt to
+			 * prevent the interface from being detached (will be
+			 * released below.)
+			 */
+			if (!ifnet_is_attached(ifp, 1)) {
+				lck_mtx_lock_spin(&ifp->if_start_lock);
+				break;
+			}
+
			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));
+
+			/*
+			 * Release the io ref count taken by ifnet_is_attached.
+			 */
+			ifnet_decr_iorefcnt(ifp);
+
			lck_mtx_lock_spin(&ifp->if_start_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_start_req)
				break;
		}
+
		ifp->if_start_req = 0;
		ifp->if_start_active = 0;
+
		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
@@ -2576,6 +2784,12 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

+		if (ts == NULL && ifp->if_start_delayed == 1) {
+			delay_start_ts.tv_sec = 0;
+			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
+			ts = &delay_start_ts;
+		}
+
		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}
@@ -2802,8 +3016,7 @@ ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
	u_int32_t omodel;
	errno_t err;

-	if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
-	    model != IFNET_SCHED_MODEL_NORMAL))
+	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);
@@ -2922,6 +3135,8 @@ errno_t
 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
 {
	int error;
+	struct timespec now;
+	u_int64_t now_nsec;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
@@ -2938,6 +3153,66 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
		return (ENETDOWN);
	}

+	nanouptime(&now);
+	net_timernsec(&now, &now_nsec);
+	m->m_pkthdr.pkt_timestamp = now_nsec;
+	m->m_pkthdr.pkt_flags &= ~PKTF_DRV_TS_VALID;
+
+	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
+		/*
+		 * If the driver chose to delay start callback for
+		 * coalescing multiple packets, then use the following
+		 * heuristics to make sure that start callback will
+		 * be delayed only when bulk data transfer is detected.
+		 * 1. number of packets enqueued in (delay_win * 2) is
+		 * greater than or equal to the delay qlen.
+		 * 2. If delay_start is enabled it will stay enabled for
+		 * another 10 idle windows. This is to take into account
+		 * variable RTT and burst traffic.
+		 * 3. If the time elapsed since last enqueue is more
+		 * than 200ms we disable delaying start callback. This
+		 * is to take idle time into account.
+		 */
+		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
+		if (ifp->if_start_delay_swin > 0) {
+			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
+				ifp->if_start_delay_cnt++;
+			} else if ((now_nsec - ifp->if_start_delay_swin)
+			    >= (200 * 1000 * 1000)) {
+				ifp->if_start_delay_swin = now_nsec;
+				ifp->if_start_delay_cnt = 1;
+				ifp->if_start_delay_idle = 0;
+				if (ifp->if_eflags & IFEF_DELAY_START) {
+					ifp->if_eflags &=
+					    ~(IFEF_DELAY_START);
+					ifnet_delay_start_disabled++;
+				}
+			} else {
+				if (ifp->if_start_delay_cnt >=
+				    ifp->if_start_delay_qlen) {
+					ifp->if_eflags |= IFEF_DELAY_START;
+					ifp->if_start_delay_idle = 0;
+				} else {
+					if (ifp->if_start_delay_idle >= 10) {
+						ifp->if_eflags &= ~(IFEF_DELAY_START);
+						ifnet_delay_start_disabled++;
+					} else {
+						ifp->if_start_delay_idle++;
+					}
+				}
+				ifp->if_start_delay_swin = now_nsec;
+				ifp->if_start_delay_cnt = 1;
+			}
+		} else {
+			ifp->if_start_delay_swin = now_nsec;
+			ifp->if_start_delay_cnt = 1;
+			ifp->if_start_delay_idle = 0;
+			ifp->if_eflags &= ~(IFEF_DELAY_START);
+		}
+	} else {
+		ifp->if_eflags &= ~(IFEF_DELAY_START);
+	}
+
	/* enqueue the packet */
	error = ifclassq_enqueue(&ifp->if_snd, m);

@@ -2946,7 +3221,8 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
-	if (error == 0 || error == EQFULL || error == EQSUSPENDED)
+	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
+	    (error == 0 || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
 }
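[Editor's note: a condensed model of the IFEF_DELAY_START heuristic above, with the bookkeeping pulled out of struct ifnet for readability. Names mirror the diff; this is a sketch of the decision logic only, not the shipped code, which also maintains the eflags and global counters in place.]

struct delay_start_state {
	u_int64_t swin;		/* start of current sample window, ns */
	u_int32_t cnt;		/* enqueues seen in the window */
	u_int32_t idle;		/* consecutive idle windows */
	boolean_t delay;	/* corresponds to IFEF_DELAY_START */
};

static void
delay_start_sample(struct delay_start_state *st, u_int64_t now_nsec,
    u_int64_t timeout_nsec, u_int32_t qlen_thresh)
{
	u_int64_t dwin = timeout_nsec << 1;

	if (st->swin == 0) {
		/* first packet: open a sample window */
		st->swin = now_nsec;
		st->cnt = 1;
		st->idle = 0;
		st->delay = FALSE;
	} else if (st->swin + dwin > now_nsec) {
		st->cnt++;		/* still inside the window */
	} else if (now_nsec - st->swin >= 200 * 1000 * 1000ULL) {
		/* long idle gap (rule 3): reset and stop delaying */
		st->swin = now_nsec;
		st->cnt = 1;
		st->idle = 0;
		st->delay = FALSE;
	} else {
		/* window closed: decide (rules 1 and 2), then reopen */
		if (st->cnt >= qlen_thresh) {
			st->delay = TRUE;	/* bulk transfer detected */
			st->idle = 0;
		} else if (st->idle >= 10) {
			st->delay = FALSE;	/* too many idle windows */
		} else {
			st->idle++;
		}
		st->swin = now_nsec;
		st->cnt = 1;
	}
}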
@@ -2959,11 +3235,12 @@ ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
-	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

-	rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
+	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
+	    mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
@@ -2977,48 +3254,69 @@ ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
-	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
-
+
	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);
	return (rc);
 }

 errno_t
-ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
-    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
+    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{
+	errno_t rc;
+	if (ifp == NULL || head == NULL || pkt_limit < 1)
+		return (EINVAL);
+	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
+		return (ENXIO);
+	if (!ifnet_is_attached(ifp, 1))
+		return (ENXIO);
+
+	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
+	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, head, tail, cnt, len);
+	ifnet_decr_iorefcnt(ifp);
+	return (rc);
+}
+
+errno_t
+ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
+    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
 {
	errno_t rc;
-	if (ifp == NULL || head == NULL || limit < 1)
+	if (ifp == NULL || head == NULL || byte_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
-	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
-
-	rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
+
+	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
+	    byte_limit, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);
	return (rc);
 }

 errno_t
 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
-    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
     u_int32_t *len)
 {
	errno_t rc;
-	if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
+	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
+	    !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
-	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

-	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
+	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit, head,
	    tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);
	return (rc);
 }
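[Editor's note: a sketch of a driver if_start routine built on the new byte-limited dequeue above. MY_TX_BYTE_BUDGET and my_hw_tx() are hypothetical; the KPI fills in head/tail/cnt/len and returns non-zero when the queue is empty or the interface is detaching.]

#define MY_TX_BYTE_BUDGET	(64 * 1024)	/* hypothetical per-pass budget */

static void
my_ifnet_start(ifnet_t ifp)
{
	struct mbuf *head, *tail;
	u_int32_t cnt, len;

	for (;;) {
		if (ifnet_dequeue_multi_bytes(ifp, MY_TX_BYTE_BUDGET,
		    &head, &tail, &cnt, &len) != 0)
			break;
		/* hand the chain of cnt packets / len bytes to hardware */
		my_hw_tx(head, cnt, len);	/* hypothetical driver hook */
	}
}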
@@ -3150,7 +3448,6 @@ dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
-	return;
 }

 static void
@@ -3232,6 +3529,8 @@ dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
	    (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
	    ifp->if_data_threshold) {
		ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
+
+		lck_mtx_convert_spin(&inp->input_lck);
		nstat_ifnet_threshold_reached(ifp->if_index);
	}
	/*
@@ -3276,7 +3575,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
	mbuf_t *	pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

-	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
@@ -3388,7 +3687,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
				goto next;
			}
		}
-		if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
+		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}
@@ -3453,7 +3752,7 @@ next:
		ifnet_decr_iorefcnt(ifp);
	}

-	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
 }

 errno_t
@@ -3473,14 +3772,32 @@ if_mcasts_update(struct ifnet *ifp)
	return (0);
 }

-static int
-dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
+/* If ifp is set, we will increment the generation for the interface */
+int
+dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
 {
-	struct ifnet_filter *filter;
+	if (ifp != NULL) {
+		ifnet_increment_generation(ifp);
+	}

-	/* Get an io ref count if the interface is attached */
-	if (!ifnet_is_attached(ifp, 1))
-		goto done;
+#if NECP
+	necp_update_all_clients();
+#endif /* NECP */
+
+	return (kev_post_msg(event));
+}
+
+#define TMP_IF_PROTO_ARR_SIZE	10
+static int
+dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
+{
+	struct ifnet_filter *filter = NULL;
+	struct if_proto *proto = NULL;
+	int if_proto_count = 0;
+	struct if_proto **tmp_ifproto_arr = NULL;
+	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
+	int tmp_ifproto_arr_idx = 0;
+	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
@@ -3502,65 +3819,95 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
		if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

+	/* Get an io ref count if the interface is attached */
+	if (!ifnet_is_attached(ifp, 1))
+		goto done;
+
+	/*
+	 * An embedded tmp_list_entry in if_proto may still get
+	 * over-written by another thread after giving up ifnet lock,
+	 * therefore we are avoiding embedded pointers here.
+	 */
	ifnet_lock_shared(ifp);
-	if (ifp->if_proto_hash != NULL) {
+	if_proto_count = dlil_ifp_proto_count(ifp);
+	if (if_proto_count) {
		int i;
+		VERIFY(ifp->if_proto_hash != NULL);
+		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
+			tmp_ifproto_arr = tmp_ifproto_stack_arr;
+		} else {
+			MALLOC(tmp_ifproto_arr, struct if_proto **,
+			    sizeof (*tmp_ifproto_arr) * if_proto_count,
+			    M_TEMP, M_ZERO);
+			if (tmp_ifproto_arr == NULL) {
+				ifnet_lock_done(ifp);
+				goto cleanup;
+			}
+			tmp_malloc = true;
+		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
-			struct if_proto *proto;
-
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
-				proto_media_event eventp =
-				    (proto->proto_kpi == kProtoKPI_v1 ?
-				    proto->kpi.v1.event :
-				    proto->kpi.v2.event);
-
-				if (eventp != NULL) {
-					if_proto_ref(proto);
-					ifnet_lock_done(ifp);
-
-					eventp(ifp, proto->protocol_family,
-					    event);
-
-					ifnet_lock_shared(ifp);
-					if_proto_free(proto);
-				}
+				if_proto_ref(proto);
+				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
+				tmp_ifproto_arr_idx++;
			}
		}
+		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

+	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
+	    tmp_ifproto_arr_idx++) {
+		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
+		VERIFY(proto != NULL);
+		proto_media_event eventp =
+		    (proto->proto_kpi == kProtoKPI_v1 ?
+		    proto->kpi.v1.event :
+		    proto->kpi.v2.event);
+
+		if (eventp != NULL) {
+			eventp(ifp, proto->protocol_family,
+			    event);
+		}
+		if_proto_free(proto);
+	}
+
+cleanup:
+	if (tmp_malloc) {
+		FREE(tmp_ifproto_arr, M_TEMP);
+	}
+
	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
 done:
-	return (kev_post_msg(event));
+	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
 }

 errno_t
 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
 {
-	struct kev_msg               kev_msg;
+	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
-	kev_msg.vendor_code    = event->vendor_code;
-	kev_msg.kev_class      = event->kev_class;
-	kev_msg.kev_subclass   = event->kev_subclass;
-	kev_msg.event_code     = event->event_code;
+	kev_msg.vendor_code = event->vendor_code;
+	kev_msg.kev_class = event->kev_class;
+	kev_msg.kev_subclass = event->kev_subclass;
+	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

-	result = dlil_event_internal(ifp, &kev_msg);
+	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
 }
@@ -3682,6 +4029,38 @@ ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
	}
 }

+static void
+dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
+{
+	mbuf_t	n = m;
+	int chainlen = 0;
+
+	while (n != NULL) {
+		chainlen++;
+		n = n->m_next;
+	}
+	switch (chainlen) {
+		case 0:
+			break;
+		case 1:
+			atomic_add_64(&cls->cls_one, 1);
+			break;
+		case 2:
+			atomic_add_64(&cls->cls_two, 1);
+			break;
+		case 3:
+			atomic_add_64(&cls->cls_three, 1);
+			break;
+		case 4:
+			atomic_add_64(&cls->cls_four, 1);
+			break;
+		case 5:
+		default:
+			atomic_add_64(&cls->cls_five_or_more, 1);
+			break;
+	}
+}
+
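[Editor's note: a userland sketch for reading the new chain-length histogram, assuming the sysctl name composed from the declarations above. The local struct chain_len_stats layout (five 64-bit buckets) is inferred from dlil_count_chain_len() and would normally come from the kernel headers.]

#include <stdio.h>
#include <stdint.h>
#include <sys/sysctl.h>

struct chain_len_stats {
	uint64_t cls_one, cls_two, cls_three, cls_four, cls_five_or_more;
};

int
main(void)
{
	struct chain_len_stats cls;
	size_t len = sizeof (cls);

	if (sysctlbyname("net.link.generic.system.tx_chain_len_stats",
	    &cls, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("1:%llu 2:%llu 3:%llu 4:%llu 5+:%llu\n",
	    cls.cls_one, cls.cls_two, cls.cls_three, cls.cls_four,
	    cls.cls_five_or_more);
	return (0);
}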
 /*
  * dlil_output
  *
@@ -3703,6 +4082,7 @@ errno_t
 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
     void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
 {
+	ifnet_output_handler_func handler_func;
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
@@ -3719,14 +4099,19 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

-	/* Get an io refcnt if the interface is attached to prevent ifnet_detach
-	 * from happening while this operation is in progress */
+	/*
+	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
+	 * from happening while this operation is in progress
+	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

+	handler_func = ifp->if_output_handler;
+	VERIFY(handler_func != NULL);
+
	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;
@@ -3783,16 +4168,16 @@ preout_again:
	do {
 #if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
-			struct ip *ip = mtod(m, struct ip*);
-			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+			struct ip *ip = mtod(m, struct ip *);
+			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
				struct ip *, ip, struct ifnet *, ifp,
				struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
-			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
-			DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
-				struct ip6_hdr *, ip6, struct ifnet*, ifp,
-				struct ip*, NULL, struct ip6_hdr *, ip6);
+			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+				struct ip6_hdr *, ip6, struct ifnet *, ifp,
+				struct ip *, NULL, struct ip6_hdr *, ip6);
		}
 #endif /* CONFIG_DTRACE */

@@ -3896,18 +4281,29 @@ preout_again:
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
-		if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) &&
-		    (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
-		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB)
-			ifp->if_fg_sendts = net_uptime();
+		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
+		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
+			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
+				ifp->if_fg_sendts = net_uptime();
+
+			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
+				ifp->if_rt_sendts = net_uptime();
+		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

+		/*
+		 * Count the number of elements in the mbuf chain
+		 */
+		if (tx_chain_len_count) {
+			dlil_count_chain_len(m, &tx_chain_len_stats);
+		}
+
		/*
		 * Finally, call the driver.
		 */
-		if (ifp->if_eflags & IFEF_SENDLIST) {
+		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
@@ -3923,7 +4319,7 @@ preout_again:
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
-		retval = (*ifp->if_output)(ifp, m);
+		retval = (*handler_func)(ifp, m);
		if (retval == EQFULL || retval == EQSUSPENDED) {
			if (adv != NULL && adv->code == FADV_SUCCESS) {
				adv->code = (retval == EQFULL ?
@@ -3955,24 +4351,58 @@ next:
	} while (m != NULL);

	if (send_head != NULL) {
-		VERIFY(ifp->if_eflags & IFEF_SENDLIST);
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
-		retval = (*ifp->if_output)(ifp, send_head);
-		if (retval == EQFULL || retval == EQSUSPENDED) {
-			if (adv != NULL) {
-				adv->code = (retval == EQFULL ?
-				    FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
+		if (ifp->if_eflags & IFEF_SENDLIST) {
+			retval = (*handler_func)(ifp, send_head);
+			if (retval == EQFULL || retval == EQSUSPENDED) {
+				if (adv != NULL) {
+					adv->code = (retval == EQFULL ?
+					    FADV_FLOW_CONTROLLED :
+					    FADV_SUSPENDED);
+				}
+				retval = 0;
+			}
+			if (retval == 0 && flen > 0) {
+				fbytes += flen;
+				fpkts++;
+			}
+			if (retval != 0 && dlil_verbose) {
+				printf("%s: output error on %s retval = %d\n",
+				    __func__, if_name(ifp), retval);
+			}
+		} else {
+			struct mbuf *send_m;
+			int enq_cnt = 0;
+			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
+			while (send_head != NULL) {
+				send_m = send_head;
+				send_head = send_m->m_nextpkt;
+				send_m->m_nextpkt = NULL;
+				retval = (*handler_func)(ifp, send_m);
+				if (retval == EQFULL || retval == EQSUSPENDED) {
+					if (adv != NULL) {
+						adv->code = (retval == EQFULL ?
+						    FADV_FLOW_CONTROLLED :
+						    FADV_SUSPENDED);
+					}
+					retval = 0;
+				}
+				if (retval == 0) {
+					enq_cnt++;
+					if (flen > 0)
+						fpkts++;
+				}
+				if (retval != 0 && dlil_verbose) {
+					printf("%s: output error on %s "
+					    "retval = %d\n",
+					    __func__, if_name(ifp), retval);
+				}
+			}
+			if (enq_cnt > 0) {
+				fbytes += flen;
+				ifnet_start(ifp);
			}
-			retval = 0;
-		}
-		if (retval == 0 && flen > 0) {
-			fbytes += flen;
-			fpkts++;
-		}
-		if (retval != 0 && dlil_verbose) {
-			printf("%s: output error on %s retval = %d\n",
-			    __func__, if_name(ifp), retval);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}
@@ -4011,7 +4441,8 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

-	/* Run the interface filters first.
+	/*
+	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
@@ -4113,7 +4544,7 @@ dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
-			return ENXIO;
+			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
@@ -4130,7 +4561,7 @@ dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
-		return result;
+		return (result);

	bzero(ll_addr, ll_len);

@@ -4143,7 +4574,7 @@ dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
	    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
	if (resolvep != NULL)
		result = resolvep(ifp, proto_addr,
-		    (struct sockaddr_dl*)(void *)ll_addr, ll_len);
+		    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
	if_proto_free(proto);
	}

@@ -4162,8 +4593,8 @@ dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,

 __private_extern__ errno_t
 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
-	const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
-	const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
+	const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
+	const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
 {
	struct if_proto *proto;
	errno_t	result = 0;
@@ -4204,7 +4635,7 @@ struct net_thread_marks { };
 static const struct net_thread_marks net_thread_marks_base = { };

 __private_extern__ const net_thread_marks_t net_thread_marks_none =
-	&net_thread_marks_base;
+    &net_thread_marks_base;

 __private_extern__ net_thread_marks_t
 net_thread_marks_push(u_int32_t push)
@@ -4244,7 +4675,7 @@ __private_extern__ void
 net_thread_marks_pop(net_thread_marks_t popx)
 {
	static const char *const base = (const void*)&net_thread_marks_base;
-	ptrdiff_t pop = (caddr_t)popx - (caddr_t)base;
+	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
@@ -4260,7 +4691,7 @@ __private_extern__ void
 net_thread_unmarks_pop(net_thread_marks_t unpopx)
 {
	static const char *const base = (const void*)&net_thread_marks_base;
-	ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base;
+	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
@@ -4305,9 +4736,9 @@ _is_announcement(const struct sockaddr_in * sender_sin,
 }

 __private_extern__ errno_t
-dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
-    const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
-    const struct sockaddr* target_proto0, u_int32_t rtflags)
+dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
+    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
+    const struct sockaddr *target_proto0, u_int32_t rtflags)
 {
	errno_t	result = 0;
	const struct sockaddr_in * sender_sin;
@@ -4429,10 +4860,11 @@ ifnet_lookup(struct ifnet *ifp)
	}
	return (_ifp != NULL);
 }
+
 /*
  * Caller has to pass a non-zero refio argument to get a
  * IO reference count. This will prevent ifnet_detach from
- * being called when there are outstanding io reference counts. 
+ * being called when there are outstanding io reference counts.
  */
 int
 ifnet_is_attached(struct ifnet *ifp, int refio)
@@ -4450,6 +4882,22 @@ ifnet_is_attached(struct ifnet *ifp, int refio)
	return (ret);
 }

+/*
+ * Caller must ensure the interface is attached; the assumption is that
+ * there is at least an outstanding IO reference count held already.
+ * Most callers would call ifnet_is_attached() instead.
+ */
+void
+ifnet_incr_iorefcnt(struct ifnet *ifp)
+{
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
+	    IFRF_ATTACHED);
+	VERIFY(ifp->if_refio > 0);
+	ifp->if_refio++;
+	lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
 void
 ifnet_decr_iorefcnt(struct ifnet *ifp)
 {
@@ -4458,10 +4906,11 @@ ifnet_decr_iorefcnt(struct ifnet *ifp)
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

-	/* if there are no more outstanding io references, wakeup the
+	/*
+	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
-	if (ifp->if_refio == 0 &&
+	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
@@ -4646,7 +5095,7 @@ end:
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
-	if (retval != 0 && ifproto != NULL)
+	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
 }
@@ -4736,7 +5185,7 @@ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
-		proto->kpi.v1.pre_output= ifproto_media_preout;
+		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
@@ -4833,6 +5282,7 @@ ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
 }

 extern int if_next_index(void);
+extern int tcp_ecn_outbound;

 errno_t
 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
@@ -4921,9 +5371,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

-	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
-	TAILQ_INIT(&ifp->if_prefixhead);
-
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
@@ -4995,7 +5442,8 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
-	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
+	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
+	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
@@ -5137,6 +5585,54 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

+	VERIFY(ifp->if_agentids == NULL);
+	VERIFY(ifp->if_agentcount == 0);
+
+	/* Reset interface state */
+	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
+	ifp->if_interface_state.valid_bitmask |=
+	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+	ifp->if_interface_state.interface_availability =
+	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
+
+	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
+	if (ifp == lo_ifp) {
+		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
+		ifp->if_interface_state.valid_bitmask |=
+		    IF_INTERFACE_STATE_LQM_STATE_VALID;
+	} else {
+		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
+	}
+
+	/*
+	 * Enable ECN capability on this interface depending on the
+	 * value of ECN global setting
+	 */
+	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
+		ifp->if_eflags |= IFEF_ECN_ENABLE;
+		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+	}
+
+	/*
+	 * Built-in Cyclops always on policy for WiFi infra
+	 */
+	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
+		errno_t error;
+
+		error = if_set_qosmarking_mode(ifp,
+		    IFRTYPE_QOSMARKING_FASTLANE);
+		if (error != 0) {
+			printf("%s if_set_qosmarking_mode(%s) error %d\n",
+			    __func__, ifp->if_xname, error);
+		} else {
+			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
+#if (DEVELOPMENT || DEBUG)
+			printf("%s fastlane enabled on %s\n",
+			    __func__, ifp->if_xname);
+#endif /* (DEVELOPMENT || DEBUG) */
+		}
+	}
+
	ifnet_lock_done(ifp);
	ifnet_head_done();

@@ -5189,9 +5685,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
-	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
-	ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
-	    IFNET_LQM_THRESH_UNKNOWN;
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
@@ -5250,9 +5743,10 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
-	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
+	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) +
+	    ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
-#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
+#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
@@ -5315,8 +5809,13 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
-	bcopy(workbuf, asdl->sdl_data, namelen);
-	asdl->sdl_nlen = namelen;
+	if (namelen > 0) {
+		bcopy(workbuf, asdl->sdl_data, min(namelen,
+		    sizeof (asdl->sdl_data)));
+		asdl->sdl_nlen = namelen;
+	} else {
+		asdl->sdl_nlen = 0;
+	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
@@ -5325,9 +5824,9 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
	} else {
		asdl->sdl_alen = 0;
	}
-	ifa->ifa_netmask = (struct sockaddr*)msdl;
+	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
-	while (namelen != 0)
+	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);
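[Editor's note: a worked example of the sizing arithmetic above, assuming the usual Darwin layouts, added for illustration.]

/*
 * For "en0" (namelen 3) with a 6-byte Ethernet address:
 *   offsetof(struct sockaddr_dl, sdl_data[0]) == 8
 *   masklen  = 8 + 3  == 11
 *   socksize = 11 + 6 == 17
 *   ROUNDUP(17) = 1 + ((17 - 1) | 3) == 20 == sizeof (struct sockaddr_dl)
 * so no padding beyond the base sockaddr_dl is needed, and the netmask
 * loop writes 0xff over exactly the three name bytes of sdl_data.
 */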
@@ -5352,10 +5851,15 @@ errno_t
 ifnet_detach(ifnet_t ifp)
 {
	struct ifnet *delegated_ifp;
+	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

+	ndi = ND_IFINFO(ifp);
+	if (NULL != ndi)
+		ndi->cga_initialized = FALSE;
+
	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);
@@ -5369,7 +5873,7 @@ ifnet_detach(ifnet_t ifp)
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
-	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
+	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
@@ -5391,6 +5895,10 @@ ifnet_detach(ifnet_t ifp)
	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

+	/* Reset ECN enable/disable flags */
+	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+	ifp->if_eflags &= ~IFEF_ECN_ENABLE;
+
	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
@@ -5399,8 +5907,15 @@ ifnet_detach(ifnet_t ifp)
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
+	if (ifp->if_ordered_link.tqe_next != NULL ||
+	    ifp->if_ordered_link.tqe_prev != NULL) {
+		ifnet_remove_from_ordered_list(ifp);
+	}
	ifindex2ifnet[ifp->if_index] = NULL;

+	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
+	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
+
	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

@@ -5411,6 +5926,9 @@ ifnet_detach(ifnet_t ifp)
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

+	/* Reset interface state */
+	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
+
	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);
@@ -5421,7 +5939,7 @@ ifnet_detach(ifnet_t ifp)

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
-		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
+		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
@@ -5431,6 +5949,28 @@ ifnet_detach(ifnet_t ifp)
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

+	/* Reset ifnet IPv4 stats */
+	if (ifp->if_ipv4_stat != NULL)
+		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
+
+	/* Reset ifnet IPv6 stats */
+	if (ifp->if_ipv6_stat != NULL)
+		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
+
+	/* Release memory held for interface link status report */
+	if (ifp->if_link_status != NULL) {
+		FREE(ifp->if_link_status, M_TEMP);
+		ifp->if_link_status = NULL;
+	}
+
+	/* Clear agent IDs */
+	if (ifp->if_agentids != NULL) {
+		FREE(ifp->if_agentids, M_NETAGENT);
+		ifp->if_agentids = NULL;
+	}
+	ifp->if_agentcount = 0;
+
+
	/* Let BPF know we're detaching */
	bpfdetach(ifp);
@@ -5520,8 +6060,6 @@ ifnet_detacher_thread_cont(int err)
			dlil_if_lock();
		}
	}
-	/* NOTREACHED */
-	return (0);
 }

 static void
@@ -5629,9 +6167,8 @@ ifnet_detach_final(struct ifnet *ifp)
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
-
-	/* Prefix list should be empty by now */
-	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
+	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
@@ -5715,10 +6252,12 @@ ifnet_detach_final(struct ifnet *ifp)

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
+	ifp->if_output_handler = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
+	ifp->if_input_handler = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
@@ -5745,6 +6284,10 @@ ifnet_detach_final(struct ifnet *ifp)
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

+	/* QoS marking get cleared */
+	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
+	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
+
	ifnet_lock_done(ifp);

 #if PF
@@ -5781,9 +6324,6 @@ ifnet_detach_final(struct ifnet *ifp)

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

-	if (if_free != NULL)
-		if_free(ifp);
-
	/*
	 * Finally, mark this ifnet as detached.
	 */
@@ -5795,6 +6335,8 @@ ifnet_detach_final(struct ifnet *ifp)
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
+	if (if_free != NULL)
+		if_free(ifp);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));
@@ -5807,7 +6349,7 @@ static errno_t
 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
 {
 #pragma unused(ifp)
-	m_freem(m);
+	m_freem_list(m);
	return (0);
 }

@@ -5817,6 +6359,16 @@ ifp_if_start(struct ifnet *ifp)
	ifnet_purge(ifp);
 }

+static errno_t
+ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
+    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
+    boolean_t poll, struct thread *tp)
+{
+#pragma unused(ifp, m_tail, s, poll, tp)
+	m_freem_list(m_head);
+	return (ENXIO);
+}
+
 static void
 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
     struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
@@ -5940,7 +6492,7 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid,
		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
-		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
+		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
@@ -6023,10 +6575,19 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid,
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+#if INET
+	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
+	    ifnet_lock_attr);
+	ifp1->if_inetdata = NULL;
+#endif
 #if INET6
-	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
+	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
 #endif
+	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
+	    ifnet_lock_attr);
+	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
@@ -6065,16 +6626,16 @@ dlil_if_release(ifnet_t ifp)
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
-	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
+	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
 #if CONFIG_MACF_NET
	/*
-	* We can either recycle the MAC label here or in dlil_if_acquire().
-	* It seems logical to do it here but this means that anything that
-	* still has a handle on ifp will now see it as unlabeled.
-	* Since the interface is "dead" that may be OK. Revisit later.
-	*/
+	 * We can either recycle the MAC label here or in dlil_if_acquire().
+	 * It seems logical to do it here but this means that anything that
+	 * still has a handle on ifp will now see it as unlabeled.
+	 * Since the interface is "dead" that may be OK. Revisit later.
+ */ mac_ifnet_label_recycle(ifp); #endif ifnet_lock_done(ifp); @@ -6207,7 +6768,7 @@ ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) } #if INET6 -struct rtentry* +struct rtentry * ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) { struct route_in6 src_rt; @@ -6246,26 +6807,43 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) #endif /* INET6 */ void -if_lqm_update(struct ifnet *ifp, int lqm) +if_lqm_update(struct ifnet *ifp, int lqm, int locked) { struct kev_dl_link_quality_metric_data ev_lqm_data; VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX); /* Normalize to edge */ - if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD) + if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD) lqm = IFNET_LQM_THRESH_BAD; else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR) lqm = IFNET_LQM_THRESH_POOR; else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD) lqm = IFNET_LQM_THRESH_GOOD; - ifnet_lock_exclusive(ifp); - if (lqm == ifp->if_lqm) { - ifnet_lock_done(ifp); + /* + * Take the lock if needed + */ + if (!locked) + ifnet_lock_exclusive(ifp); + + if (lqm == ifp->if_interface_state.lqm_state && + (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID)) { + /* + * Release the lock if was not held by the caller + */ + if (!locked) + ifnet_lock_done(ifp); return; /* nothing to update */ } - ifp->if_lqm = lqm; + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_LQM_STATE_VALID; + ifp->if_interface_state.lqm_state = lqm; + + /* + * Don't want to hold the lock when issuing kernel events + */ ifnet_lock_done(ifp); bzero(&ev_lqm_data, sizeof (ev_lqm_data)); @@ -6273,6 +6851,157 @@ if_lqm_update(struct ifnet *ifp, int lqm) dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED, (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data)); + + /* + * Reacquire the lock for the caller + */ + if (locked) + ifnet_lock_exclusive(ifp); +} + +static void +if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state) +{ + struct kev_dl_rrc_state kev; + + if (rrc_state == ifp->if_interface_state.rrc_state && + (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID)) + return; + + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_RRC_STATE_VALID; + + ifp->if_interface_state.rrc_state = rrc_state; + + /* + * Don't want to hold the lock when issuing kernel events + */ + ifnet_lock_done(ifp); + + bzero(&kev, sizeof(struct kev_dl_rrc_state)); + kev.rrc_state = rrc_state; + + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED, + (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state)); + + ifnet_lock_exclusive(ifp); +} + +errno_t +if_state_update(struct ifnet *ifp, + struct if_interface_state *if_interface_state) +{ + u_short if_index_available = 0; + + ifnet_lock_exclusive(ifp); + + if ((ifp->if_type != IFT_CELLULAR) && + (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID)) { + ifnet_lock_done(ifp); + return (ENOTSUP); + } + if ((if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID) && + (if_interface_state->lqm_state < IFNET_LQM_MIN || + if_interface_state->lqm_state > IFNET_LQM_MAX)) { + ifnet_lock_done(ifp); + return (EINVAL); + } + if ((if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID) && + if_interface_state->rrc_state != + IF_INTERFACE_STATE_RRC_STATE_IDLE && + if_interface_state->rrc_state != + IF_INTERFACE_STATE_RRC_STATE_CONNECTED) { + 
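/*
 * Sketch of the validity-bitmask pattern that if_lqm_update() and
 * if_rrc_state_update() above both use (names below are hypothetical):
 * a field only counts as "set" once its bit appears in valid_bitmask,
 * so a legitimate zero value can be told apart from "never reported",
 * and an unchanged-and-already-valid update is skipped without posting
 * a kernel event.
 */
#include <stdint.h>

#define STATE_LQM_VALID	0x1
#define STATE_RRC_VALID	0x2

struct if_state {
	uint32_t	valid_bitmask;	/* which fields below are live */
	int		lqm;
	int		rrc;
};

/* Returns 1 if the stored value changed (caller should post an event). */
static int
state_set_lqm(struct if_state *st, int lqm)
{
	if ((st->valid_bitmask & STATE_LQM_VALID) && st->lqm == lqm)
		return (0);		/* already valid and unchanged */
	st->valid_bitmask |= STATE_LQM_VALID;
	st->lqm = lqm;
	return (1);
}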
ifnet_lock_done(ifp); + return (EINVAL); + } + + if (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID) { + if_lqm_update(ifp, if_interface_state->lqm_state, 1); + } + if (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID) { + if_rrc_state_update(ifp, if_interface_state->rrc_state); + } + if (if_interface_state->valid_bitmask & + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) { + ifp->if_interface_state.valid_bitmask |= + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; + ifp->if_interface_state.interface_availability = + if_interface_state->interface_availability; + + if (ifp->if_interface_state.interface_availability == + IF_INTERFACE_STATE_INTERFACE_AVAILABLE) { + if_index_available = ifp->if_index; + } + } + ifnet_lock_done(ifp); + + /* + * Check if the TCP connections going on this interface should be + * forced to send probe packets instead of waiting for TCP timers + * to fire. This will be done when there is an explicit + * notification that the interface became available. + */ + if (if_index_available > 0) + tcp_interface_send_probe(if_index_available); + + return (0); +} + +void +if_get_state(struct ifnet *ifp, + struct if_interface_state *if_interface_state) +{ + ifnet_lock_shared(ifp); + + if_interface_state->valid_bitmask = 0; + + if (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_RRC_STATE_VALID) { + if_interface_state->valid_bitmask |= + IF_INTERFACE_STATE_RRC_STATE_VALID; + if_interface_state->rrc_state = + ifp->if_interface_state.rrc_state; + } + if (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_LQM_STATE_VALID) { + if_interface_state->valid_bitmask |= + IF_INTERFACE_STATE_LQM_STATE_VALID; + if_interface_state->lqm_state = + ifp->if_interface_state.lqm_state; + } + if (ifp->if_interface_state.valid_bitmask & + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) { + if_interface_state->valid_bitmask |= + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; + if_interface_state->interface_availability = + ifp->if_interface_state.interface_availability; + } + + ifnet_lock_done(ifp); +} + +errno_t +if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe) +{ + ifnet_lock_exclusive(ifp); + if (conn_probe > 1) { + ifnet_lock_done(ifp); + return (EINVAL); + } + if (conn_probe == 0) + ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY; + else + ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY; + ifnet_lock_done(ifp); + + tcp_probe_connectivity(ifp, conn_probe); + return (0); } /* for uuid.c */ @@ -6571,7 +7300,7 @@ dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN], _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN); _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN); - bzero(&kev, sizeof (&kev)); + bzero(&kev, sizeof (kev)); microtime(&tv); kev.timestamp = tv.tv_sec; @@ -6625,7 +7354,7 @@ ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr, uint32_t flags = 0; flags |= (cmd == SIOCSIFOPPORTUNISTIC) ? INPCB_OPPORTUNISTIC_SETCMD : 0; - flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ? + flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ? INPCB_OPPORTUNISTIC_THROTTLEON : 0; ifr->ifr_opportunistic.ifo_inuse = udp_count_opportunistic(ifp->if_index, flags) + @@ -6779,7 +7508,7 @@ ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, * silently ignore facilities other than ours. 
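/*
 * Minimal sketch of the shape of if_probe_connectivity() above, with
 * hypothetical stand-ins for ifp->if_eflags and the flag bit: validate
 * the strictly-boolean input first, flip the bit while the exclusive
 * lock would be held, and only notify TCP after the lock is dropped.
 */
#include <errno.h>
#include <stdint.h>

#define EF_PROBE_CONNECTIVITY	0x4	/* hypothetical flag bit */

static uint32_t if_eflags;		/* stands in for ifp->if_eflags */

static int
set_probe_connectivity(uint32_t conn_probe)
{
	if (conn_probe > 1)
		return (EINVAL);	/* reject anything non-boolean */
	/* exclusive lock would be held across this read-modify-write */
	if (conn_probe == 0)
		if_eflags &= ~EF_PROBE_CONNECTIVITY;
	else
		if_eflags |= EF_PROBE_CONNECTIVITY;
	/* lock dropped before notifying TCP, as in the code above */
	return (0);
}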
*/ flags &= IFNET_LOGF_DLIL; - if (flags == 0 && (!ifp->if_log.flags & IFNET_LOGF_DLIL)) + if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) level = 0; } @@ -7041,6 +7770,125 @@ try_again: return (flowhash); } +int +ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len, + uint16_t flags, uint8_t *data) +{ +#pragma unused(flags) + int error = 0; + + switch (family) { + case AF_INET: + if_inetdata_lock_exclusive(ifp); + if (IN_IFEXTRA(ifp) != NULL) { + if (len == 0) { + /* Allow clearing the signature */ + IN_IFEXTRA(ifp)->netsig_len = 0; + bzero(IN_IFEXTRA(ifp)->netsig, + sizeof (IN_IFEXTRA(ifp)->netsig)); + if_inetdata_lock_done(ifp); + break; + } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) { + error = EINVAL; + if_inetdata_lock_done(ifp); + break; + } + IN_IFEXTRA(ifp)->netsig_len = len; + bcopy(data, IN_IFEXTRA(ifp)->netsig, len); + } else { + error = ENOMEM; + } + if_inetdata_lock_done(ifp); + break; + + case AF_INET6: + if_inet6data_lock_exclusive(ifp); + if (IN6_IFEXTRA(ifp) != NULL) { + if (len == 0) { + /* Allow clearing the signature */ + IN6_IFEXTRA(ifp)->netsig_len = 0; + bzero(IN6_IFEXTRA(ifp)->netsig, + sizeof (IN6_IFEXTRA(ifp)->netsig)); + if_inet6data_lock_done(ifp); + break; + } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) { + error = EINVAL; + if_inet6data_lock_done(ifp); + break; + } + IN6_IFEXTRA(ifp)->netsig_len = len; + bcopy(data, IN6_IFEXTRA(ifp)->netsig, len); + } else { + error = ENOMEM; + } + if_inet6data_lock_done(ifp); + break; + + default: + error = EINVAL; + break; + } + + return (error); +} + +int +ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len, + uint16_t *flags, uint8_t *data) +{ + int error = 0; + + if (ifp == NULL || len == NULL || flags == NULL || data == NULL) + return (EINVAL); + + switch (family) { + case AF_INET: + if_inetdata_lock_shared(ifp); + if (IN_IFEXTRA(ifp) != NULL) { + if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) { + error = EINVAL; + if_inetdata_lock_done(ifp); + break; + } + if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) + bcopy(IN_IFEXTRA(ifp)->netsig, data, *len); + else + error = ENOENT; + } else { + error = ENOMEM; + } + if_inetdata_lock_done(ifp); + break; + + case AF_INET6: + if_inet6data_lock_shared(ifp); + if (IN6_IFEXTRA(ifp) != NULL) { + if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) { + error = EINVAL; + if_inet6data_lock_done(ifp); + break; + } + if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) + bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len); + else + error = ENOENT; + } else { + error = ENOMEM; + } + if_inet6data_lock_done(ifp); + break; + + default: + error = EINVAL; + break; + } + + if (error == 0) + *flags = 0; + + return (error); +} + static void dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff, protocol_family_t pf) @@ -7156,7 +8004,7 @@ dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header, } return; } - rxoff -=hlen; + rxoff -= hlen; if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) { /* @@ -7270,6 +8118,25 @@ sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS return (err); } +static int +sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int err; + + if (req->oldptr == USER_ADDR_NULL) { + + } + if (req->newptr != USER_ADDR_NULL) { + return (EPERM); + } + err = SYSCTL_OUT(req, &tx_chain_len_stats, + sizeof(struct chain_len_stats)); + + return (err); +} + + #if DEBUG /* Blob for sum16 verification */ static uint8_t sumdata[] = { @@ -7435,3 +8302,169 @@ 
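/*
 * Editorial note on the one-character fix in ifnet_set_log() above:
 * `!` binds tighter than `&`, so `!flags & BIT` parses as
 * `(!flags) & BIT`, which for any BIT without the low bit set is
 * constantly zero -- the test can never fire. A small standalone
 * demonstration (values are hypothetical):
 */
#include <assert.h>

int
main(void)
{
	unsigned int flags = 0x2;	/* the 0x4 bit is clear */
	unsigned int BIT = 0x4;

	assert((!flags & BIT) == 0);	/* buggy form: (!0x2)=0, 0&0x4=0 */
	assert(!(flags & BIT));		/* intended form: bit really clear */

	flags = 0x4;			/* now the bit is set */
	assert((!flags & BIT) == 0);	/* buggy form is *still* zero */
	assert((flags & BIT) != 0);	/* intended test now differs */
	return (0);
}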
dlil_kev_dl_code_str(u_int32_t event_code) } return (""); } + +/* + * Mirror the arguments of ifnet_get_local_ports_extended() + * ifindex + * protocol + * flags + */ +static int +sysctl_get_ports_used SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp) + int *name = (int *)arg1; + int namelen = arg2; + int error = 0; + int idx; + protocol_family_t protocol; + u_int32_t flags; + ifnet_t ifp = NULL; + u_int8_t *bitfield = NULL; + + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + if (namelen != 3) { + error = ENOENT; + goto done; + } + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = bitstr_size(65536); + goto done; + } + if (req->oldlen < bitstr_size(65536)) { + error = ENOMEM; + goto done; + } + + idx = name[0]; + protocol = name[1]; + flags = name[2]; + + ifnet_head_lock_shared(); + if (idx > if_index) { + ifnet_head_done(); + error = ENOENT; + goto done; + } + ifp = ifindex2ifnet[idx]; + ifnet_head_done(); + + bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK); + if (bitfield == NULL) { + error = ENOMEM; + goto done; + } + error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield); + if (error != 0) { + printf("%s: ifnet_get_local_ports_extended() error %d\n", + __func__, error); + goto done; + } + error = SYSCTL_OUT(req, bitfield, bitstr_size(65536)); +done: + if (bitfield != NULL) + _FREE(bitfield, M_TEMP); + return (error); +} + +#if (DEVELOPMENT || DEBUG) +/* + * The sysctl variable name contains the input parameters of + * ifnet_get_keepalive_offload_frames() + * ifp (interface index): name[0] + * frames_array_count: name[1] + * frame_data_offset: name[2] + * The return length gives used_frames_count + */ +static int +sysctl_get_kao_frames SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp) + int *name = (int *)arg1; + u_int namelen = arg2; + int idx; + ifnet_t ifp = NULL; + u_int32_t frames_array_count; + size_t frame_data_offset; + u_int32_t used_frames_count; + struct ifnet_keepalive_offload_frame *frames_array = NULL; + int error = 0; + u_int32_t i; + + /* + * Only root can get look at other people TCP frames + */ + error = proc_suser(current_proc()); + if (error != 0) + goto done; + /* + * Validate the input parameters + */ + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + if (namelen != 3) { + error = EINVAL; + goto done; + } + if (req->oldptr == USER_ADDR_NULL) { + error = EINVAL; + goto done; + } + if (req->oldlen == 0) { + error = EINVAL; + goto done; + } + idx = name[0]; + frames_array_count = name[1]; + frame_data_offset = name[2]; + + /* Make sure the passed buffer is large enough */ + if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) > + req->oldlen) { + error = ENOMEM; + goto done; + } + + ifnet_head_lock_shared(); + if (idx > if_index) { + ifnet_head_done(); + error = ENOENT; + goto done; + } + ifp = ifindex2ifnet[idx]; + ifnet_head_done(); + + frames_array = _MALLOC(frames_array_count * + sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK); + if (frames_array == NULL) { + error = ENOMEM; + goto done; + } + + error = ifnet_get_keepalive_offload_frames(ifp, frames_array, + frames_array_count, frame_data_offset, &used_frames_count); + if (error != 0) { + printf("%s: ifnet_get_keepalive_offload_frames error %d\n", + __func__, error); + goto done; + } + + for (i = 0; i < used_frames_count; i++) { + error = SYSCTL_OUT(req, frames_array + i, + sizeof(struct ifnet_keepalive_offload_frame)); + if (error != 0) { + goto done; + } + } +done: + if (frames_array != NULL) + 
_FREE(frames_array, M_TEMP);
+	return (error);
+}
+#endif /* DEVELOPMENT || DEBUG */
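/*
 * Editorial sketch (not from the diff): sysctl_get_ports_used() and
 * sysctl_get_kao_frames() above follow the same sysctl conventions --
 * a NULL output pointer is a size probe, a too-short buffer is ENOMEM,
 * and every failure path funnels through one cleanup label so the
 * temporary allocation is always released. Hypothetical stand-ins
 * below for the kernel's request and allocator types.
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct sysctl_req {
	void	*oldptr;	/* user buffer; NULL means "size probe" */
	size_t	 oldlen;	/* buffer length in, bytes written out */
};

static int
copyout_blob(struct sysctl_req *req, size_t blob_len,
    int (*fill)(void *, size_t))
{
	void *buf = NULL;
	int error = 0;

	if (req->oldptr == NULL) {
		req->oldlen = blob_len;		/* report required size */
		goto done;
	}
	if (req->oldlen < blob_len) {
		error = ENOMEM;			/* caller's buffer too small */
		goto done;
	}
	buf = malloc(blob_len);
	if (buf == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = fill(buf, blob_len);		/* failures still reach done */
	if (error == 0)
		memcpy(req->oldptr, buf, blob_len);
done:
	free(buf);		/* free(NULL) is a defined no-op */
	return (error);
}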