X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5c9f46613a83ebfc29a5b1f099448259e96a98f0..eb6b6ca394357805f2bdba989abae309f718b4d8:/bsd/net/dlil.c diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index 9219ab940..4a703aee2 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2017 Apple Inc. All rights reserved. + * Copyright (c) 1999-2019 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,7 +67,9 @@ #include #include +#include #include +#include #include #include #include @@ -75,7 +77,9 @@ #include #include #include - +#include +#include +#include #if INET #include #include @@ -87,15 +91,21 @@ #include #include #include +#include +#include +#include #endif /* INET */ #if INET6 +#include #include #include #include #include +#include +#include #endif /* INET6 */ - +#include #include #include @@ -116,37 +126,40 @@ #include #endif /* PF */ #include +#include #if NECP #include #endif /* NECP */ -#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0) -#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2) -#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8)) -#define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8)) -#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8)) +#include -#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */ -#define MAX_LINKADDR 4 /* LONGWORDS */ -#define M_NKE M_IFADDR +#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0) +#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2) +#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8)) +#define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8)) +#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8)) + +#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */ +#define MAX_LINKADDR 4 /* LONGWORDS */ +#define M_NKE M_IFADDR #if 1 -#define DLIL_PRINTF printf +#define DLIL_PRINTF printf #else -#define DLIL_PRINTF kprintf +#define DLIL_PRINTF kprintf #endif -#define 
IF_DATA_REQUIRE_ALIGNED_64(f) \ +#define IF_DATA_REQUIRE_ALIGNED_64(f) \ _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t))) -#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \ +#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \ _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t))) enum { - kProtoKPI_v1 = 1, - kProtoKPI_v2 = 2 + kProtoKPI_v1 = 1, + kProtoKPI_v2 = 2 }; /* @@ -156,101 +169,101 @@ enum { * a reference to it is valid, via if_proto_ref(). */ struct if_proto { - SLIST_ENTRY(if_proto) next_hash; - u_int32_t refcount; - u_int32_t detached; - struct ifnet *ifp; - protocol_family_t protocol_family; - int proto_kpi; - union { + SLIST_ENTRY(if_proto) next_hash; + u_int32_t refcount; + u_int32_t detached; + struct ifnet *ifp; + protocol_family_t protocol_family; + int proto_kpi; + union { struct { - proto_media_input input; - proto_media_preout pre_output; - proto_media_event event; - proto_media_ioctl ioctl; - proto_media_detached detached; - proto_media_resolve_multi resolve_multi; - proto_media_send_arp send_arp; + proto_media_input input; + proto_media_preout pre_output; + proto_media_event event; + proto_media_ioctl ioctl; + proto_media_detached detached; + proto_media_resolve_multi resolve_multi; + proto_media_send_arp send_arp; } v1; struct { - proto_media_input_v2 input; - proto_media_preout pre_output; - proto_media_event event; - proto_media_ioctl ioctl; - proto_media_detached detached; - proto_media_resolve_multi resolve_multi; - proto_media_send_arp send_arp; + proto_media_input_v2 input; + proto_media_preout pre_output; + proto_media_event event; + proto_media_ioctl ioctl; + proto_media_detached detached; + proto_media_resolve_multi resolve_multi; + proto_media_send_arp send_arp; } v2; } kpi; }; SLIST_HEAD(proto_hash_entry, if_proto); -#define DLIL_SDLDATALEN \ +#define DLIL_SDLDATALEN \ (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0])) struct dlil_ifnet { - struct ifnet dl_if; /* public ifnet */ + struct ifnet 
dl_if; /* public ifnet */ /* * DLIL private fields, protected by dl_if_lock */ decl_lck_mtx_data(, dl_if_lock); - TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */ - u_int32_t dl_if_flags; /* flags (below) */ - u_int32_t dl_if_refcnt; /* refcnt */ + TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */ + u_int32_t dl_if_flags; /* flags (below) */ + u_int32_t dl_if_refcnt; /* refcnt */ void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */ - void *dl_if_uniqueid; /* unique interface id */ - size_t dl_if_uniqueid_len; /* length of the unique id */ - char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */ - char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */ + void *dl_if_uniqueid; /* unique interface id */ + size_t dl_if_uniqueid_len; /* length of the unique id */ + char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */ + char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */ struct { - struct ifaddr ifa; /* lladdr ifa */ - u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */ - u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */ + struct ifaddr ifa; /* lladdr ifa */ + u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */ + u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */ } dl_if_lladdr; u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */ struct dlil_threading_info dl_if_inpstorage; /* input thread storage */ - ctrace_t dl_if_attach; /* attach PC stacktrace */ - ctrace_t dl_if_detach; /* detach PC stacktrace */ + ctrace_t dl_if_attach; /* attach PC stacktrace */ + ctrace_t dl_if_detach; /* detach PC stacktrace */ }; /* Values for dl_if_flags (private to DLIL) */ -#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */ -#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */ -#define DLIF_DEBUG 0x4 /* has debugging info */ +#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */ +#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */ +#define DLIF_DEBUG 0x4 /* has 
debugging info */ -#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */ +#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */ /* For gdb */ __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE; struct dlil_ifnet_dbg { - struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */ - u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */ - u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */ + struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */ + u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */ + u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */ /* * Circular lists of ifnet_{reference,release} callers. */ - ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE]; - ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE]; + ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE]; + ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE]; }; -#define DLIL_TO_IFP(s) (&s->dl_if) -#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s) +#define DLIL_TO_IFP(s) (&s->dl_if) +#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s) struct ifnet_filter { - TAILQ_ENTRY(ifnet_filter) filt_next; - u_int32_t filt_skip; - u_int32_t filt_flags; - ifnet_t filt_ifp; - const char *filt_name; - void *filt_cookie; - protocol_family_t filt_protocol; - iff_input_func filt_input; - iff_output_func filt_output; - iff_event_func filt_event; - iff_ioctl_func filt_ioctl; - iff_detached_func filt_detached; + TAILQ_ENTRY(ifnet_filter) filt_next; + u_int32_t filt_skip; + u_int32_t filt_flags; + ifnet_t filt_ifp; + const char *filt_name; + void *filt_cookie; + protocol_family_t filt_protocol; + iff_input_func filt_input; + iff_output_func filt_output; + iff_event_func filt_event; + iff_ioctl_func filt_ioctl; + iff_detached_func filt_detached; }; struct proto_input_entry; @@ -267,48 +280,48 @@ decl_lck_mtx_data(static, dlil_ifnet_lock); u_int32_t dlil_filter_disable_tso_count = 0; #if DEBUG -static unsigned int ifnet_debug = 1; /* debugging (enabled) */ +static unsigned int ifnet_debug = 1; 
/* debugging (enabled) */ #else -static unsigned int ifnet_debug; /* debugging (disabled) */ +static unsigned int ifnet_debug; /* debugging (disabled) */ #endif /* !DEBUG */ -static unsigned int dlif_size; /* size of dlil_ifnet to allocate */ -static unsigned int dlif_bufsize; /* size of dlif_size + headroom */ -static struct zone *dlif_zone; /* zone for dlil_ifnet */ +static unsigned int dlif_size; /* size of dlil_ifnet to allocate */ +static unsigned int dlif_bufsize; /* size of dlif_size + headroom */ +static struct zone *dlif_zone; /* zone for dlil_ifnet */ -#define DLIF_ZONE_MAX 64 /* maximum elements in zone */ -#define DLIF_ZONE_NAME "ifnet" /* zone name */ +#define DLIF_ZONE_MAX IFNETS_MAX /* maximum elements in zone */ +#define DLIF_ZONE_NAME "ifnet" /* zone name */ -static unsigned int dlif_filt_size; /* size of ifnet_filter */ -static struct zone *dlif_filt_zone; /* zone for ifnet_filter */ +static unsigned int dlif_filt_size; /* size of ifnet_filter */ +static struct zone *dlif_filt_zone; /* zone for ifnet_filter */ -#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */ -#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */ +#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */ +#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */ -static unsigned int dlif_phash_size; /* size of ifnet proto hash table */ -static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */ +static unsigned int dlif_phash_size; /* size of ifnet proto hash table */ +static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */ -#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */ -#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */ +#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */ +#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */ -static unsigned int dlif_proto_size; /* size of if_proto */ -static struct zone *dlif_proto_zone; /* zone for if_proto */ +static 
unsigned int dlif_proto_size; /* size of if_proto */ +static struct zone *dlif_proto_zone; /* zone for if_proto */ -#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */ -#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */ +#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */ +#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */ -static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */ +static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */ static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */ -static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */ +static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */ -#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */ -#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */ +#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */ +#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */ -static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */ -static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */ -static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */ +static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */ +static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */ +static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */ -#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */ -#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */ +#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */ +#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */ static u_int32_t net_rtref; @@ -322,7 +335,8 @@ static void dlil_if_trace(struct dlil_ifnet *, int); static void if_proto_ref(struct if_proto *); static void if_proto_free(struct if_proto *); static struct if_proto *find_attached_proto(struct 
ifnet *, u_int32_t); -static int dlil_ifp_proto_count(struct ifnet *); +static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list, + u_int32_t list_count); static void if_flt_monitor_busy(struct ifnet *); static void if_flt_monitor_unbusy(struct ifnet *); static void if_flt_monitor_enter(struct ifnet *); @@ -382,18 +396,27 @@ static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *); static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *); static void dlil_main_input_thread_func(void *, wait_result_t); +static void dlil_main_input_thread_cont(void *, wait_result_t); + static void dlil_input_thread_func(void *, wait_result_t); +static void dlil_input_thread_cont(void *, wait_result_t); + static void dlil_rxpoll_input_thread_func(void *, wait_result_t); +static void dlil_rxpoll_input_thread_cont(void *, wait_result_t); + static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *); static void dlil_terminate_input_thread(struct dlil_threading_info *); static void dlil_input_stats_add(const struct ifnet_stat_increment_param *, - struct dlil_threading_info *, boolean_t); -static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *); + struct dlil_threading_info *, struct ifnet *, boolean_t); +static boolean_t dlil_input_stats_sync(struct ifnet *, + struct dlil_threading_info *); static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *, u_int32_t, ifnet_model_t, boolean_t); static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *, boolean_t, boolean_t); - +static int dlil_is_clat_needed(protocol_family_t, mbuf_t ); +static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *); +static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *); #if DEBUG || DEVELOPMENT static void dlil_verify_sum16(void); #endif /* DEBUG || DEVELOPMENT */ @@ -402,17 +425,23 @@ static void 
dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t, static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *, protocol_family_t); +static void dlil_incr_pending_thread_count(void); +static void dlil_decr_pending_thread_count(void); + static void ifnet_detacher_thread_func(void *, wait_result_t); static int ifnet_detacher_thread_cont(int); static void ifnet_detach_final(struct ifnet *); static void ifnet_detaching_enqueue(struct ifnet *); static struct ifnet *ifnet_detaching_dequeue(void); -static void ifnet_start_thread_fn(void *, wait_result_t); -static void ifnet_poll_thread_fn(void *, wait_result_t); -static void ifnet_poll(struct ifnet *); -static errno_t ifnet_enqueue_common(struct ifnet *, void *, - classq_pkt_type_t, boolean_t, boolean_t *); +static void ifnet_start_thread_func(void *, wait_result_t); +static void ifnet_start_thread_cont(void *, wait_result_t); + +static void ifnet_poll_thread_func(void *, wait_result_t); +static void ifnet_poll_thread_cont(void *, wait_result_t); + +static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *, + boolean_t, boolean_t *); static void ifp_src_route_copyout(struct ifnet *, struct route *); static void ifp_src_route_copyin(struct ifnet *, struct route *); @@ -432,7 +461,6 @@ static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS; static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS; static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS; static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS; -static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS; struct chain_len_stats tx_chain_len_stats; static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS; @@ -444,29 +472,29 @@ static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS; /* The following are protected by dlil_ifnet_lock */ static TAILQ_HEAD(, ifnet) ifnet_detaching_head; static u_int32_t ifnet_detaching_cnt; -static void *ifnet_delayed_run; /* wait channel for detaching thread */ +static void 
*ifnet_delayed_run; /* wait channel for detaching thread */ decl_lck_mtx_data(static, ifnet_fc_lock); static uint32_t ifnet_flowhash_seed; struct ifnet_flowhash_key { - char ifk_name[IFNAMSIZ]; - uint32_t ifk_unit; - uint32_t ifk_flags; - uint32_t ifk_eflags; - uint32_t ifk_capabilities; - uint32_t ifk_capenable; - uint32_t ifk_output_sched_model; - uint32_t ifk_rand1; - uint32_t ifk_rand2; + char ifk_name[IFNAMSIZ]; + uint32_t ifk_unit; + uint32_t ifk_flags; + uint32_t ifk_eflags; + uint32_t ifk_capabilities; + uint32_t ifk_capenable; + uint32_t ifk_output_sched_model; + uint32_t ifk_rand1; + uint32_t ifk_rand2; }; /* Flow control entry per interface */ struct ifnet_fc_entry { RB_ENTRY(ifnet_fc_entry) ifce_entry; - u_int32_t ifce_flowhash; - struct ifnet *ifce_ifp; + u_int32_t ifce_flowhash; + struct ifnet *ifce_ifp; }; static uint32_t ifnet_calc_flowhash(struct ifnet *); @@ -481,19 +509,19 @@ RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree; RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp); RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp); -static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */ -static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */ +static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */ +static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */ -#define IFNET_FC_ZONE_NAME "ifnet_fc_zone" -#define IFNET_FC_ZONE_MAX 32 +#define IFNET_FC_ZONE_NAME "ifnet_fc_zone" +#define IFNET_FC_ZONE_MAX 32 extern void bpfdetach(struct ifnet *); extern void proto_input_run(void); extern uint32_t udp_count_opportunistic(unsigned int ifindex, - u_int32_t flags); + u_int32_t flags); extern uint32_t tcp_count_opportunistic(unsigned int ifindex, - u_int32_t flags); + u_int32_t flags); __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); @@ -515,88 +543,80 @@ int dlil_verbose = 0; static u_int32_t dlil_input_sanity_check = 0; #endif /* IFNET_INPUT_SANITY_CHK */ /* rate 
limit debug messages */ -struct timespec dlil_dbgrate = { 1, 0 }; +struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 }; SYSCTL_DECL(_net_link_generic_system); -#if CONFIG_MACF -SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq, - CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0, - "Require MACF system info check to expose link-layer address"); -#endif - SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages"); -#define IF_SNDQ_MINLEN 32 +#define IF_SNDQ_MINLEN 32 u_int32_t if_sndq_maxlen = IFQ_MAXLEN; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN, sysctl_sndq_maxlen, "I", "Default transmit queue max length"); -#define IF_RCVQ_MINLEN 32 -#define IF_RCVQ_MAXLEN 256 +#define IF_RCVQ_MINLEN 32 +#define IF_RCVQ_MAXLEN 256 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN, sysctl_rcvq_maxlen, "I", "Default receive queue max length"); -#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */ -static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY; +#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */ +u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY; SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay, CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY, "ilog2 of EWMA decay rate of avg inbound packets"); -#define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */ -#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */ +#define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */ +#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */ static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time, CTLTYPE_QUAD | CTLFLAG_RW | 
CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime, IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime, "Q", "input poll mode freeze time"); -#define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */ -#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */ +#define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */ +#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */ static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime, IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime, "Q", "input poll sampling time"); -#define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */ -#define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */ static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time, IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time, "Q", "input poll interval (time)"); -#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */ -static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS; +#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */ +u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS; SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts, CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts, IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)"); -#define IF_RXPOLL_WLOWAT 10 -static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT; +#define IF_RXPOLL_WLOWAT 10 +static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat, IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat, "I", "input poll wakeup low watermark"); 
-#define IF_RXPOLL_WHIWAT 100 -static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT; +#define IF_RXPOLL_WHIWAT 100 +static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat, IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat, "I", "input poll wakeup high watermark"); -static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */ +static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max, CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0, "max packets per poll call"); -static u_int32_t if_rxpoll = 1; +u_int32_t if_rxpoll = 1; SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0, sysctl_rxpoll, "I", "enable opportunistic input polling"); @@ -651,11 +671,11 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled, CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0, "number of times start was delayed"); -#define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */ -#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */ -#define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */ -#define HWCKSUM_DBG_MASK \ - (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \ +#define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */ +#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */ +#define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */ +#define HWCKSUM_DBG_MASK \ + (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \ HWCKSUM_DBG_FINALIZE_FORCED) static uint32_t hwcksum_dbg_mode = 0; @@ -734,14 +754,11 @@ uint32_t tx_chain_len_count = 0; SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count, CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, ""); 
-SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used, - CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, ""); - -static uint32_t threshold_notify = 1; /* enable/disable */ +static uint32_t threshold_notify = 1; /* enable/disable */ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify, CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, ""); -static uint32_t threshold_interval = 2; /* in seconds */ +static uint32_t threshold_interval = 2; /* in seconds */ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval, CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, ""); @@ -752,76 +769,96 @@ SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames, #endif /* DEVELOPMENT || DEBUG */ struct net_api_stats net_api_stats; -SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED, - &net_api_stats, net_api_stats, ""); +SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED, + &net_api_stats, net_api_stats, ""); unsigned int net_rxpoll = 1; unsigned int net_affinity = 1; static kern_return_t dlil_affinity_set(struct thread *, u_int32_t); -extern u_int32_t inject_buckets; +extern u_int32_t inject_buckets; -static lck_grp_attr_t *dlil_grp_attributes = NULL; -static lck_attr_t *dlil_lck_attributes = NULL; +static lck_grp_attr_t *dlil_grp_attributes = NULL; +static lck_attr_t *dlil_lck_attributes = NULL; /* DLIL data threshold thread call */ static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t); -static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t); - -uint32_t dlil_rcv_mit_pkts_min = 5; -uint32_t dlil_rcv_mit_pkts_max = 64; -uint32_t dlil_rcv_mit_interval = (500 * 1000); - -#if (DEVELOPMENT || DEBUG) -SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min, - CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, ""); -SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max, - CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, ""); 
-SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval, - CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, ""); -#endif /* DEVELOPMENT || DEBUG */ +void +ifnet_filter_update_tso(boolean_t filter_enable) +{ + /* + * update filter count and route_generation ID to let TCP + * know it should reevalute doing TSO or not + */ + OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count); + routegenid_update(); +} -#define DLIL_INPUT_CHECK(m, ifp) { \ - struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \ - if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \ - !(mbuf_flags(m) & MBUF_PKTHDR)) { \ - panic_plain("%s: invalid mbuf %p\n", __func__, m); \ - /* NOTREACHED */ \ - } \ +#define DLIL_INPUT_CHECK(m, ifp) { \ + struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \ + if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \ + !(mbuf_flags(m) & MBUF_PKTHDR)) { \ + panic_plain("%s: invalid mbuf %p\n", __func__, m); \ + /* NOTREACHED */ \ + } \ } -#define DLIL_EWMA(old, new, decay) do { \ - u_int32_t _avg; \ - if ((_avg = (old)) > 0) \ - _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \ - else \ - _avg = (new); \ - (old) = _avg; \ +#define DLIL_EWMA(old, new, decay) do { \ + u_int32_t _avg; \ + if ((_avg = (old)) > 0) \ + _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \ + else \ + _avg = (new); \ + (old) = _avg; \ } while (0) -#define MBPS (1ULL * 1000 * 1000) -#define GBPS (MBPS * 1000) +#define MBPS (1ULL * 1000 * 1000) +#define GBPS (MBPS * 1000) struct rxpoll_time_tbl { - u_int64_t speed; /* downlink speed */ - u_int32_t plowat; /* packets low watermark */ - u_int32_t phiwat; /* packets high watermark */ - u_int32_t blowat; /* bytes low watermark */ - u_int32_t bhiwat; /* bytes high watermark */ + u_int64_t speed; /* downlink speed */ + u_int32_t plowat; /* packets low watermark */ + u_int32_t phiwat; /* packets high watermark */ + u_int32_t blowat; /* bytes low watermark */ + u_int32_t bhiwat; /* bytes high watermark */ }; 
static struct rxpoll_time_tbl rxpoll_tbl[] = { - { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) }, - { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) }, - { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) }, - { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) }, - { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) }, - { 0, 0, 0, 0, 0 } + { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) }, + { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, + { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, + { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, + { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, + { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 } }; +decl_lck_mtx_data(static, dlil_thread_sync_lock); +static uint32_t dlil_pending_thread_cnt = 0; +static void +dlil_incr_pending_thread_count(void) +{ + LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(&dlil_thread_sync_lock); + dlil_pending_thread_cnt++; + lck_mtx_unlock(&dlil_thread_sync_lock); +} + +static void +dlil_decr_pending_thread_count(void) +{ + LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(&dlil_thread_sync_lock); + VERIFY(dlil_pending_thread_cnt > 0); + dlil_pending_thread_cnt--; + if (dlil_pending_thread_cnt == 0) { + wakeup(&dlil_pending_thread_cnt); + } + lck_mtx_unlock(&dlil_thread_sync_lock); +} + int proto_hash_value(u_int32_t protocol_family) { @@ -831,15 +868,17 @@ proto_hash_value(u_int32_t protocol_family) * here; future changes must be applied there as well. 
*/ switch (protocol_family) { - case PF_INET: - return (0); - case PF_INET6: - return (1); - case PF_VLAN: - return (2); - case PF_UNSPEC: - default: - return (3); + case PF_INET: + return 0; + case PF_INET6: + return 1; + case PF_VLAN: + return 2; + case PF_802154: + return 3; + case PF_UNSPEC: + default: + return 4; } } @@ -854,16 +893,19 @@ find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family) ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); - if (ifp->if_proto_hash != NULL) + if (ifp->if_proto_hash != NULL) { proto = SLIST_FIRST(&ifp->if_proto_hash[i]); + } - while (proto != NULL && proto->protocol_family != protocol_family) + while (proto != NULL && proto->protocol_family != protocol_family) { proto = SLIST_NEXT(proto, next_hash); + } - if (proto != NULL) + if (proto != NULL) { if_proto_ref(proto); + } - return (proto); + return proto; } static void @@ -883,19 +925,22 @@ if_proto_free(struct if_proto *proto) struct kev_dl_proto_data ev_pr_data; oldval = atomic_add_32_ov(&proto->refcount, -1); - if (oldval > 1) + if (oldval > 1) { return; + } /* No more reference on this, protocol must have been detached */ VERIFY(proto->detached); if (proto->proto_kpi == kProtoKPI_v1) { - if (proto->kpi.v1.detached) + if (proto->kpi.v1.detached) { proto->kpi.v1.detached(ifp, proto->protocol_family); + } } if (proto->proto_kpi == kProtoKPI_v2) { - if (proto->kpi.v2.detached) + if (proto->kpi.v2.detached) { proto->kpi.v2.detached(ifp, proto->protocol_family); + } } /* @@ -910,12 +955,23 @@ if_proto_free(struct if_proto *proto) */ ifnet_lock_shared(ifp); ev_pr_data.proto_family = proto_family; - ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp); + ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0); ifnet_lock_done(ifp); dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED, (struct net_event_data *)&ev_pr_data, - sizeof (struct kev_dl_proto_data)); + sizeof(struct kev_dl_proto_data)); + + if (ev_pr_data.proto_remaining_count == 0) { + /* 
+ * The protocol count has gone to zero, mark the interface down. + * This used to be done by configd.KernelEventMonitor, but that + * is inherently prone to races (rdar://problem/30810208). + */ + (void) ifnet_set_flags(ifp, 0, IFF_UP); + (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL); + dlil_post_sifflags_msg(ifp); + } zfree(dlif_proto_zone, proto); } @@ -951,8 +1007,9 @@ ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what) panic("bad ifnet assert type: %d", what); /* NOTREACHED */ } - if (ass) + if (ass) { LCK_RW_ASSERT(&ifp->if_lock, type); + } } __private_extern__ void @@ -1038,29 +1095,56 @@ ifnet_head_assert_exclusive(void) } /* - * Caller must already be holding ifnet lock. + * dlil_ifp_protolist + * - get the list of protocols attached to the interface, or just the number + * of attached protocols + * - if the number returned is greater than 'list_count', truncation occurred + * + * Note: + * - caller must already be holding ifnet lock. */ -static int -dlil_ifp_proto_count(struct ifnet *ifp) +static u_int32_t +dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list, + u_int32_t list_count) { - int i, count = 0; + u_int32_t count = 0; + int i; ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); - if (ifp->if_proto_hash == NULL) + if (ifp->if_proto_hash == NULL) { goto done; + } for (i = 0; i < PROTO_HASH_SLOTS; i++) { struct if_proto *proto; SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) { + if (list != NULL && count < list_count) { + list[count] = proto->protocol_family; + } count++; } } done: - return (count); + return count; +} + +__private_extern__ u_int32_t +if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count) +{ + ifnet_lock_shared(ifp); + count = dlil_ifp_protolist(ifp, protolist, count); + ifnet_lock_done(ifp); + return count; } __private_extern__ void +if_free_protolist(u_int32_t *list) +{ + _FREE(list, M_TEMP); +} + +__private_extern__ int dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, 
u_int32_t event_code, struct net_event_data *event_data, u_int32_t event_data_len) @@ -1068,21 +1152,21 @@ dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, struct net_event_data ev_data; struct kev_msg ev_msg; - bzero(&ev_msg, sizeof (ev_msg)); - bzero(&ev_data, sizeof (ev_data)); + bzero(&ev_msg, sizeof(ev_msg)); + bzero(&ev_data, sizeof(ev_data)); /* * a net event always starts with a net_event_data structure * but the caller can generate a simple net event or * provide a longer event structure to post */ - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = event_subclass; - ev_msg.event_code = event_code; + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = event_subclass; + ev_msg.event_code = event_code; if (event_data == NULL) { event_data = &ev_data; - event_data_len = sizeof (struct net_event_data); + event_data_len = sizeof(struct net_event_data); } strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); @@ -1093,12 +1177,23 @@ dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, ev_msg.dv[0].data_ptr = event_data; ev_msg.dv[1].data_length = 0; - /* Don't update interface generation for quality and RRC state changess */ - bool update_generation = (event_subclass != KEV_DL_SUBCLASS || - (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED && - event_code != KEV_DL_RRC_STATE_CHANGED)); + bool update_generation = true; + if (event_subclass == KEV_DL_SUBCLASS) { + /* Don't update interface generation for frequent link quality and state changes */ + switch (event_code) { + case KEV_DL_LINK_QUALITY_METRIC_CHANGED: + case KEV_DL_RRC_STATE_CHANGED: + case KEV_DL_NODE_PRESENCE: + case KEV_DL_NODE_ABSENCE: + case KEV_DL_MASTER_ELECTED: + update_generation = false; + break; + default: + break; + } + } - dlil_event_internal(ifp, &ev_msg, update_generation); + return dlil_event_internal(ifp, &ev_msg, update_generation); } __private_extern__ int @@ 
-1107,8 +1202,9 @@ dlil_alloc_local_stats(struct ifnet *ifp) int ret = EINVAL; void *buf, *base, **pbuf; - if (ifp == NULL) + if (ifp == NULL) { goto end; + } if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) { /* allocate tcpstat_local structure */ @@ -1120,8 +1216,8 @@ dlil_alloc_local_stats(struct ifnet *ifp) bzero(buf, dlif_tcpstat_bufsize); /* Get the 64-bit aligned base address for this object */ - base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), - sizeof (u_int64_t)); + base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t), + sizeof(u_int64_t)); VERIFY(((intptr_t)base + dlif_tcpstat_size) <= ((intptr_t)buf + dlif_tcpstat_bufsize)); @@ -1129,7 +1225,7 @@ dlil_alloc_local_stats(struct ifnet *ifp) * Wind back a pointer size from the aligned base and * save the original address so we can free it later. */ - pbuf = (void **)((intptr_t)base - sizeof (void *)); + pbuf = (void **)((intptr_t)base - sizeof(void *)); *pbuf = buf; ifp->if_tcp_stat = base; @@ -1142,8 +1238,8 @@ dlil_alloc_local_stats(struct ifnet *ifp) bzero(buf, dlif_udpstat_bufsize); /* Get the 64-bit aligned base address for this object */ - base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), - sizeof (u_int64_t)); + base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t), + sizeof(u_int64_t)); VERIFY(((intptr_t)base + dlif_udpstat_size) <= ((intptr_t)buf + dlif_udpstat_bufsize)); @@ -1151,19 +1247,19 @@ dlil_alloc_local_stats(struct ifnet *ifp) * Wind back a pointer size from the aligned base and * save the original address so we can free it later. 
*/ - pbuf = (void **)((intptr_t)base - sizeof (void *)); + pbuf = (void **)((intptr_t)base - sizeof(void *)); *pbuf = buf; ifp->if_udp_stat = base; - VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) && - IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t))); + VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) && + IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t))); ret = 0; } if (ifp->if_ipv4_stat == NULL) { MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *, - sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO); + sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO); if (ifp->if_ipv4_stat == NULL) { ret = ENOMEM; goto end; @@ -1172,23 +1268,23 @@ dlil_alloc_local_stats(struct ifnet *ifp) if (ifp->if_ipv6_stat == NULL) { MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *, - sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO); + sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO); if (ifp->if_ipv6_stat == NULL) { ret = ENOMEM; goto end; } } end: - if (ret != 0) { + if (ifp != NULL && ret != 0) { if (ifp->if_tcp_stat != NULL) { pbuf = (void **) - ((intptr_t)ifp->if_tcp_stat - sizeof (void *)); + ((intptr_t)ifp->if_tcp_stat - sizeof(void *)); zfree(dlif_tcpstat_zone, *pbuf); ifp->if_tcp_stat = NULL; } if (ifp->if_udp_stat != NULL) { pbuf = (void **) - ((intptr_t)ifp->if_udp_stat - sizeof (void *)); + ((intptr_t)ifp->if_udp_stat - sizeof(void *)); zfree(dlif_udpstat_zone, *pbuf); ifp->if_udp_stat = NULL; } @@ -1202,23 +1298,46 @@ end: } } - return (ret); + return ret; +} + +static void +dlil_reset_rxpoll_params(ifnet_t ifp) +{ + ASSERT(ifp != NULL); + ifnet_set_poll_cycle(ifp, NULL); + ifp->if_poll_update = 0; + ifp->if_poll_flags = 0; + ifp->if_poll_req = 0; + ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF; + bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats)); + bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats)); + bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats)); + net_timerclear(&ifp->if_poll_mode_holdtime); 
+ net_timerclear(&ifp->if_poll_mode_lasttime); + net_timerclear(&ifp->if_poll_sample_holdtime); + net_timerclear(&ifp->if_poll_sample_lasttime); + net_timerclear(&ifp->if_poll_dbg_lasttime); } static int dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) { + boolean_t dlil_rxpoll_input; thread_continue_t func; u_int32_t limit; int error; + dlil_rxpoll_input = (ifp != NULL && net_rxpoll && + (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY)); + /* NULL ifp indicates the main input thread, called at dlil_init time */ if (ifp == NULL) { func = dlil_main_input_thread_func; VERIFY(inp == dlil_main_input_thread); (void) strlcat(inp->input_name, "main_input", DLIL_THREADNAME_LEN); - } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { + } else if (dlil_rxpoll_input) { func = dlil_rxpoll_input_thread_func; VERIFY(inp != dlil_main_input_thread); (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN, @@ -1234,15 +1353,7 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes); lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes); - inp->mode = IFNET_MODEL_INPUT_POLL_OFF; - inp->ifp = ifp; /* NULL for main input thread */ - - net_timerclear(&inp->mode_holdtime); - net_timerclear(&inp->mode_lasttime); - net_timerclear(&inp->sample_holdtime); - net_timerclear(&inp->sample_lasttime); - net_timerclear(&inp->dbg_lasttime); - + inp->ifp = ifp; /* NULL for main input thread */ /* * For interfaces that support opportunistic polling, set the * low and high watermarks for outstanding inbound packets/bytes. 
@@ -1251,7 +1362,9 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) */ if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN); - (void) dlil_rxpoll_set_params(ifp, NULL, FALSE); + if (ifp->if_xflags & IFXF_LEGACY) { + (void) dlil_rxpoll_set_params(ifp, NULL, FALSE); + } } else { limit = (u_int32_t)-1; } @@ -1266,7 +1379,7 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) error = kernel_thread_start(func, inp, &inp->input_thr); if (error == KERN_SUCCESS) { ml_thread_policy(inp->input_thr, MACHINE_GROUP, - (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR)); + (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR)); /* * We create an affinity set so that the matching workloop * thread or the starter thread (for loopback) can be @@ -1279,7 +1392,7 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) * Randomize to reduce the probability * of affinity tag namespace collision. */ - read_frandom(&tag, sizeof (tag)); + read_frandom(&tag, sizeof(tag)); if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) { thread_reference(tp); inp->tag = tag; @@ -1296,7 +1409,7 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) } OSAddAtomic(1, &cur_dlil_input_threads); - return (error); + return error; } #if TEST_INPUT_THREAD_TERMINATION @@ -1310,14 +1423,16 @@ sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS i = if_input_thread_termination_spin; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (net_rxpoll == 0) - return (ENXIO); + if (net_rxpoll == 0) { + return ENXIO; + } if_input_thread_termination_spin = i; - return (err); + return err; } #endif /* TEST_INPUT_THREAD_TERMINATION */ @@ -1329,29 +1444,17 @@ dlil_clean_threading_info(struct dlil_threading_info *inp) inp->input_waiting = 0; inp->wtot = 0; - 
bzero(inp->input_name, sizeof (inp->input_name)); + bzero(inp->input_name, sizeof(inp->input_name)); inp->ifp = NULL; VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts)); qlimit(&inp->rcvq_pkts) = 0; - bzero(&inp->stats, sizeof (inp->stats)); + bzero(&inp->stats, sizeof(inp->stats)); VERIFY(!inp->net_affinity); inp->input_thr = THREAD_NULL; VERIFY(inp->wloop_thr == THREAD_NULL); VERIFY(inp->poll_thr == THREAD_NULL); VERIFY(inp->tag == 0); - - inp->mode = IFNET_MODEL_INPUT_POLL_OFF; - bzero(&inp->tstats, sizeof (inp->tstats)); - bzero(&inp->pstats, sizeof (inp->pstats)); - bzero(&inp->sstats, sizeof (inp->sstats)); - - net_timerclear(&inp->mode_holdtime); - net_timerclear(&inp->mode_lasttime); - net_timerclear(&inp->sample_holdtime); - net_timerclear(&inp->sample_lasttime); - net_timerclear(&inp->dbg_lasttime); - #if IFNET_INPUT_SANITY_CHK inp->input_mbuf_cnt = 0; #endif /* IFNET_INPUT_SANITY_CHK */ @@ -1361,6 +1464,7 @@ static void dlil_terminate_input_thread(struct dlil_threading_info *inp) { struct ifnet *ifp = inp->ifp; + classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); VERIFY(current_thread() == inp->input_thr); VERIFY(inp != dlil_main_input_thread); @@ -1369,27 +1473,33 @@ dlil_terminate_input_thread(struct dlil_threading_info *inp) #if TEST_INPUT_THREAD_TERMINATION { /* do something useless that won't get optimized away */ - uint32_t v = 1; + uint32_t v = 1; for (uint32_t i = 0; - i < if_input_thread_termination_spin; - i++) { + i < if_input_thread_termination_spin; + i++) { v = (i + 1) * v; } - printf("the value is %d\n", v); + DLIL_PRINTF("the value is %d\n", v); } #endif /* TEST_INPUT_THREAD_TERMINATION */ lck_mtx_lock_spin(&inp->input_lck); + _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL); VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0); inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE; wakeup_one((caddr_t)&inp->input_waiting); lck_mtx_unlock(&inp->input_lck); + /* free up pending packets */ + if (pkt.cp_mbuf != NULL) { + 
mbuf_freem_list(pkt.cp_mbuf); + } + /* for the extra refcnt from kernel_thread_start() */ thread_deallocate(current_thread()); if (dlil_verbose) { - printf("%s: input thread terminated\n", + DLIL_PRINTF("%s: input thread terminated\n", if_name(ifp)); } @@ -1403,10 +1513,10 @@ dlil_affinity_set(struct thread *tp, u_int32_t tag) { thread_affinity_policy_data_t policy; - bzero(&policy, sizeof (policy)); + bzero(&policy, sizeof(policy)); policy.affinity_tag = tag; - return (thread_policy_set(tp, THREAD_AFFINITY_POLICY, - (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT)); + return thread_policy_set(tp, THREAD_AFFINITY_POLICY, + (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT); } void @@ -1513,6 +1623,9 @@ dlil_init(void) _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE); _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND); _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR); + _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN); + _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN); + _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC); _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY); _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB); @@ -1521,24 +1634,27 @@ dlil_init(void) _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT); _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED); _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC); + _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY); + _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT); _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN); _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN); PE_parse_boot_argn("net_affinity", &net_affinity, - sizeof (net_affinity)); + sizeof(net_affinity)); - PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll)); + PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll)); - PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref)); + 
PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref)); - PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug)); + PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug)); - dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) : - sizeof (struct dlil_ifnet_dbg); + VERIFY(dlil_pending_thread_cnt == 0); + dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) : + sizeof(struct dlil_ifnet_dbg); /* Enforce 64-bit alignment for dlil_ifnet structure */ - dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t); - dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t)); + dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t); + dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t)); dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize, 0, DLIF_ZONE_NAME); if (dlif_zone == NULL) { @@ -1549,7 +1665,7 @@ dlil_init(void) zone_change(dlif_zone, Z_EXPAND, TRUE); zone_change(dlif_zone, Z_CALLERACCT, FALSE); - dlif_filt_size = sizeof (struct ifnet_filter); + dlif_filt_size = sizeof(struct ifnet_filter); dlif_filt_zone = zinit(dlif_filt_size, DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME); if (dlif_filt_zone == NULL) { @@ -1560,7 +1676,7 @@ dlil_init(void) zone_change(dlif_filt_zone, Z_EXPAND, TRUE); zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE); - dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS; + dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS; dlif_phash_zone = zinit(dlif_phash_size, DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME); if (dlif_phash_zone == NULL) { @@ -1571,7 +1687,7 @@ dlil_init(void) zone_change(dlif_phash_zone, Z_EXPAND, TRUE); zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE); - dlif_proto_size = sizeof (struct if_proto); + dlif_proto_size = sizeof(struct if_proto); dlif_proto_zone = zinit(dlif_proto_size, DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME); if (dlif_proto_zone == NULL) { @@ 
-1582,12 +1698,12 @@ dlil_init(void) zone_change(dlif_proto_zone, Z_EXPAND, TRUE); zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE); - dlif_tcpstat_size = sizeof (struct tcpstat_local); + dlif_tcpstat_size = sizeof(struct tcpstat_local); /* Enforce 64-bit alignment for tcpstat_local structure */ dlif_tcpstat_bufsize = - dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t); + dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t); dlif_tcpstat_bufsize = - P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t)); + P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t)); dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize, DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0, DLIF_TCPSTAT_ZONE_NAME); @@ -1599,12 +1715,12 @@ dlil_init(void) zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE); zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE); - dlif_udpstat_size = sizeof (struct udpstat_local); + dlif_udpstat_size = sizeof(struct udpstat_local); /* Enforce 64-bit alignment for udpstat_local structure */ dlif_udpstat_bufsize = - dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t); + dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t); dlif_udpstat_bufsize = - P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t)); + P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t)); dlif_udpstat_zone = zinit(dlif_udpstat_bufsize, DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0, DLIF_UDPSTAT_ZONE_NAME); @@ -1646,11 +1762,12 @@ dlil_init(void) lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group, dlil_lck_attributes); lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes); + lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes); /* Setup interface flow control related items */ lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes); - ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry); + ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry); ifnet_fc_zone = zinit(ifnet_fc_zone_size, IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME); if 
(ifnet_fc_zone == NULL) { @@ -1684,6 +1801,12 @@ dlil_init(void) /* Initialize the service class to dscp map */ net_qos_map_init(); + /* Initialize the interface port list */ + if_ports_used_init(); + + /* Initialize the interface low power mode event handler */ + if_low_power_evhdlr_init(); + #if DEBUG || DEVELOPMENT /* Run self-tests */ dlil_verify_sum16(); @@ -1696,8 +1819,17 @@ dlil_init(void) * Create and start up the main DLIL input thread and the interface * detacher threads once everything is initialized. */ + dlil_incr_pending_thread_count(); dlil_create_input_thread(NULL, dlil_main_input_thread); + /* + * Create ifnet detacher thread. + * When an interface gets detached, part of the detach processing + * is delayed. The interface is added to delayed detach list + * and this thread is woken up to call ifnet_detach_final + * on these interfaces. + */ + dlil_incr_pending_thread_count(); if (kernel_thread_start(ifnet_detacher_thread_func, NULL, &thread) != KERN_SUCCESS) { panic_plain("%s: couldn't create detacher thread", __func__); @@ -1705,6 +1837,21 @@ dlil_init(void) } thread_deallocate(thread); + /* + * Wait for the created kernel threads for dlil to get + * scheduled and run at least once before we proceed + */ + lck_mtx_lock(&dlil_thread_sync_lock); + while (dlil_pending_thread_cnt != 0) { + DLIL_PRINTF("%s: Waiting for all the create dlil kernel threads " + "to get scheduled at least once.\n", __func__); + (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock, (PZERO - 1), + __func__, NULL); + LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED); + } + lck_mtx_unlock(&dlil_thread_sync_lock); + DLIL_PRINTF("%s: All the created dlil kernel threads have been scheduled " + "at least once. 
Proceeding.\n", __func__); } static void @@ -1750,7 +1897,7 @@ if_flt_monitor_leave(struct ifnet *ifp) } __private_extern__ int -dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, +dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, interface_filter_t *filter_ref, u_int32_t flags) { int retval = 0; @@ -1803,8 +1950,7 @@ dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, * know it shouldn't do TSO on this connection */ if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { - OSAddAtomic(1, &dlil_filter_disable_tso_count); - routegenid_update(); + ifnet_filter_update_tso(TRUE); } OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count); INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total); @@ -1812,7 +1958,7 @@ dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total); } if (dlil_verbose) { - printf("%s: %s filter attached\n", if_name(ifp), + DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp), if_filter->iff_name); } done: @@ -1821,14 +1967,15 @@ done: DLIL_PRINTF("%s: failed to attach %s (err=%d)\n", if_name(ifp), if_filter->iff_name, retval); } - if (retval != 0 && filter != NULL) + if (retval != 0 && filter != NULL) { zfree(dlif_filt_zone, filter); + } - return (retval); + return retval; } static int -dlil_detach_filter_internal(interface_filter_t filter, int detached) +dlil_detach_filter_internal(interface_filter_t filter, int detached) { int retval = 0; @@ -1841,8 +1988,9 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached) lck_mtx_lock(&ifp->if_flt_lock); TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) { - if (entry != filter || entry->filt_skip) + if (entry != filter || entry->filt_skip) { continue; + } /* * We've found a match; since it's possible * that the thread gets blocked in the monitor, @@ -1850,7 +1998,7 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached) * not be 
detached since we still have a use * count held during filter attach. */ - entry->filt_skip = 1; /* skip input/output */ + entry->filt_skip = 1; /* skip input/output */ lck_mtx_unlock(&ifp->if_flt_lock); ifnet_head_done(); @@ -1866,7 +2014,7 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached) if_flt_monitor_leave(ifp); lck_mtx_unlock(&ifp->if_flt_lock); if (dlil_verbose) { - printf("%s: %s filter detached\n", + DLIL_PRINTF("%s: %s filter detached\n", if_name(ifp), filter->filt_name); } goto destroy; @@ -1880,22 +2028,23 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached) goto done; } - if (dlil_verbose) - printf("%s filter detached\n", filter->filt_name); + if (dlil_verbose) { + DLIL_PRINTF("%s filter detached\n", filter->filt_name); + } destroy: /* Call the detached function if there is one */ - if (filter->filt_detached) + if (filter->filt_detached) { filter->filt_detached(filter->filt_cookie, filter->filt_ifp); + } /* * Decrease filter count and route_generation ID to let TCP * know it should reevalute doing TSO or not */ if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { - OSAddAtomic(-1, &dlil_filter_disable_tso_count); - routegenid_update(); + ifnet_filter_update_tso(FALSE); } VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0); @@ -1909,17 +2058,39 @@ done: filter->filt_name, retval); } - return (retval); + return retval; } __private_extern__ void dlil_detach_filter(interface_filter_t filter) { - if (filter == NULL) + if (filter == NULL) { return; + } dlil_detach_filter_internal(filter, 0); } +__attribute__((noreturn)) +static void +dlil_main_input_thread_func(void *v, wait_result_t w) +{ +#pragma unused(w) + struct dlil_threading_info *inp = v; + + VERIFY(inp == dlil_main_input_thread); + VERIFY(inp->ifp == NULL); + VERIFY(current_thread() == inp->input_thr); + + dlil_decr_pending_thread_count(); + lck_mtx_lock(&inp->input_lck); + VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING)); + (void) 
assert_wait(&inp->input_waiting, THREAD_UNINT); + lck_mtx_unlock(&inp->input_lck); + (void) thread_block_parameter(dlil_main_input_thread_cont, inp); + /* NOTREACHED */ + __builtin_unreachable(); +} + /* * Main input thread: * @@ -1932,46 +2103,38 @@ dlil_detach_filter(interface_filter_t filter) */ __attribute__((noreturn)) static void -dlil_main_input_thread_func(void *v, wait_result_t w) +dlil_main_input_thread_cont(void *v, wait_result_t wres) { -#pragma unused(w) struct dlil_main_threading_info *inpm = v; struct dlil_threading_info *inp = v; - VERIFY(inp == dlil_main_input_thread); - VERIFY(inp->ifp == NULL); - VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF); + /* main input thread is uninterruptible */ + VERIFY(wres != THREAD_INTERRUPTED); + lck_mtx_lock_spin(&inp->input_lck); + VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE | + DLIL_INPUT_RUNNING))); + inp->input_waiting |= DLIL_INPUT_RUNNING; while (1) { struct mbuf *m = NULL, *m_loop = NULL; u_int32_t m_cnt, m_cnt_loop; + classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); boolean_t proto_req; - lck_mtx_lock_spin(&inp->input_lck); - - /* Wait until there is work to be done */ - while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { - inp->input_waiting &= ~DLIL_INPUT_RUNNING; - (void) msleep(&inp->input_waiting, &inp->input_lck, - (PZERO - 1) | PSPIN, inp->input_name, NULL); - } - - inp->input_waiting |= DLIL_INPUT_RUNNING; inp->input_waiting &= ~DLIL_INPUT_WAITING; - /* Main input thread cannot be terminated */ - VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE)); - proto_req = (inp->input_waiting & (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)); /* Packets for non-dedicated interfaces other than lo0 */ m_cnt = qlen(&inp->rcvq_pkts); - m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL); + _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL); + m = pkt.cp_mbuf; /* Packets exclusive to lo0 */ m_cnt_loop = qlen(&inpm->lo_rcvq_pkts); - m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL); + 
_getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL); + m_loop = pkt.cp_mbuf; inp->wtot = 0; @@ -1982,25 +2145,43 @@ dlil_main_input_thread_func(void *v, wait_result_t w) * We should think about putting some thread starvation * safeguards if we deal with long chains of packets. */ - if (m_loop != NULL) + if (m_loop != NULL) { dlil_input_packet_list_extended(lo_ifp, m_loop, - m_cnt_loop, inp->mode); + m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF); + } - if (m != NULL) + if (m != NULL) { dlil_input_packet_list_extended(NULL, m, - m_cnt, inp->mode); + m_cnt, IFNET_MODEL_INPUT_POLL_OFF); + } - if (proto_req) + if (proto_req) { proto_input_run(); + } + + lck_mtx_lock_spin(&inp->input_lck); + VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING); + /* main input thread cannot be terminated */ + VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE)); + if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { + break; + } } + inp->input_waiting &= ~DLIL_INPUT_RUNNING; + (void) assert_wait(&inp->input_waiting, THREAD_UNINT); + lck_mtx_unlock(&inp->input_lck); + (void) thread_block_parameter(dlil_main_input_thread_cont, inp); + + VERIFY(0); /* we should never get here */ /* NOTREACHED */ - VERIFY(0); /* we should never get here */ + __builtin_unreachable(); } /* * Input thread for interfaces with legacy input model. */ +__attribute__((noreturn)) static void dlil_input_thread_func(void *v, wait_result_t w) { @@ -2009,30 +2190,52 @@ dlil_input_thread_func(void *v, wait_result_t w) struct dlil_threading_info *inp = v; struct ifnet *ifp = inp->ifp; - /* Construct the name for this thread, and then apply it. 
*/ + VERIFY(inp != dlil_main_input_thread); + VERIFY(ifp != NULL); + VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll || + !(ifp->if_xflags & IFXF_LEGACY)); + VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF || + !(ifp->if_xflags & IFXF_LEGACY)); + VERIFY(current_thread() == inp->input_thr); + + /* construct the name for this thread, and then apply it */ bzero(thread_name, sizeof(thread_name)); - snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname); + (void) snprintf(thread_name, sizeof(thread_name), + "dlil_input_%s", ifp->if_xname); thread_set_thread_name(inp->input_thr, thread_name); + ifnet_decr_pending_thread_count(ifp); - VERIFY(inp != dlil_main_input_thread); - VERIFY(ifp != NULL); - VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll); - VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF); + lck_mtx_lock(&inp->input_lck); + VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING)); + (void) assert_wait(&inp->input_waiting, THREAD_UNINT); + lck_mtx_unlock(&inp->input_lck); + (void) thread_block_parameter(dlil_input_thread_cont, inp); + /* NOTREACHED */ + __builtin_unreachable(); +} + +__attribute__((noreturn)) +static void +dlil_input_thread_cont(void *v, wait_result_t wres) +{ + struct dlil_threading_info *inp = v; + struct ifnet *ifp = inp->ifp; + + lck_mtx_lock_spin(&inp->input_lck); + if (__improbable(wres == THREAD_INTERRUPTED || + (inp->input_waiting & DLIL_INPUT_TERMINATE))) { + goto terminate; + } + + VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING)); + inp->input_waiting |= DLIL_INPUT_RUNNING; while (1) { struct mbuf *m = NULL; + classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); + boolean_t notify = FALSE; u_int32_t m_cnt; - lck_mtx_lock_spin(&inp->input_lck); - - /* Wait until there is work to be done */ - while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { - inp->input_waiting &= ~DLIL_INPUT_RUNNING; - (void) msleep(&inp->input_waiting, &inp->input_lck, - (PZERO - 1) | PSPIN, inp->input_name, NULL); - } - - 
inp->input_waiting |= DLIL_INPUT_RUNNING; inp->input_waiting &= ~DLIL_INPUT_WAITING; /* @@ -2043,69 +2246,124 @@ dlil_input_thread_func(void *v, wait_result_t w) * (and the benefits might not worth the trouble.) */ VERIFY(!(inp->input_waiting & - (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER))); + (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER))); /* Packets for this interface */ m_cnt = qlen(&inp->rcvq_pkts); - m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL); - - if (inp->input_waiting & DLIL_INPUT_TERMINATE) { - lck_mtx_unlock(&inp->input_lck); - - /* Free up pending packets */ - if (m != NULL) - mbuf_freem_list(m); - - dlil_terminate_input_thread(inp); - /* NOTREACHED */ - return; - } + _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL); + m = pkt.cp_mbuf; inp->wtot = 0; - dlil_input_stats_sync(ifp, inp); + notify = dlil_input_stats_sync(ifp, inp); lck_mtx_unlock(&inp->input_lck); + if (notify) { + ifnet_notify_data_threshold(ifp); + } + /* * NOTE warning %%% attention !!!! * We should think about putting some thread starvation * safeguards if we deal with long chains of packets. 
*/ - if (m != NULL) + if (m != NULL) { dlil_input_packet_list_extended(NULL, m, - m_cnt, inp->mode); + m_cnt, ifp->if_poll_mode); + } + + lck_mtx_lock_spin(&inp->input_lck); + VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING); + if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING | + DLIL_INPUT_TERMINATE))) { + break; + } + } + + inp->input_waiting &= ~DLIL_INPUT_RUNNING; + + if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) { +terminate: + lck_mtx_unlock(&inp->input_lck); + dlil_terminate_input_thread(inp); + /* NOTREACHED */ + } else { + (void) assert_wait(&inp->input_waiting, THREAD_UNINT); + lck_mtx_unlock(&inp->input_lck); + (void) thread_block_parameter(dlil_input_thread_cont, inp); + /* NOTREACHED */ } + VERIFY(0); /* we should never get here */ /* NOTREACHED */ - VERIFY(0); /* we should never get here */ + __builtin_unreachable(); } /* * Input thread for interfaces with opportunistic polling input model. */ +__attribute__((noreturn)) static void dlil_rxpoll_input_thread_func(void *v, wait_result_t w) { #pragma unused(w) + char thread_name[MAXTHREADNAMESIZE]; struct dlil_threading_info *inp = v; struct ifnet *ifp = inp->ifp; - struct timespec ts; VERIFY(inp != dlil_main_input_thread); - VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL)); + VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) && + (ifp->if_xflags & IFXF_LEGACY)); + VERIFY(current_thread() == inp->input_thr); + + /* construct the name for this thread, and then apply it */ + bzero(thread_name, sizeof(thread_name)); + (void) snprintf(thread_name, sizeof(thread_name), + "dlil_input_poll_%s", ifp->if_xname); + thread_set_thread_name(inp->input_thr, thread_name); + ifnet_decr_pending_thread_count(ifp); + + lck_mtx_lock(&inp->input_lck); + VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING)); + (void) assert_wait(&inp->input_waiting, THREAD_UNINT); + lck_mtx_unlock(&inp->input_lck); + (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp); + /* NOTREACHED */ + 
__builtin_unreachable(); +} + +__attribute__((noreturn)) +static void +dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres) +{ + struct dlil_threading_info *inp = v; + struct ifnet *ifp = inp->ifp; + struct timespec ts; + + lck_mtx_lock_spin(&inp->input_lck); + if (__improbable(wres == THREAD_INTERRUPTED || + (inp->input_waiting & DLIL_INPUT_TERMINATE))) { + goto terminate; + } + + VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING)); + inp->input_waiting |= DLIL_INPUT_RUNNING; while (1) { struct mbuf *m = NULL; u_int32_t m_cnt, m_size, poll_req = 0; ifnet_model_t mode; struct timespec now, delta; + classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); + boolean_t notify; u_int64_t ival; - lck_mtx_lock_spin(&inp->input_lck); + inp->input_waiting &= ~DLIL_INPUT_WAITING; - if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) + if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) { ival = IF_RXPOLL_INTERVALTIME_MIN; + } /* Link parameters changed? */ if (ifp->if_poll_update != 0) { @@ -2114,17 +2372,7 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w) } /* Current operating mode */ - mode = inp->mode; - - /* Wait until there is work to be done */ - while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { - inp->input_waiting &= ~DLIL_INPUT_RUNNING; - (void) msleep(&inp->input_waiting, &inp->input_lck, - (PZERO - 1) | PSPIN, inp->input_name, NULL); - } - - inp->input_waiting |= DLIL_INPUT_RUNNING; - inp->input_waiting &= ~DLIL_INPUT_WAITING; + mode = ifp->if_poll_mode; /* * Protocol registration and injection must always use @@ -2134,22 +2382,7 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w) * (and the benefits might not worth the trouble.) 
*/ VERIFY(!(inp->input_waiting & - (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER))); - - if (inp->input_waiting & DLIL_INPUT_TERMINATE) { - /* Free up pending packets */ - lck_mtx_convert_spin(&inp->input_lck); - _flushq(&inp->rcvq_pkts); - if (inp->input_mit_tcall != NULL) { - if (thread_call_isactive(inp->input_mit_tcall)) - thread_call_cancel(inp->input_mit_tcall); - } - lck_mtx_unlock(&inp->input_lck); - - dlil_terminate_input_thread(inp); - /* NOTREACHED */ - return; - } + (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER))); /* Total count of all packets */ m_cnt = qlen(&inp->rcvq_pkts); @@ -2158,107 +2391,121 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w) m_size = qsize(&inp->rcvq_pkts); /* Packets for this interface */ - m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL); + _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL); + m = pkt.cp_mbuf; VERIFY(m != NULL || m_cnt == 0); nanouptime(&now); - if (!net_timerisset(&inp->sample_lasttime)) - *(&inp->sample_lasttime) = *(&now); + if (!net_timerisset(&ifp->if_poll_sample_lasttime)) { + *(&ifp->if_poll_sample_lasttime) = *(&now); + } - net_timersub(&now, &inp->sample_lasttime, &delta); - if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) { + net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta); + if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) { u_int32_t ptot, btot; /* Accumulate statistics for current sampling */ - PKTCNTR_ADD(&inp->sstats, m_cnt, m_size); + PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size); - if (net_timercmp(&delta, &inp->sample_holdtime, <)) + if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) { goto skip; + } - *(&inp->sample_lasttime) = *(&now); + *(&ifp->if_poll_sample_lasttime) = *(&now); /* Calculate min/max of inbound bytes */ - btot = (u_int32_t)inp->sstats.bytes; - if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot) - inp->rxpoll_bmin = btot; - if (btot > inp->rxpoll_bmax) - inp->rxpoll_bmax = btot; + btot = (u_int32_t)ifp->if_poll_sstats.bytes; + if 
(ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) { + ifp->if_rxpoll_bmin = btot; + } + if (btot > ifp->if_rxpoll_bmax) { + ifp->if_rxpoll_bmax = btot; + } /* Calculate EWMA of inbound bytes */ - DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay); + DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay); /* Calculate min/max of inbound packets */ - ptot = (u_int32_t)inp->sstats.packets; - if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot) - inp->rxpoll_pmin = ptot; - if (ptot > inp->rxpoll_pmax) - inp->rxpoll_pmax = ptot; + ptot = (u_int32_t)ifp->if_poll_sstats.packets; + if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) { + ifp->if_rxpoll_pmin = ptot; + } + if (ptot > ifp->if_rxpoll_pmax) { + ifp->if_rxpoll_pmax = ptot; + } /* Calculate EWMA of inbound packets */ - DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay); + DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay); /* Reset sampling statistics */ - PKTCNTR_CLEAR(&inp->sstats); + PKTCNTR_CLEAR(&ifp->if_poll_sstats); /* Calculate EWMA of wakeup requests */ - DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay); + DLIL_EWMA(ifp->if_rxpoll_wavg, inp->wtot, if_rxpoll_decay); inp->wtot = 0; if (dlil_verbose) { - if (!net_timerisset(&inp->dbg_lasttime)) - *(&inp->dbg_lasttime) = *(&now); - net_timersub(&now, &inp->dbg_lasttime, &delta); + if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) { + *(&ifp->if_poll_dbg_lasttime) = *(&now); + } + net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta); if (net_timercmp(&delta, &dlil_dbgrate, >=)) { - *(&inp->dbg_lasttime) = *(&now); - printf("%s: [%s] pkts avg %d max %d " + *(&ifp->if_poll_dbg_lasttime) = *(&now); + DLIL_PRINTF("%s: [%s] pkts avg %d max %d " "limits [%d/%d], wreq avg %d " "limits [%d/%d], bytes avg %d " "limits [%d/%d]\n", if_name(ifp), - (inp->mode == + (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON) ? 
- "ON" : "OFF", inp->rxpoll_pavg, - inp->rxpoll_pmax, - inp->rxpoll_plowat, - inp->rxpoll_phiwat, - inp->rxpoll_wavg, - inp->rxpoll_wlowat, - inp->rxpoll_whiwat, - inp->rxpoll_bavg, - inp->rxpoll_blowat, - inp->rxpoll_bhiwat); + "ON" : "OFF", ifp->if_rxpoll_pavg, + ifp->if_rxpoll_pmax, + ifp->if_rxpoll_plowat, + ifp->if_rxpoll_phiwat, + ifp->if_rxpoll_wavg, + ifp->if_rxpoll_wlowat, + ifp->if_rxpoll_whiwat, + ifp->if_rxpoll_bavg, + ifp->if_rxpoll_blowat, + ifp->if_rxpoll_bhiwat); } } /* Perform mode transition, if necessary */ - if (!net_timerisset(&inp->mode_lasttime)) - *(&inp->mode_lasttime) = *(&now); + if (!net_timerisset(&ifp->if_poll_mode_lasttime)) { + *(&ifp->if_poll_mode_lasttime) = *(&now); + } - net_timersub(&now, &inp->mode_lasttime, &delta); - if (net_timercmp(&delta, &inp->mode_holdtime, <)) + net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta); + if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) { goto skip; + } - if (inp->rxpoll_pavg <= inp->rxpoll_plowat && - inp->rxpoll_bavg <= inp->rxpoll_blowat && - inp->mode != IFNET_MODEL_INPUT_POLL_OFF) { + if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat && + ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat && + ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) { mode = IFNET_MODEL_INPUT_POLL_OFF; - } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat && - (inp->rxpoll_bavg >= inp->rxpoll_bhiwat || - inp->rxpoll_wavg >= inp->rxpoll_whiwat) && - inp->mode != IFNET_MODEL_INPUT_POLL_ON) { + } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat && + (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat || + ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) && + ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) { mode = IFNET_MODEL_INPUT_POLL_ON; } - if (mode != inp->mode) { - inp->mode = mode; - *(&inp->mode_lasttime) = *(&now); + if (mode != ifp->if_poll_mode) { + ifp->if_poll_mode = mode; + *(&ifp->if_poll_mode_lasttime) = *(&now); poll_req++; } } skip: - dlil_input_stats_sync(ifp, inp); + notify = 
dlil_input_stats_sync(ifp, inp); lck_mtx_unlock(&inp->input_lck); + if (notify) { + ifnet_notify_data_threshold(ifp); + } + /* * If there's a mode change and interface is still attached, * perform a downcall to the driver for the new mode. Also @@ -2266,27 +2513,29 @@ skip: * being detached (will be release below.) */ if (poll_req != 0 && ifnet_is_attached(ifp, 1)) { - struct ifnet_model_params p = { mode, { 0 } }; + struct ifnet_model_params p = { + .model = mode, .reserved = { 0 } + }; errno_t err; if (dlil_verbose) { - printf("%s: polling is now %s, " + DLIL_PRINTF("%s: polling is now %s, " "pkts avg %d max %d limits [%d/%d], " "wreq avg %d limits [%d/%d], " "bytes avg %d limits [%d/%d]\n", if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ? - "ON" : "OFF", inp->rxpoll_pavg, - inp->rxpoll_pmax, inp->rxpoll_plowat, - inp->rxpoll_phiwat, inp->rxpoll_wavg, - inp->rxpoll_wlowat, inp->rxpoll_whiwat, - inp->rxpoll_bavg, inp->rxpoll_blowat, - inp->rxpoll_bhiwat); + "ON" : "OFF", ifp->if_rxpoll_pavg, + ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat, + ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg, + ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat, + ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat, + ifp->if_rxpoll_bhiwat); } if ((err = ((*ifp->if_input_ctl)(ifp, - IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) { - printf("%s: error setting polling mode " + IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) { + DLIL_PRINTF("%s: error setting polling mode " "to %s (%d)\n", if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ? 
"ON" : "OFF", err); @@ -2295,18 +2544,20 @@ skip: switch (mode) { case IFNET_MODEL_INPUT_POLL_OFF: ifnet_set_poll_cycle(ifp, NULL); - inp->rxpoll_offreq++; - if (err != 0) - inp->rxpoll_offerr++; + ifp->if_rxpoll_offreq++; + if (err != 0) { + ifp->if_rxpoll_offerr++; + } break; case IFNET_MODEL_INPUT_POLL_ON: net_nsectimer(&ival, &ts); ifnet_set_poll_cycle(ifp, &ts); ifnet_poll(ifp); - inp->rxpoll_onreq++; - if (err != 0) - inp->rxpoll_onerr++; + ifp->if_rxpoll_onreq++; + if (err != 0) { + ifp->if_rxpoll_onerr++; + } break; default: @@ -2323,79 +2574,88 @@ skip: * We should think about putting some thread starvation * safeguards if we deal with long chains of packets. */ - if (m != NULL) + if (m != NULL) { dlil_input_packet_list_extended(NULL, m, m_cnt, mode); + } + + lck_mtx_lock_spin(&inp->input_lck); + VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING); + if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING | + DLIL_INPUT_TERMINATE))) { + break; + } } - /* NOTREACHED */ - VERIFY(0); /* we should never get here */ -} + inp->input_waiting &= ~DLIL_INPUT_RUNNING; -/* - * Must be called on an attached ifnet (caller is expected to check.) - * Caller may pass NULL for poll parameters to indicate "auto-tuning." 
- */ -errno_t -dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p, - boolean_t locked) -{ - struct dlil_threading_info *inp; - u_int64_t sample_holdtime, inbw; + if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) { +terminate: + lck_mtx_unlock(&inp->input_lck); + dlil_terminate_input_thread(inp); + /* NOTREACHED */ + } else { + (void) assert_wait(&inp->input_waiting, THREAD_UNINT); + lck_mtx_unlock(&inp->input_lck); + (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, + inp); + /* NOTREACHED */ + } - VERIFY(ifp != NULL); - if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) - return (ENXIO); + VERIFY(0); /* we should never get here */ + /* NOTREACHED */ + __builtin_unreachable(); +} +errno_t +dlil_rxpoll_validate_params(struct ifnet_poll_params *p) +{ if (p != NULL) { if ((p->packets_lowat == 0 && p->packets_hiwat != 0) || - (p->packets_lowat != 0 && p->packets_hiwat == 0)) - return (EINVAL); - if (p->packets_lowat != 0 && /* hiwat must be non-zero */ - p->packets_lowat >= p->packets_hiwat) - return (EINVAL); + (p->packets_lowat != 0 && p->packets_hiwat == 0)) { + return EINVAL; + } + if (p->packets_lowat != 0 && /* hiwat must be non-zero */ + p->packets_lowat >= p->packets_hiwat) { + return EINVAL; + } if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) || - (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) - return (EINVAL); - if (p->bytes_lowat != 0 && /* hiwat must be non-zero */ - p->bytes_lowat >= p->bytes_hiwat) - return (EINVAL); + (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) { + return EINVAL; + } + if (p->bytes_lowat != 0 && /* hiwat must be non-zero */ + p->bytes_lowat >= p->bytes_hiwat) { + return EINVAL; + } if (p->interval_time != 0 && - p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) + p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) { p->interval_time = IF_RXPOLL_INTERVALTIME_MIN; + } } + return 0; +} - if (!locked) - lck_mtx_lock(&inp->input_lck); - - LCK_MTX_ASSERT(&inp->input_lck, 
LCK_MTX_ASSERT_OWNED); - - /* - * Normally, we'd reset the parameters to the auto-tuned values - * if the the input thread detects a change in link rate. If the - * driver provides its own parameters right after a link rate - * changes, but before the input thread gets to run, we want to - * make sure to keep the driver's values. Clearing if_poll_update - * will achieve that. - */ - if (p != NULL && !locked && ifp->if_poll_update != 0) - ifp->if_poll_update = 0; +void +dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p) +{ + u_int64_t sample_holdtime, inbw; if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) { - sample_holdtime = 0; /* polling is disabled */ - inp->rxpoll_wlowat = inp->rxpoll_plowat = - inp->rxpoll_blowat = 0; - inp->rxpoll_whiwat = inp->rxpoll_phiwat = - inp->rxpoll_bhiwat = (u_int32_t)-1; - inp->rxpoll_plim = 0; - inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN; + sample_holdtime = 0; /* polling is disabled */ + ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat = + ifp->if_rxpoll_blowat = 0; + ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat = + ifp->if_rxpoll_bhiwat = (u_int32_t)-1; + ifp->if_rxpoll_plim = 0; + ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN; } else { u_int32_t plowat, phiwat, blowat, bhiwat, plim; u_int64_t ival; unsigned int n, i; for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) { - if (inbw < rxpoll_tbl[i].speed) + if (inbw < rxpoll_tbl[i].speed) { break; + } n = i; } /* auto-tune if caller didn't specify a value */ @@ -2417,33 +2677,72 @@ dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p, VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN); sample_holdtime = if_rxpoll_sample_holdtime; - inp->rxpoll_wlowat = if_rxpoll_wlowat; - inp->rxpoll_whiwat = if_rxpoll_whiwat; - inp->rxpoll_plowat = plowat; - inp->rxpoll_phiwat = phiwat; - inp->rxpoll_blowat = blowat; - inp->rxpoll_bhiwat = bhiwat; - inp->rxpoll_plim = plim; - inp->rxpoll_ival = ival; + ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat; + 
ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat; + ifp->if_rxpoll_plowat = plowat; + ifp->if_rxpoll_phiwat = phiwat; + ifp->if_rxpoll_blowat = blowat; + ifp->if_rxpoll_bhiwat = bhiwat; + ifp->if_rxpoll_plim = plim; + ifp->if_rxpoll_ival = ival; } - net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime); - net_nsectimer(&sample_holdtime, &inp->sample_holdtime); + net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime); + net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime); if (dlil_verbose) { - printf("%s: speed %llu bps, sample per %llu nsec, " + DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, " "poll interval %llu nsec, pkts per poll %u, " "pkt limits [%u/%u], wreq limits [%u/%u], " "bytes limits [%u/%u]\n", if_name(ifp), - inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim, - inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat, - inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat); + inbw, sample_holdtime, ifp->if_rxpoll_ival, + ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat, + ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat, + ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat, + ifp->if_rxpoll_bhiwat); } +} - if (!locked) - lck_mtx_unlock(&inp->input_lck); +/* + * Must be called on an attached ifnet (caller is expected to check.) + * Caller may pass NULL for poll parameters to indicate "auto-tuning." + */ +errno_t +dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p, + boolean_t locked) +{ + errno_t err; + struct dlil_threading_info *inp; - return (0); + VERIFY(ifp != NULL); + if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) { + return ENXIO; + } + err = dlil_rxpoll_validate_params(p); + if (err != 0) { + return err; + } + + if (!locked) { + lck_mtx_lock(&inp->input_lck); + } + LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED); + /* + * Normally, we'd reset the parameters to the auto-tuned values + * if the the input thread detects a change in link rate. 
If the + * driver provides its own parameters right after a link rate + * changes, but before the input thread gets to run, we want to + * make sure to keep the driver's values. Clearing if_poll_update + * will achieve that. + */ + if (p != NULL && !locked && ifp->if_poll_update != 0) { + ifp->if_poll_update = 0; + } + dlil_rxpoll_update_params(ifp, p); + if (!locked) { + lck_mtx_unlock(&inp->input_lck); + } + return 0; } /* @@ -2455,35 +2754,44 @@ dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p) struct dlil_threading_info *inp; VERIFY(ifp != NULL && p != NULL); - if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) - return (ENXIO); + if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) { + return ENXIO; + } - bzero(p, sizeof (*p)); + bzero(p, sizeof(*p)); lck_mtx_lock(&inp->input_lck); - p->packets_limit = inp->rxpoll_plim; - p->packets_lowat = inp->rxpoll_plowat; - p->packets_hiwat = inp->rxpoll_phiwat; - p->bytes_lowat = inp->rxpoll_blowat; - p->bytes_hiwat = inp->rxpoll_bhiwat; - p->interval_time = inp->rxpoll_ival; + p->packets_limit = ifp->if_rxpoll_plim; + p->packets_lowat = ifp->if_rxpoll_plowat; + p->packets_hiwat = ifp->if_rxpoll_phiwat; + p->bytes_lowat = ifp->if_rxpoll_blowat; + p->bytes_hiwat = ifp->if_rxpoll_bhiwat; + p->interval_time = ifp->if_rxpoll_ival; lck_mtx_unlock(&inp->input_lck); - return (0); + return 0; } errno_t ifnet_input(struct ifnet *ifp, struct mbuf *m_head, const struct ifnet_stat_increment_param *s) { - return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE)); + return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE); } errno_t ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, const struct ifnet_stat_increment_param *s) { - return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE)); + return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE); +} + +errno_t +ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head, + struct 
mbuf *m_tail, const struct ifnet_stat_increment_param *s) +{ + return ifnet_input_common(ifp, m_head, m_tail, s, + (m_head != NULL), TRUE); } static errno_t @@ -2497,9 +2805,10 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, errno_t err = 0; if ((m_head == NULL && !poll) || (s == NULL && ext)) { - if (m_head != NULL) + if (m_head != NULL) { mbuf_freem_list(m_head); - return (EINVAL); + } + return EINVAL; } VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll)); @@ -2511,10 +2820,11 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, * interface is no longer attached; else hold an IO refcnt to * prevent it from being detached (will be released below.) */ - if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) { - if (m_head != NULL) + if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) { + if (m_head != NULL) { mbuf_freem_list(m_head); - return (EINVAL); + } + return EINVAL; } input_func = ifp->if_input_dlil; @@ -2524,13 +2834,15 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, last = m_head; while (m_head != NULL) { #if IFNET_INPUT_SANITY_CHK - if (dlil_input_sanity_check != 0) + if (dlil_input_sanity_check != 0) { DLIL_INPUT_CHECK(last, ifp); + } #endif /* IFNET_INPUT_SANITY_CHK */ m_cnt++; m_size += m_length(last); - if (mbuf_nextpkt(last) == NULL) + if (mbuf_nextpkt(last) == NULL) { break; + } last = mbuf_nextpkt(last); } m_tail = last; @@ -2542,8 +2854,9 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, DLIL_INPUT_CHECK(last, ifp); m_cnt++; m_size += m_length(last); - if (mbuf_nextpkt(last) == NULL) + if (mbuf_nextpkt(last) == NULL) { break; + } last = mbuf_nextpkt(last); } } else { @@ -2578,7 +2891,7 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, } if (s == NULL) { - bzero(&_s, sizeof (_s)); + bzero(&_s, sizeof(_s)); s = &_s; } else { _s = *s; @@ 
-2590,17 +2903,17 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, if (ifp != lo_ifp) { /* Release the IO refcnt */ - ifnet_decr_iorefcnt(ifp); + ifnet_datamov_end(ifp); } - return (err); + return err; } errno_t dlil_output_handler(struct ifnet *ifp, struct mbuf *m) { - return (ifp->if_output(ifp, m)); + return ifp->if_output(ifp, m); } errno_t @@ -2611,9 +2924,11 @@ dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head, struct dlil_threading_info *inp; u_int32_t m_cnt = s->packets_in; u_int32_t m_size = s->bytes_in; + boolean_t notify = FALSE; - if ((inp = ifp->if_inp) == NULL) + if ((inp = ifp->if_inp) == NULL) { inp = dlil_main_input_thread; + } /* * If there is a matching DLIL input thread associated with an @@ -2657,13 +2972,16 @@ dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head, * dedicated input threads go to the regular list. */ if (m_head != NULL) { + classq_pkt_t head, tail; + CLASSQ_PKT_INIT_MBUF(&head, m_head); + CLASSQ_PKT_INIT_MBUF(&tail, m_tail); if (inp == dlil_main_input_thread && ifp == lo_ifp) { struct dlil_main_threading_info *inpm = (struct dlil_main_threading_info *)inp; - _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, + _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail, m_cnt, m_size); } else { - _addq_multi(&inp->rcvq_pkts, m_head, m_tail, + _addq_multi(&inp->rcvq_pkts, &head, &tail, m_cnt, m_size); } } @@ -2673,8 +2991,9 @@ dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head, u_int32_t count; struct mbuf *m0; - for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) + for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) { count++; + } if (count != m_cnt) { panic_plain("%s: invalid packet count %d " @@ -2687,46 +3006,38 @@ dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head, } #endif /* IFNET_INPUT_SANITY_CHK */ - dlil_input_stats_add(s, inp, poll); + dlil_input_stats_add(s, inp, ifp, poll); /* * If we're using the main input thread, synchronize the * stats now since we have the 
interface context. All * other cases involving dedicated input threads will * have their stats synchronized there. */ - if (inp == dlil_main_input_thread) - dlil_input_stats_sync(ifp, inp); - - if (qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min && - qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max && - (ifp->if_family == IFNET_FAMILY_ETHERNET || - ifp->if_type == IFT_CELLULAR) - ) { - if (!thread_call_isactive(inp->input_mit_tcall)) { - uint64_t deadline; - clock_interval_to_deadline(dlil_rcv_mit_interval, - 1, &deadline); - (void) thread_call_enter_delayed( - inp->input_mit_tcall, deadline); - } - } else { - inp->input_waiting |= DLIL_INPUT_WAITING; - if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) { - inp->wtot++; - wakeup_one((caddr_t)&inp->input_waiting); - } + if (inp == dlil_main_input_thread) { + notify = dlil_input_stats_sync(ifp, inp); + } + + inp->input_waiting |= DLIL_INPUT_WAITING; + if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) { + inp->wtot++; + wakeup_one((caddr_t)&inp->input_waiting); } lck_mtx_unlock(&inp->input_lck); - return (0); + if (notify) { + ifnet_notify_data_threshold(ifp); + } + + return 0; } static void ifnet_start_common(struct ifnet *ifp, boolean_t resetfc) { - if (!(ifp->if_eflags & IFEF_TXSTART)) + if (!(ifp->if_eflags & IFEF_TXSTART)) { return; + } /* * If the starter thread is inactive, signal it to do work, * unless the interface is being flow controlled from below, @@ -2758,22 +3069,20 @@ ifnet_start(struct ifnet *ifp) ifnet_start_common(ifp, FALSE); } +__attribute__((noreturn)) static void -ifnet_start_thread_fn(void *v, wait_result_t w) +ifnet_start_thread_func(void *v, wait_result_t w) { #pragma unused(w) struct ifnet *ifp = v; - char ifname[IFNAMSIZ + 1]; char thread_name[MAXTHREADNAMESIZE]; - struct timespec *ts = NULL; - struct ifclassq *ifq = &ifp->if_snd; - struct timespec delay_start_ts; /* Construct the name for this thread, and then apply it. 
*/ bzero(thread_name, sizeof(thread_name)); - (void) snprintf(thread_name, sizeof (thread_name), + (void) snprintf(thread_name, sizeof(thread_name), "ifnet_start_%s", ifp->if_xname); - thread_set_thread_name(ifp->if_start_thread, thread_name); + ASSERT(ifp->if_start_thread == current_thread()); + thread_set_thread_name(current_thread(), thread_name); /* * Treat the dedicated starter thread for lo0 as equivalent to @@ -2801,86 +3110,89 @@ ifnet_start_thread_fn(void *v, wait_result_t w) lck_mtx_unlock(&inp->input_lck); } } + ifnet_decr_pending_thread_count(ifp); - (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp)); + lck_mtx_lock(&ifp->if_start_lock); + VERIFY(!ifp->if_start_active); + (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT); + lck_mtx_unlock(&ifp->if_start_lock); + (void) thread_block_parameter(ifnet_start_thread_cont, ifp); + /* NOTREACHED */ + __builtin_unreachable(); +} - lck_mtx_lock_spin(&ifp->if_start_lock); +__attribute__((noreturn)) +static void +ifnet_start_thread_cont(void *v, wait_result_t wres) +{ + struct ifnet *ifp = v; + struct ifclassq *ifq = &ifp->if_snd; - for (;;) { - if (ifp->if_start_thread != NULL) { - (void) msleep(&ifp->if_start_thread, - &ifp->if_start_lock, - (PZERO - 1) | PSPIN, ifname, ts); - } - /* interface is detached? */ - if (ifp->if_start_thread == THREAD_NULL) { - ifnet_set_start_cycle(ifp, NULL); - lck_mtx_unlock(&ifp->if_start_lock); - ifnet_purge(ifp); + lck_mtx_lock(&ifp->if_start_lock); + if (__improbable(wres == THREAD_INTERRUPTED || + ifp->if_start_thread == THREAD_NULL)) { + goto terminate; + } - if (dlil_verbose) { - printf("%s: starter thread terminated\n", - if_name(ifp)); - } + ifp->if_start_active = 1; - /* for the extra refcnt from kernel_thread_start() */ - thread_deallocate(current_thread()); - /* this is the end */ - thread_terminate(current_thread()); - /* NOTREACHED */ - return; + /* + * Keep on servicing until no more request. 
+ */ + for (;;) { + u_int32_t req = ifp->if_start_req; + if (!IFCQ_IS_EMPTY(ifq) && + (ifp->if_eflags & IFEF_ENQUEUE_MULTI) && + ifp->if_start_delayed == 0 && + IFCQ_LEN(ifq) < ifp->if_start_delay_qlen && + (ifp->if_eflags & IFEF_DELAY_START)) { + ifp->if_start_delayed = 1; + ifnet_start_delayed++; + break; + } else { + ifp->if_start_delayed = 0; } + lck_mtx_unlock(&ifp->if_start_lock); - ifp->if_start_active = 1; + /* + * If no longer attached, don't call start because ifp + * is being destroyed; else hold an IO refcnt to + * prevent the interface from being detached (will be + * released below.) + */ + if (!ifnet_datamov_begin(ifp)) { + lck_mtx_lock_spin(&ifp->if_start_lock); + break; + } - for (;;) { - u_int32_t req = ifp->if_start_req; - if (!IFCQ_IS_EMPTY(ifq) && - (ifp->if_eflags & IFEF_ENQUEUE_MULTI) && - ifp->if_start_delayed == 0 && - IFCQ_LEN(ifq) < ifp->if_start_delay_qlen && - (ifp->if_eflags & IFEF_DELAY_START)) { - ifp->if_start_delayed = 1; - ifnet_start_delayed++; - break; - } else { - ifp->if_start_delayed = 0; - } - lck_mtx_unlock(&ifp->if_start_lock); + /* invoke the driver's start routine */ + ((*ifp->if_start)(ifp)); - /* - * If no longer attached, don't call start because ifp - * is being destroyed; else hold an IO refcnt to - * prevent the interface from being detached (will be - * released below.) - */ - if (!ifnet_is_attached(ifp, 1)) { - lck_mtx_lock_spin(&ifp->if_start_lock); - break; - } + /* + * Release the io ref count taken above. + */ + ifnet_datamov_end(ifp); - /* invoke the driver's start routine */ - ((*ifp->if_start)(ifp)); + lck_mtx_lock_spin(&ifp->if_start_lock); - /* - * Release the io ref count taken by ifnet_is_attached. - */ - ifnet_decr_iorefcnt(ifp); + /* + * If there's no pending request or if the + * interface has been disabled, we're done. 
+ */ + if (req == ifp->if_start_req || + (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) { + break; + } + } - lck_mtx_lock_spin(&ifp->if_start_lock); + ifp->if_start_req = 0; + ifp->if_start_active = 0; - /* - * If there's no pending request or if the - * interface has been disabled, we're done. - */ - if (req == ifp->if_start_req || - (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) { - break; - } - } - ifp->if_start_req = 0; - ifp->if_start_active = 0; + if (__probable(ifp->if_start_thread != THREAD_NULL)) { + uint64_t deadline = TIMEOUT_WAIT_FOREVER; + struct timespec delay_start_ts; + struct timespec *ts; /* * Wakeup N ns from now if rate-controlled by TBR, and if @@ -2897,27 +3209,61 @@ ifnet_start_thread_fn(void *v, wait_result_t w) ts = &delay_start_ts; } - if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) + if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) { ts = NULL; + } + + if (__improbable(ts != NULL)) { + clock_interval_to_deadline((ts->tv_nsec + + (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline); + } + + (void) assert_wait_deadline(&ifp->if_start_thread, + THREAD_UNINT, deadline); + lck_mtx_unlock(&ifp->if_start_lock); + (void) thread_block_parameter(ifnet_start_thread_cont, ifp); + /* NOTREACHED */ + } else { +terminate: + /* interface is detached? 
*/ + ifnet_set_start_cycle(ifp, NULL); + lck_mtx_unlock(&ifp->if_start_lock); + ifnet_purge(ifp); + + if (dlil_verbose) { + DLIL_PRINTF("%s: starter thread terminated\n", + if_name(ifp)); + } + + /* for the extra refcnt from kernel_thread_start() */ + thread_deallocate(current_thread()); + /* this is the end */ + thread_terminate(current_thread()); + /* NOTREACHED */ } + /* must never get here */ + VERIFY(0); /* NOTREACHED */ + __builtin_unreachable(); } void ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts) { - if (ts == NULL) - bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle)); - else + if (ts == NULL) { + bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle)); + } else { *(&ifp->if_start_cycle) = *ts; + } - if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) - printf("%s: restart interval set to %lu nsec\n", + if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) { + DLIL_PRINTF("%s: restart interval set to %lu nsec\n", if_name(ifp), ts->tv_nsec); + } } -static void +void ifnet_poll(struct ifnet *ifp) { /* @@ -2925,165 +3271,214 @@ ifnet_poll(struct ifnet *ifp) */ lck_mtx_lock_spin(&ifp->if_poll_lock); ifp->if_poll_req++; - if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) { + if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) && + ifp->if_poll_thread != THREAD_NULL) { wakeup_one((caddr_t)&ifp->if_poll_thread); } lck_mtx_unlock(&ifp->if_poll_lock); } +__attribute__((noreturn)) static void -ifnet_poll_thread_fn(void *v, wait_result_t w) +ifnet_poll_thread_func(void *v, wait_result_t w) { #pragma unused(w) + char thread_name[MAXTHREADNAMESIZE]; + struct ifnet *ifp = v; + + VERIFY(ifp->if_eflags & IFEF_RXPOLL); + VERIFY(current_thread() == ifp->if_poll_thread); + + /* construct the name for this thread, and then apply it */ + bzero(thread_name, sizeof(thread_name)); + (void) snprintf(thread_name, sizeof(thread_name), + "ifnet_poller_%s", ifp->if_xname); + thread_set_thread_name(ifp->if_poll_thread, thread_name); + 
ifnet_decr_pending_thread_count(ifp); + + lck_mtx_lock(&ifp->if_poll_lock); + (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT); + lck_mtx_unlock(&ifp->if_poll_lock); + (void) thread_block_parameter(ifnet_poll_thread_cont, ifp); + /* NOTREACHED */ + __builtin_unreachable(); +} + +__attribute__((noreturn)) +static void +ifnet_poll_thread_cont(void *v, wait_result_t wres) +{ struct dlil_threading_info *inp; struct ifnet *ifp = v; - char ifname[IFNAMSIZ + 1]; - struct timespec *ts = NULL; struct ifnet_stat_increment_param s; + struct timespec start_time; - snprintf(ifname, sizeof (ifname), "%s_poller", - if_name(ifp)); - bzero(&s, sizeof (s)); + VERIFY(ifp->if_eflags & IFEF_RXPOLL); + + bzero(&s, sizeof(s)); + net_timerclear(&start_time); lck_mtx_lock_spin(&ifp->if_poll_lock); + if (__improbable(wres == THREAD_INTERRUPTED || + ifp->if_poll_thread == THREAD_NULL)) { + goto terminate; + } inp = ifp->if_inp; VERIFY(inp != NULL); + ifp->if_poll_flags |= IF_POLLF_RUNNING; + + /* + * Keep on servicing until no more request. + */ for (;;) { - if (ifp->if_poll_thread != THREAD_NULL) { - (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock, - (PZERO - 1) | PSPIN, ifname, ts); - } + struct mbuf *m_head, *m_tail; + u_int32_t m_lim, m_cnt, m_totlen; + u_int16_t req = ifp->if_poll_req; - /* interface is detached (maybe while asleep)? */ - if (ifp->if_poll_thread == THREAD_NULL) { - ifnet_set_poll_cycle(ifp, NULL); - lck_mtx_unlock(&ifp->if_poll_lock); + m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim : + MAX((qlimit(&inp->rcvq_pkts)), (ifp->if_rxpoll_phiwat << 2)); + lck_mtx_unlock(&ifp->if_poll_lock); - if (dlil_verbose) { - printf("%s: poller thread terminated\n", - if_name(ifp)); - } + /* + * If no longer attached, there's nothing to do; + * else hold an IO refcnt to prevent the interface + * from being detached (will be released below.) 
+ */ + if (!ifnet_is_attached(ifp, 1)) { + lck_mtx_lock_spin(&ifp->if_poll_lock); + break; + } - /* for the extra refcnt from kernel_thread_start() */ - thread_deallocate(current_thread()); - /* this is the end */ - thread_terminate(current_thread()); - /* NOTREACHED */ - return; + if (dlil_verbose > 1) { + DLIL_PRINTF("%s: polling up to %d pkts, " + "pkts avg %d max %d, wreq avg %d, " + "bytes avg %d\n", + if_name(ifp), m_lim, + ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax, + ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg); } - ifp->if_poll_active = 1; - for (;;) { - struct mbuf *m_head, *m_tail; - u_int32_t m_lim, m_cnt, m_totlen; - u_int16_t req = ifp->if_poll_req; + /* invoke the driver's input poll routine */ + ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, + &m_cnt, &m_totlen)); - lck_mtx_unlock(&ifp->if_poll_lock); + if (m_head != NULL) { + VERIFY(m_tail != NULL && m_cnt > 0); - /* - * If no longer attached, there's nothing to do; - * else hold an IO refcnt to prevent the interface - * from being detached (will be released below.) - */ - if (!ifnet_is_attached(ifp, 1)) { - lck_mtx_lock_spin(&ifp->if_poll_lock); - break; + if (dlil_verbose > 1) { + DLIL_PRINTF("%s: polled %d pkts, " + "pkts avg %d max %d, wreq avg %d, " + "bytes avg %d\n", + if_name(ifp), m_cnt, + ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax, + ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg); } - m_lim = (inp->rxpoll_plim != 0) ? 
inp->rxpoll_plim : - MAX((qlimit(&inp->rcvq_pkts)), - (inp->rxpoll_phiwat << 2)); + /* stats are required for extended variant */ + s.packets_in = m_cnt; + s.bytes_in = m_totlen; + (void) ifnet_input_common(ifp, m_head, m_tail, + &s, TRUE, TRUE); + } else { if (dlil_verbose > 1) { - printf("%s: polling up to %d pkts, " + DLIL_PRINTF("%s: no packets, " "pkts avg %d max %d, wreq avg %d, " "bytes avg %d\n", - if_name(ifp), m_lim, - inp->rxpoll_pavg, inp->rxpoll_pmax, - inp->rxpoll_wavg, inp->rxpoll_bavg); + if_name(ifp), ifp->if_rxpoll_pavg, + ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg, + ifp->if_rxpoll_bavg); } - /* invoke the driver's input poll routine */ - ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, - &m_cnt, &m_totlen)); - - if (m_head != NULL) { - VERIFY(m_tail != NULL && m_cnt > 0); - - if (dlil_verbose > 1) { - printf("%s: polled %d pkts, " - "pkts avg %d max %d, wreq avg %d, " - "bytes avg %d\n", - if_name(ifp), m_cnt, - inp->rxpoll_pavg, inp->rxpoll_pmax, - inp->rxpoll_wavg, inp->rxpoll_bavg); - } - - /* stats are required for extended variant */ - s.packets_in = m_cnt; - s.bytes_in = m_totlen; + (void) ifnet_input_common(ifp, NULL, NULL, + NULL, FALSE, TRUE); + } - (void) ifnet_input_common(ifp, m_head, m_tail, - &s, TRUE, TRUE); - } else { - if (dlil_verbose > 1) { - printf("%s: no packets, " - "pkts avg %d max %d, wreq avg %d, " - "bytes avg %d\n", - if_name(ifp), inp->rxpoll_pavg, - inp->rxpoll_pmax, inp->rxpoll_wavg, - inp->rxpoll_bavg); - } + /* Release the io ref count */ + ifnet_decr_iorefcnt(ifp); - (void) ifnet_input_common(ifp, NULL, NULL, - NULL, FALSE, TRUE); - } + lck_mtx_lock_spin(&ifp->if_poll_lock); - /* Release the io ref count */ - ifnet_decr_iorefcnt(ifp); + /* if there's no pending request, we're done */ + if (req == ifp->if_poll_req || + ifp->if_poll_thread == THREAD_NULL) { + break; + } + } - lck_mtx_lock_spin(&ifp->if_poll_lock); + ifp->if_poll_req = 0; + ifp->if_poll_flags &= ~IF_POLLF_RUNNING; - /* if there's no pending 
request, we're done */ - if (req == ifp->if_poll_req) { - break; - } - } - ifp->if_poll_req = 0; - ifp->if_poll_active = 0; + if (ifp->if_poll_thread != THREAD_NULL) { + uint64_t deadline = TIMEOUT_WAIT_FOREVER; + struct timespec *ts; /* * Wakeup N ns from now, else sleep indefinitely (ts = NULL) * until ifnet_poll() is called again. */ ts = &ifp->if_poll_cycle; - if (ts->tv_sec == 0 && ts->tv_nsec == 0) + if (ts->tv_sec == 0 && ts->tv_nsec == 0) { ts = NULL; + } + + if (ts != NULL) { + clock_interval_to_deadline((ts->tv_nsec + + (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline); + } + + (void) assert_wait_deadline(&ifp->if_poll_thread, + THREAD_UNINT, deadline); + lck_mtx_unlock(&ifp->if_poll_lock); + (void) thread_block_parameter(ifnet_poll_thread_cont, ifp); + /* NOTREACHED */ + } else { +terminate: + /* interface is detached (maybe while asleep)? */ + ifnet_set_poll_cycle(ifp, NULL); + lck_mtx_unlock(&ifp->if_poll_lock); + + if (dlil_verbose) { + DLIL_PRINTF("%s: poller thread terminated\n", + if_name(ifp)); + } + + /* for the extra refcnt from kernel_thread_start() */ + thread_deallocate(current_thread()); + /* this is the end */ + thread_terminate(current_thread()); + /* NOTREACHED */ } + /* must never get here */ + VERIFY(0); /* NOTREACHED */ + __builtin_unreachable(); } void ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts) { - if (ts == NULL) - bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle)); - else + if (ts == NULL) { + bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle)); + } else { *(&ifp->if_poll_cycle) = *ts; + } - if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) - printf("%s: poll interval set to %lu nsec\n", + if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) { + DLIL_PRINTF("%s: poll interval set to %lu nsec\n", if_name(ifp), ts->tv_nsec); + } } void ifnet_purge(struct ifnet *ifp) { - if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) + if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) { if_qflush(ifp, 0); + } } void @@ 
-3091,12 +3486,15 @@ ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev) { IFCQ_LOCK_ASSERT_HELD(ifq); - if (!(IFCQ_IS_READY(ifq))) + if (!(IFCQ_IS_READY(ifq))) { return; + } if (IFCQ_TBR_IS_ENABLED(ifq)) { - struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw, - ifq->ifcq_tbr.tbr_percent, 0 }; + struct tb_profile tb = { + .rate = ifq->ifcq_tbr.tbr_rate_raw, + .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0 + }; (void) ifclassq_tbr_set(ifq, &tb, FALSE); } @@ -3108,8 +3506,9 @@ ifnet_update_rcv(struct ifnet *ifp, cqev_t ev) { switch (ev) { case CLASSQ_EV_LINK_BANDWIDTH: - if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) + if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { ifp->if_poll_update++; + } break; default: @@ -3124,46 +3523,50 @@ ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model) u_int32_t omodel; errno_t err; - if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART)) - return (ENXIO); + if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART)) { + return ENXIO; + } ifq = &ifp->if_snd; IFCQ_LOCK(ifq); omodel = ifp->if_output_sched_model; ifp->if_output_sched_model = model; - if ((err = ifclassq_pktsched_setup(ifq)) != 0) + if ((err = ifclassq_pktsched_setup(ifq)) != 0) { ifp->if_output_sched_model = omodel; + } IFCQ_UNLOCK(ifq); - return (err); + return err; } errno_t ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen) { - if (ifp == NULL) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART)) - return (ENXIO); + if (ifp == NULL) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART)) { + return ENXIO; + } ifclassq_set_maxlen(&ifp->if_snd, maxqlen); - return (0); + return 0; } errno_t ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen) { - if (ifp == NULL || maxqlen == NULL) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART)) - return (ENXIO); + if (ifp == NULL || maxqlen == NULL) 
{ + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART)) { + return ENXIO; + } *maxqlen = ifclassq_get_maxlen(&ifp->if_snd); - return (0); + return 0; } errno_t @@ -3171,15 +3574,16 @@ ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts) { errno_t err; - if (ifp == NULL || pkts == NULL) + if (ifp == NULL || pkts == NULL) { err = EINVAL; - else if (!(ifp->if_eflags & IFEF_TXSTART)) + } else if (!(ifp->if_eflags & IFEF_TXSTART)) { err = ENXIO; - else + } else { err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC, pkts, NULL); + } - return (err); + return err; } errno_t @@ -3189,14 +3593,15 @@ ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc, errno_t err; if (ifp == NULL || !MBUF_VALID_SC(sc) || - (pkts == NULL && bytes == NULL)) + (pkts == NULL && bytes == NULL)) { err = EINVAL; - else if (!(ifp->if_eflags & IFEF_TXSTART)) + } else if (!(ifp->if_eflags & IFEF_TXSTART)) { err = ENXIO; - else + } else { err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes); + } - return (err); + return err; } errno_t @@ -3204,22 +3609,24 @@ ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen) { struct dlil_threading_info *inp; - if (ifp == NULL) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) - return (ENXIO); + if (ifp == NULL) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) { + return ENXIO; + } - if (maxqlen == 0) + if (maxqlen == 0) { maxqlen = if_rcvq_maxlen; - else if (maxqlen < IF_RCVQ_MINLEN) + } else if (maxqlen < IF_RCVQ_MINLEN) { maxqlen = IF_RCVQ_MINLEN; + } inp = ifp->if_inp; lck_mtx_lock(&inp->input_lck); qlimit(&inp->rcvq_pkts) = maxqlen; lck_mtx_unlock(&inp->input_lck); - return (0); + return 0; } errno_t @@ -3227,16 +3634,17 @@ ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen) { struct dlil_threading_info *inp; - if (ifp == NULL || maxqlen == NULL) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) - return 
(ENXIO); + if (ifp == NULL || maxqlen == NULL) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) { + return ENXIO; + } inp = ifp->if_inp; lck_mtx_lock(&inp->input_lck); *maxqlen = qlimit(&inp->rcvq_pkts); lck_mtx_unlock(&inp->input_lck); - return (0); + return 0; } void @@ -3257,16 +3665,78 @@ ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen, } } +/* + * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf. + * While it's ok for buf to be not 32 bit aligned, the caller must ensure that + * buf holds the full header. + */ +static __attribute__((noinline)) void +ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver) +{ + struct ip *ip; + struct ip6_hdr *ip6; + uint8_t lbuf[64] __attribute__((aligned(8))); + uint8_t *p = buf; + + if (ip_ver == IPVERSION) { + uint8_t old_tos; + uint32_t sum; + + if (__improbable(!IP_HDR_ALIGNED_P(p))) { + DTRACE_IP1(not__aligned__v4, uint8_t *, buf); + bcopy(buf, lbuf, sizeof(struct ip)); + p = lbuf; + } + ip = (struct ip *)(void *)p; + if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) { + return; + } + + DTRACE_IP1(clear__v4, struct ip *, ip); + old_tos = ip->ip_tos; + ip->ip_tos &= IPTOS_ECN_MASK; + sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos); + sum = (sum >> 16) + (sum & 0xffff); + ip->ip_sum = (uint16_t)(sum & 0xffff); + + if (__improbable(p == lbuf)) { + bcopy(lbuf, buf, sizeof(struct ip)); + } + } else { + uint32_t flow; + ASSERT(ip_ver == IPV6_VERSION); + + if (__improbable(!IP_HDR_ALIGNED_P(p))) { + DTRACE_IP1(not__aligned__v6, uint8_t *, buf); + bcopy(buf, lbuf, sizeof(struct ip6_hdr)); + p = lbuf; + } + ip6 = (struct ip6_hdr *)(void *)p; + flow = ntohl(ip6->ip6_flow); + if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) { + return; + } + + DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6); + ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK); + + if (__improbable(p == lbuf)) { + bcopy(lbuf, buf, sizeof(struct ip6_hdr)); + } + } +} + static inline 
errno_t -ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype, - boolean_t flush, boolean_t *pdrop) +ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush, + boolean_t *pdrop) { volatile uint64_t *fg_ts = NULL; volatile uint64_t *rt_ts = NULL; - struct mbuf *m = p; struct timespec now; u_int64_t now_nsec = 0; int error = 0; + uint8_t *mcast_buf = NULL; + uint8_t ip_ver; ASSERT(ifp->if_eflags & IFEF_TXSTART); @@ -3276,42 +3746,110 @@ ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype, * PKTF_TS_VALID is always cleared prior to entering classq, i.e. * the timestamp value is used internally there. */ - switch (ptype) { + switch (p->cp_ptype) { case QP_MBUF: - ASSERT(m->m_flags & M_PKTHDR); - ASSERT(m->m_nextpkt == NULL); + ASSERT(p->cp_mbuf->m_flags & M_PKTHDR); + ASSERT(p->cp_mbuf->m_nextpkt == NULL); - if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) || - m->m_pkthdr.pkt_timestamp == 0) { + if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) || + p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) { nanouptime(&now); net_timernsec(&now, &now_nsec); - m->m_pkthdr.pkt_timestamp = now_nsec; + p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec; } - m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID; + p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID; /* * If the packet service class is not background, * update the timestamp to indicate recent activity * on a foreground socket. 
*/ - if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) && - m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { - if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) { + if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) && + p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { + if (!(p->cp_mbuf->m_pkthdr.pkt_flags & + PKTF_SO_BACKGROUND)) { ifp->if_fg_sendts = _net_uptime; - if (fg_ts != NULL) + if (fg_ts != NULL) { *fg_ts = _net_uptime; + } } - if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) { + if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) { ifp->if_rt_sendts = _net_uptime; - if (rt_ts != NULL) + if (rt_ts != NULL) { *rt_ts = _net_uptime; + } } } + + /* + * Some Wi-Fi AP implementations do not correctly handle + * multicast IP packets with DSCP bits set (radr://9331522). + * As a workaround we clear the DSCP bits and set the service + * class to BE. + */ + if ((p->cp_mbuf->m_flags & M_MCAST) != 0 && + IFNET_IS_WIFI_INFRA(ifp)) { + size_t len = mbuf_len(p->cp_mbuf), hlen; + struct ether_header *eh; + boolean_t pullup = FALSE; + uint16_t etype; + + if (__improbable(len < sizeof(struct ether_header))) { + DTRACE_IP1(small__ether, size_t, len); + if ((p->cp_mbuf = m_pullup(p->cp_mbuf, + sizeof(struct ether_header))) == NULL) { + return ENOMEM; + } + } + eh = (struct ether_header *)mbuf_data(p->cp_mbuf); + etype = ntohs(eh->ether_type); + if (etype == ETHERTYPE_IP) { + hlen = sizeof(struct ether_header) + + sizeof(struct ip); + if (len < hlen) { + DTRACE_IP1(small__v4, size_t, len); + pullup = TRUE; + } + ip_ver = IPVERSION; + } else if (etype == ETHERTYPE_IPV6) { + hlen = sizeof(struct ether_header) + + sizeof(struct ip6_hdr); + if (len < hlen) { + DTRACE_IP1(small__v6, size_t, len); + pullup = TRUE; + } + ip_ver = IPV6_VERSION; + } else { + DTRACE_IP1(invalid__etype, uint16_t, etype); + break; + } + if (pullup) { + if ((p->cp_mbuf = m_pullup(p->cp_mbuf, hlen)) == + NULL) { + return ENOMEM; + } + + eh = (struct ether_header *)mbuf_data( + p->cp_mbuf); + } + 
mbuf_set_service_class(p->cp_mbuf, MBUF_SC_BE); + mcast_buf = (uint8_t *)(eh + 1); + /* + * ifnet_mcast_clear_dscp() will finish the work below. + * Note that the pullups above ensure that mcast_buf + * points to a full IP header. + */ + } break; default: VERIFY(0); /* NOTREACHED */ + __builtin_unreachable(); + } + + if (mcast_buf != NULL) { + ifnet_mcast_clear_dscp(mcast_buf, ip_ver); } if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) { @@ -3354,7 +3892,8 @@ ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype, ifp->if_start_delay_idle = 0; } else { if (ifp->if_start_delay_idle >= 10) { - ifp->if_eflags &= ~(IFEF_DELAY_START); + ifp->if_eflags &= + ~(IFEF_DELAY_START); ifnet_delay_start_disabled++; } else { ifp->if_start_delay_idle++; @@ -3373,17 +3912,8 @@ ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype, ifp->if_eflags &= ~(IFEF_DELAY_START); } - switch (ptype) { - case QP_MBUF: - /* enqueue the packet (caller consumes object) */ - error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop); - m = NULL; - break; - - - default: - break; - } + /* enqueue the packet (caller consumes object) */ + error = ifclassq_enqueue(&ifp->if_snd, p, pdrop); /* * Tell the driver to start dequeueing; do this even when the queue @@ -3391,43 +3921,76 @@ ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype, * be dequeueing from other unsuspended queues. 
*/ if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) && - ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) + ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) { ifnet_start(ifp); + } + + return error; +} - return (error); +int +ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts) +{ + struct ifnet *ifp = handle; + boolean_t pdrop; /* dummy */ + uint32_t i; + + ASSERT(n_pkts >= 1); + for (i = 0; i < n_pkts - 1; i++) { + (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, + FALSE, &pdrop); + } + /* flush with the last packet */ + (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop); + + return 0; +} + +static inline errno_t +ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush, + boolean_t *pdrop) +{ + if (ifp->if_output_netem != NULL) { + return netem_enqueue(ifp->if_output_netem, pkt, pdrop); + } else { + return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop); + } } errno_t ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) { boolean_t pdrop; - return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop)); + return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop); } errno_t ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush, boolean_t *pdrop) { + classq_pkt_t pkt; + if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) || m->m_nextpkt != NULL) { if (m != NULL) { m_freem_list(m); *pdrop = TRUE; } - return (EINVAL); + return EINVAL; } else if (!(ifp->if_eflags & IFEF_TXSTART) || !IF_FULLY_ATTACHED(ifp)) { /* flag tested without lock for performance */ m_freem(m); *pdrop = TRUE; - return (ENXIO); + return ENXIO; } else if (!(ifp->if_flags & IFF_UP)) { m_freem(m); *pdrop = TRUE; - return (ENETDOWN); + return ENETDOWN; } - return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop)); + CLASSQ_PKT_INIT_MBUF(&pkt, m); + return ifnet_enqueue_common(ifp, &pkt, flush, pdrop); } @@ -3435,21 +3998,24 @@ errno_t ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp) { errno_t rc; - 
classq_pkt_type_t ptype; - if (ifp == NULL || mp == NULL) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART) || - ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) - return (ENXIO); - if (!ifnet_is_attached(ifp, 1)) - return (ENXIO); + classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); + + if (ifp == NULL || mp == NULL) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) { + return ENXIO; + } + if (!ifnet_is_attached(ifp, 1)) { + return ENXIO; + } rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, - (void **)mp, NULL, NULL, NULL, &ptype); - VERIFY((*mp == NULL) || (ptype == QP_MBUF)); + &pkt, NULL, NULL, NULL); + VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL)); ifnet_decr_iorefcnt(ifp); - - return (rc); + *mp = pkt.cp_mbuf; + return rc; } errno_t @@ -3457,21 +4023,24 @@ ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc, struct mbuf **mp) { errno_t rc; - classq_pkt_type_t ptype; - if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART) || - ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) - return (ENXIO); - if (!ifnet_is_attached(ifp, 1)) - return (ENXIO); + classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); + + if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) { + return ENXIO; + } + if (!ifnet_is_attached(ifp, 1)) { + return ENXIO; + } rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, - CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL, - NULL, &ptype); - VERIFY((*mp == NULL) || (ptype == QP_MBUF)); + CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL); + VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL)); ifnet_decr_iorefcnt(ifp); - return (rc); + *mp = pkt.cp_mbuf; + return rc; } errno_t @@ -3479,21 +4048,28 @@ ifnet_dequeue_multi(struct 
ifnet *ifp, u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len) { errno_t rc; - classq_pkt_type_t ptype; - if (ifp == NULL || head == NULL || pkt_limit < 1) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART) || - ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) - return (ENXIO); - if (!ifnet_is_attached(ifp, 1)) - return (ENXIO); + classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head); + classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail); + + if (ifp == NULL || head == NULL || pkt_limit < 1) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) { + return ENXIO; + } + if (!ifnet_is_attached(ifp, 1)) { + return ENXIO; + } rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit, - CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt, - len, &ptype); - VERIFY((*head == NULL) || (ptype == QP_MBUF)); + CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len); + VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL)); ifnet_decr_iorefcnt(ifp); - return (rc); + *head = pkt_head.cp_mbuf; + if (tail != NULL) { + *tail = pkt_tail.cp_mbuf; + } + return rc; } errno_t @@ -3501,20 +4077,28 @@ ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len) { errno_t rc; - classq_pkt_type_t ptype; - if (ifp == NULL || head == NULL || byte_limit < 1) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART) || - ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) - return (ENXIO); - if (!ifnet_is_attached(ifp, 1)) - return (ENXIO); + classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head); + classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail); + + if (ifp == NULL || head == NULL || byte_limit < 1) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) { + return ENXIO; + } + 
if (!ifnet_is_attached(ifp, 1)) { + return ENXIO; + } rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT, - byte_limit, (void **)head, (void **)tail, cnt, len, &ptype); - VERIFY((*head == NULL) || (ptype == QP_MBUF)); + byte_limit, &pkt_head, &pkt_tail, cnt, len); + VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL)); ifnet_decr_iorefcnt(ifp); - return (rc); + *head = pkt_head.cp_mbuf; + if (tail != NULL) { + *tail = pkt_tail.cp_mbuf; + } + return rc; } errno_t @@ -3523,22 +4107,30 @@ ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc, u_int32_t *len) { errno_t rc; - classq_pkt_type_t ptype; + classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head); + classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail); + if (ifp == NULL || head == NULL || pkt_limit < 1 || - !MBUF_VALID_SC(sc)) - return (EINVAL); - else if (!(ifp->if_eflags & IFEF_TXSTART) || - ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) - return (ENXIO); - if (!ifnet_is_attached(ifp, 1)) - return (ENXIO); + !MBUF_VALID_SC(sc)) { + return EINVAL; + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) { + return ENXIO; + } + if (!ifnet_is_attached(ifp, 1)) { + return ENXIO; + } rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit, - CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, - (void **)tail, cnt, len, &ptype); - VERIFY((*head == NULL) || (ptype == QP_MBUF)); + CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, + cnt, len); + VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL)); ifnet_decr_iorefcnt(ifp); - return (rc); + *head = pkt_head.cp_mbuf; + if (tail != NULL) { + *tail = pkt_tail.cp_mbuf; + } + return rc; } #if !CONFIG_EMBEDDED @@ -3547,20 +4139,41 @@ ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest, const char *dest_linkaddr, const char *frame_type, u_int32_t *pre, u_int32_t *post) { - if (pre != NULL) + if (pre != NULL) { *pre = 0; - if 
(post != NULL) + } + if (post != NULL) { *post = 0; + } - return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type)); + return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type); } #endif /* !CONFIG_EMBEDDED */ +static boolean_t +packet_has_vlan_tag(struct mbuf * m) +{ + u_int tag = 0; + + if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) { + tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag); + if (tag == 0) { + /* the packet is just priority-tagged, clear the bit */ + m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID; + } + } + return tag != 0; +} + static int dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p, char **frame_header_p, protocol_family_t protocol_family) { - struct ifnet_filter *filter; + boolean_t is_vlan_packet = FALSE; + struct ifnet_filter *filter; + struct mbuf *m = *m_p; + + is_vlan_packet = packet_has_vlan_tag(m); /* * Pass the inbound packet to the interface filters @@ -3571,6 +4184,12 @@ dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p, TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { int result; + /* exclude VLAN packets from external filters PR-3586856 */ + if (is_vlan_packet && + (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) { + continue; + } + if (!filter->filt_skip && filter->filt_input != NULL && (filter->filt_protocol == 0 || filter->filt_protocol == protocol_family)) { @@ -3584,7 +4203,7 @@ dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p, /* we're done with the filter list */ if_flt_monitor_unbusy(ifp); lck_mtx_unlock(&ifp->if_flt_lock); - return (result); + return result; } } } @@ -3596,17 +4215,22 @@ dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p, * Strip away M_PROTO1 bit prior to sending packet up the stack as * it is meant to be local to a subsystem -- if_bridge for M_PROTO1 */ - if (*m_p != NULL) + if (*m_p != NULL) { (*m_p)->m_flags &= ~M_PROTO1; + } - return (0); + return 0; } static int dlil_interface_filters_output(struct ifnet 
*ifp, struct mbuf **m_p, protocol_family_t protocol_family) { - struct ifnet_filter *filter; + boolean_t is_vlan_packet; + struct ifnet_filter *filter; + struct mbuf *m = *m_p; + + is_vlan_packet = packet_has_vlan_tag(m); /* * Pass the outbound packet to the interface filters @@ -3617,6 +4241,12 @@ dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p, TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { int result; + /* exclude VLAN packets from external filters PR-3586856 */ + if (is_vlan_packet && + (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) { + continue; + } + if (!filter->filt_skip && filter->filt_output != NULL && (filter->filt_protocol == 0 || filter->filt_protocol == protocol_family)) { @@ -3630,7 +4260,7 @@ dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p, /* we're done with the filter list */ if_flt_monitor_unbusy(ifp); lck_mtx_unlock(&ifp->if_flt_lock); - return (result); + return result; } } } @@ -3638,7 +4268,7 @@ dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p, if_flt_monitor_unbusy(ifp); lck_mtx_unlock(&ifp->if_flt_lock); - return (0); + return 0; } static void @@ -3649,8 +4279,8 @@ dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m) if (ifproto->proto_kpi == kProtoKPI_v1) { /* Version 1 protocols get one packet at a time */ while (m != NULL) { - char * frame_header; - mbuf_t next_packet; + char * frame_header; + mbuf_t next_packet; next_packet = m->m_nextpkt; m->m_nextpkt = NULL; @@ -3658,49 +4288,60 @@ dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m) m->m_pkthdr.pkt_hdr = NULL; error = (*ifproto->kpi.v1.input)(ifproto->ifp, ifproto->protocol_family, m, frame_header); - if (error != 0 && error != EJUSTRETURN) + if (error != 0 && error != EJUSTRETURN) { m_freem(m); + } m = next_packet; } } else if (ifproto->proto_kpi == kProtoKPI_v2) { /* Version 2 protocols support packet lists */ error = (*ifproto->kpi.v2.input)(ifproto->ifp, ifproto->protocol_family, m); - if (error != 0 && error != 
EJUSTRETURN) + if (error != 0 && error != EJUSTRETURN) { m_freem_list(m); + } } } static void dlil_input_stats_add(const struct ifnet_stat_increment_param *s, - struct dlil_threading_info *inp, boolean_t poll) + struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll) { struct ifnet_stat_increment_param *d = &inp->stats; - if (s->packets_in != 0) + if (s->packets_in != 0) { d->packets_in += s->packets_in; - if (s->bytes_in != 0) + } + if (s->bytes_in != 0) { d->bytes_in += s->bytes_in; - if (s->errors_in != 0) + } + if (s->errors_in != 0) { d->errors_in += s->errors_in; + } - if (s->packets_out != 0) + if (s->packets_out != 0) { d->packets_out += s->packets_out; - if (s->bytes_out != 0) + } + if (s->bytes_out != 0) { d->bytes_out += s->bytes_out; - if (s->errors_out != 0) + } + if (s->errors_out != 0) { d->errors_out += s->errors_out; + } - if (s->collisions != 0) + if (s->collisions != 0) { d->collisions += s->collisions; - if (s->dropped != 0) + } + if (s->dropped != 0) { d->dropped += s->dropped; + } - if (poll) - PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in); + if (poll) { + PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in); + } } -static void +static boolean_t dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp) { struct ifnet_stat_increment_param *s = &inp->stats; @@ -3744,70 +4385,71 @@ dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp) s->dropped = 0; } - if (ifp->if_data_threshold != 0) { - lck_mtx_convert_spin(&inp->input_lck); - ifnet_notify_data_threshold(ifp); - } - /* * No need for atomic operations as they are modified here * only from within the DLIL input thread context. 
*/ - if (inp->tstats.packets != 0) { - inp->pstats.ifi_poll_packets += inp->tstats.packets; - inp->tstats.packets = 0; + if (ifp->if_poll_tstats.packets != 0) { + ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets; + ifp->if_poll_tstats.packets = 0; } - if (inp->tstats.bytes != 0) { - inp->pstats.ifi_poll_bytes += inp->tstats.bytes; - inp->tstats.bytes = 0; + if (ifp->if_poll_tstats.bytes != 0) { + ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes; + ifp->if_poll_tstats.bytes = 0; } + + return ifp->if_data_threshold != 0; } __private_extern__ void dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m) { - return (dlil_input_packet_list_common(ifp, m, 0, - IFNET_MODEL_INPUT_POLL_OFF, FALSE)); + return dlil_input_packet_list_common(ifp, m, 0, + IFNET_MODEL_INPUT_POLL_OFF, FALSE); } __private_extern__ void dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m, u_int32_t cnt, ifnet_model_t mode) { - return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE)); + return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE); } static void dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, u_int32_t cnt, ifnet_model_t mode, boolean_t ext) { - int error = 0; - protocol_family_t protocol_family; - mbuf_t next_packet; - ifnet_t ifp = ifp_param; - char * frame_header; - struct if_proto * last_ifproto = NULL; - mbuf_t pkt_first = NULL; - mbuf_t * pkt_next = NULL; - u_int32_t poll_thresh = 0, poll_ival = 0; + int error = 0; + protocol_family_t protocol_family; + mbuf_t next_packet; + ifnet_t ifp = ifp_param; + char *frame_header = NULL; + struct if_proto *last_ifproto = NULL; + mbuf_t pkt_first = NULL; + mbuf_t *pkt_next = NULL; + u_int32_t poll_thresh = 0, poll_ival = 0; KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 && - (poll_ival = if_rxpoll_interval_pkts) > 0) + (poll_ival = if_rxpoll_interval_pkts) > 0) { poll_thresh = cnt; + } 
while (m != NULL) { struct if_proto *ifproto = NULL; int iorefcnt = 0; - uint32_t pktf_mask; /* pkt flags to preserve */ + uint32_t pktf_mask; /* pkt flags to preserve */ - if (ifp_param == NULL) + if (ifp_param == NULL) { ifp = m->m_pkthdr.rcvif; + } - if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 && - poll_ival > 0 && (--poll_thresh % poll_ival) == 0) + if ((ifp->if_eflags & IFEF_RXPOLL) && + (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 && + poll_ival > 0 && (--poll_thresh % poll_ival) == 0) { ifnet_poll(ifp); + } /* Check if this mbuf looks valid */ MBUF_INPUT_CHECK(m, ifp); @@ -3823,7 +4465,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, * away, so optimize for that. */ if (ifp != lo_ifp) { - if (!ifnet_is_attached(ifp, 1)) { + if (!ifnet_datamov_begin(ifp)) { m_freem(m); goto next; } @@ -3838,7 +4480,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, * info to allow for connectivity between loopback * and local interface addresses. 
*/ - pktf_mask = (PKTF_LOOP|PKTF_IFAINFO); + pktf_mask = (PKTF_LOOP | PKTF_IFAINFO); } /* make sure packet comes in clean */ @@ -3852,16 +4494,82 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, &protocol_family); ifnet_lock_done(ifp); if (error != 0) { - if (error == EJUSTRETURN) + if (error == EJUSTRETURN) { goto next; + } protocol_family = 0; } + pktap_input(ifp, protocol_family, m, frame_header); + + /* Drop v4 packets received on CLAT46 enabled interface */ + if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) { + m_freem(m); + ip6stat.ip6s_clat464_in_v4_drop++; + goto next; + } + + /* Translate the packet if it is received on CLAT interface */ + if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp) + && dlil_is_clat_needed(protocol_family, m)) { + char *data = NULL; + struct ether_header eh; + struct ether_header *ehp = NULL; + + if (ifp->if_type == IFT_ETHER) { + ehp = (struct ether_header *)(void *)frame_header; + /* Skip RX Ethernet packets if they are not IPV6 */ + if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) { + goto skip_clat; + } + + /* Keep a copy of frame_header for Ethernet packets */ + bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN); + } + error = dlil_clat64(ifp, &protocol_family, &m); + data = (char *) mbuf_data(m); + if (error != 0) { + m_freem(m); + ip6stat.ip6s_clat464_in_drop++; + goto next; + } + /* Native v6 should be No-op */ + if (protocol_family != PF_INET) { + goto skip_clat; + } + + /* Do this only for translated v4 packets. */ + switch (ifp->if_type) { + case IFT_CELLULAR: + frame_header = data; + break; + case IFT_ETHER: + /* + * Drop if the mbuf doesn't have enough + * space for Ethernet header + */ + if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) { + m_free(m); + ip6stat.ip6s_clat464_in_drop++; + goto next; + } + /* + * Set the frame_header ETHER_HDR_LEN bytes + * preceeding the data pointer. Change + * the ether_type too. 
+ */ + frame_header = data - ETHER_HDR_LEN; + eh.ether_type = htons(ETHERTYPE_IP); + bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN); + break; + } + } +skip_clat: if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) && - !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { dlil_input_cksum_dbg(ifp, m, frame_header, protocol_family); - + } /* * For partial checksum offload, we expect the driver to * set the start offset indicating the start of the span @@ -3869,15 +4577,17 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, * adjust this start offset accordingly because the data * pointer has been advanced beyond the link-layer header. * - * Don't adjust if the interface is a bridge member, as - * the adjustment will occur from the context of the - * bridge interface during input. + * Virtual lan types (bridge, vlan, bond) can call + * dlil_input_packet_list() with the same packet with the + * checksum flags set. Set a flag indicating that the + * adjustment has already been done. 
*/ - if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags & + if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) { + /* adjustment has already been done */ + } else if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) == (CSUM_DATA_VALID | CSUM_PARTIAL)) { int adj; - if (frame_header == NULL || frame_header < (char *)mbuf_datastart(m) || frame_header > (char *)m->m_data || @@ -3889,24 +4599,27 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, } else { m->m_pkthdr.csum_rx_start -= adj; } + /* make sure we don't adjust more than once */ + m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE; + } + if (clat_debug) { + pktap_input(ifp, protocol_family, m, frame_header); } - pktap_input(ifp, protocol_family, m, frame_header); - - if (m->m_flags & (M_BCAST|M_MCAST)) + if (m->m_flags & (M_BCAST | M_MCAST)) { atomic_add_64(&ifp->if_imcasts, 1); + } - /* run interface filters, exclude VLAN packets PR-3586856 */ - if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { - error = dlil_interface_filters_input(ifp, &m, - &frame_header, protocol_family); - if (error != 0) { - if (error != EJUSTRETURN) - m_freem(m); - goto next; + /* run interface filters */ + error = dlil_interface_filters_input(ifp, &m, + &frame_header, protocol_family); + if (error != 0) { + if (error != EJUSTRETURN) { + m_freem(m); } + goto next; } - if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) { + if ((m->m_flags & M_PROMISC) != 0) { m_freem(m); goto next; } @@ -3923,7 +4636,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, VERIFY(ifproto == NULL); ifnet_lock_shared(ifp); /* callee holds a proto refcnt upon success */ - ifproto = find_attached_proto(ifp, protocol_family); + ifproto = find_attached_proto(ifp, protocol_family); ifnet_lock_done(ifp); } if (ifproto == NULL) { @@ -3965,10 +4678,12 @@ next: m = next_packet; /* update the driver's multicast filter, if needed */ - if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) + if 
(ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) { ifp->if_updatemcasts = 0; - if (iorefcnt == 1) - ifnet_decr_iorefcnt(ifp); + } + if (iorefcnt == 1) { + ifnet_datamov_end(ifp); + } } KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0); @@ -3980,15 +4695,16 @@ if_mcasts_update(struct ifnet *ifp) errno_t err; err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL); - if (err == EAFNOSUPPORT) + if (err == EAFNOSUPPORT) { err = 0; - printf("%s: %s %d suspended link-layer multicast membership(s) " + } + DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) " "(err=%d)\n", if_name(ifp), (err == 0 ? "successfully restored" : "failed to restore"), ifp->if_updatemcasts, err); /* just return success */ - return (0); + return 0; } /* If ifp is set, we will increment the generation for the interface */ @@ -4003,10 +4719,31 @@ dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event) necp_update_all_clients(); #endif /* NECP */ - return (kev_post_msg(event)); + return kev_post_msg(event); } -#define TMP_IF_PROTO_ARR_SIZE 10 +__private_extern__ void +dlil_post_sifflags_msg(struct ifnet * ifp) +{ + struct kev_msg ev_msg; + struct net_event_data ev_data; + + bzero(&ev_data, sizeof(ev_data)); + bzero(&ev_msg, sizeof(ev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_DL_SUBCLASS; + ev_msg.event_code = KEV_DL_SIFFLAGS; + strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); + ev_data.if_family = ifp->if_family; + ev_data.if_unit = (u_int32_t) ifp->if_unit; + ev_msg.dv[0].data_length = sizeof(struct net_event_data); + ev_msg.dv[0].data_ptr = &ev_data; + ev_msg.dv[1].data_length = 0; + dlil_post_complete_msg(ifp, &ev_msg); +} + +#define TMP_IF_PROTO_ARR_SIZE 10 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation) { @@ -4039,8 +4776,9 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_genera 
lck_mtx_unlock(&ifp->if_flt_lock); /* Get an io ref count if the interface is attached */ - if (!ifnet_is_attached(ifp, 1)) + if (!ifnet_is_attached(ifp, 1)) { goto done; + } /* * An embedded tmp_list_entry in if_proto may still get @@ -4048,7 +4786,7 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_genera * therefore we are avoiding embedded pointers here. */ ifnet_lock_shared(ifp); - if_proto_count = dlil_ifp_proto_count(ifp); + if_proto_count = dlil_ifp_protolist(ifp, NULL, 0); if (if_proto_count) { int i; VERIFY(ifp->if_proto_hash != NULL); @@ -4056,7 +4794,7 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_genera tmp_ifproto_arr = tmp_ifproto_stack_arr; } else { MALLOC(tmp_ifproto_arr, struct if_proto **, - sizeof (*tmp_ifproto_arr) * if_proto_count, + sizeof(*tmp_ifproto_arr) * if_proto_count, M_TEMP, M_ZERO); if (tmp_ifproto_arr == NULL) { ifnet_lock_done(ifp); @@ -4099,13 +4837,14 @@ cleanup: } /* Pass the event to the interface */ - if (ifp->if_event != NULL) + if (ifp->if_event != NULL) { ifp->if_event(ifp, event); + } /* Release the io ref count */ ifnet_decr_iorefcnt(ifp); done: - return (dlil_post_complete_msg(update_generation ? ifp : NULL, event)); + return dlil_post_complete_msg(update_generation ? 
ifp : NULL, event); } errno_t @@ -4114,10 +4853,11 @@ ifnet_event(ifnet_t ifp, struct kern_event_msg *event) struct kev_msg kev_msg; int result = 0; - if (ifp == NULL || event == NULL) - return (EINVAL); + if (ifp == NULL || event == NULL) { + return EINVAL; + } - bzero(&kev_msg, sizeof (kev_msg)); + bzero(&kev_msg, sizeof(kev_msg)); kev_msg.vendor_code = event->vendor_code; kev_msg.kev_class = event->kev_class; kev_msg.kev_subclass = event->kev_subclass; @@ -4128,7 +4868,7 @@ ifnet_event(ifnet_t ifp, struct kern_event_msg *event) result = dlil_event_internal(ifp, &kev_msg, TRUE); - return (result); + return result; } #if CONFIG_MACF_NET @@ -4146,37 +4886,41 @@ dlil_get_socket_type(struct mbuf **mp, int family, int raw) switch (family) { case PF_INET: m = m_pullup(*mp, sizeof(struct ip)); - if (m == NULL) + if (m == NULL) { break; + } *mp = m; ip = mtod(m, struct ip *); - if (ip->ip_p == IPPROTO_TCP) + if (ip->ip_p == IPPROTO_TCP) { type = SOCK_STREAM; - else if (ip->ip_p == IPPROTO_UDP) + } else if (ip->ip_p == IPPROTO_UDP) { type = SOCK_DGRAM; + } break; case PF_INET6: m = m_pullup(*mp, sizeof(struct ip6_hdr)); - if (m == NULL) + if (m == NULL) { break; + } *mp = m; ip6 = mtod(m, struct ip6_hdr *); - if (ip6->ip6_nxt == IPPROTO_TCP) + if (ip6->ip6_nxt == IPPROTO_TCP) { type = SOCK_STREAM; - else if (ip6->ip6_nxt == IPPROTO_UDP) + } else if (ip6->ip6_nxt == IPPROTO_UDP) { type = SOCK_DGRAM; + } break; } } - return (type); + return type; } #endif static void dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls) { - mbuf_t n = m; + mbuf_t n = m; int chainlen = 0; while (n != NULL) { @@ -4184,24 +4928,24 @@ dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls) n = n->m_next; } switch (chainlen) { - case 0: - break; - case 1: - atomic_add_64(&cls->cls_one, 1); - break; - case 2: - atomic_add_64(&cls->cls_two, 1); - break; - case 3: - atomic_add_64(&cls->cls_three, 1); - break; - case 4: - atomic_add_64(&cls->cls_four, 1); - break; - case 5: - default: - 
atomic_add_64(&cls->cls_five_or_more, 1); - break; + case 0: + break; + case 1: + atomic_add_64(&cls->cls_one, 1); + break; + case 2: + atomic_add_64(&cls->cls_two, 1); + break; + case 3: + atomic_add_64(&cls->cls_three, 1); + break; + case 4: + atomic_add_64(&cls->cls_four, 1); + break; + case 5: + default: + atomic_add_64(&cls->cls_five_or_more, 1); + break; } } @@ -4231,16 +4975,21 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, int retval = 0; char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; char dst_linkaddr_buffer[MAX_LINKADDR * 4]; - struct if_proto *proto = NULL; - mbuf_t m; - mbuf_t send_head = NULL; - mbuf_t *send_tail = &send_head; + struct if_proto *proto = NULL; + mbuf_t m = NULL; + mbuf_t send_head = NULL; + mbuf_t *send_tail = &send_head; int iorefcnt = 0; u_int32_t pre = 0, post = 0; u_int32_t fpkts = 0, fbytes = 0; int32_t flen = 0; struct timespec now; u_int64_t now_nsec; + boolean_t did_clat46 = FALSE; + protocol_family_t old_proto_family = proto_family; + struct sockaddr_in6 dest6; + struct rtentry *rt = NULL; + u_int32_t m_loop_set = 0; KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); @@ -4248,7 +4997,7 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, * Get an io refcnt if the interface is attached to prevent ifnet_detach * from happening while this operation is in progress */ - if (!ifnet_is_attached(ifp, 1)) { + if (!ifnet_datamov_begin(ifp)) { retval = ENXIO; goto cleanup; } @@ -4257,8 +5006,9 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, VERIFY(ifp->if_output_dlil != NULL); /* update the driver's multicast filter, if needed */ - if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) + if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) { ifp->if_updatemcasts = 0; + } frame_type = frame_type_buffer; dst_linkaddr = dst_linkaddr_buffer; @@ -4276,13 +5026,93 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, 
mbuf_t packetlist, } preout_again: - if (packetlist == NULL) + if (packetlist == NULL) { goto cleanup; + } m = packetlist; packetlist = packetlist->m_nextpkt; m->m_nextpkt = NULL; + /* + * Perform address family translation for the first + * packet outside the loop in order to perform address + * lookup for the translated proto family. + */ + if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) && + (ifp->if_type == IFT_CELLULAR || + dlil_is_clat_needed(proto_family, m))) { + retval = dlil_clat46(ifp, &proto_family, &m); + /* + * Go to the next packet if translation fails + */ + if (retval != 0) { + m_freem(m); + m = NULL; + ip6stat.ip6s_clat464_out_drop++; + /* Make sure that the proto family is PF_INET */ + ASSERT(proto_family == PF_INET); + goto preout_again; + } + /* + * Free the old one and make it point to the IPv6 proto structure. + * + * Change proto for the first time we have successfully + * performed address family translation. + */ + if (!did_clat46 && proto_family == PF_INET6) { + did_clat46 = TRUE; + + if (proto != NULL) { + if_proto_free(proto); + } + ifnet_lock_shared(ifp); + /* callee holds a proto refcnt upon success */ + proto = find_attached_proto(ifp, proto_family); + if (proto == NULL) { + ifnet_lock_done(ifp); + retval = ENXIO; + m_freem(m); + m = NULL; + goto cleanup; + } + ifnet_lock_done(ifp); + if (ifp->if_type == IFT_ETHER) { + /* Update the dest to translated v6 address */ + dest6.sin6_len = sizeof(struct sockaddr_in6); + dest6.sin6_family = AF_INET6; + dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst; + dest = (const struct sockaddr *)&dest6; + + /* + * Lookup route to the translated destination + * Free this route ref during cleanup + */ + rt = rtalloc1_scoped((struct sockaddr *)&dest6, + 0, 0, ifp->if_index); + + route = rt; + } + } + } + + /* + * This path gets packet chain going to the same destination. 
+ * The pre output routine is used to either trigger resolution of + * the next hop or retreive the next hop's link layer addressing. + * For ex: ether_inet(6)_pre_output routine. + * + * If the routine returns EJUSTRETURN, it implies that packet has + * been queued, and therefore we have to call preout_again for the + * following packet in the chain. + * + * For errors other than EJUSTRETURN, the current packet is freed + * and the rest of the chain (pointed by packetlist is freed as + * part of clean up. + * + * Else if there is no error the retrieved information is used for + * all the packets in the chain. + */ if (raw == 0) { proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ? proto->kpi.v1.pre_output : proto->kpi.v2.pre_output); @@ -4292,9 +5122,11 @@ preout_again: frame_type, dst_linkaddr); if (retval != 0) { - if (retval == EJUSTRETURN) + if (retval == EJUSTRETURN) { goto preout_again; + } m_freem(m); + m = NULL; goto cleanup; } } @@ -4310,18 +5142,41 @@ preout_again: #endif do { + /* + * Perform address family translation if needed. + * For now we only support stateless 4 to 6 translation + * on the out path. + * + * The routine below translates IP header, updates protocol + * checksum and also translates ICMP. + * + * We skip the first packet as it is already translated and + * the proto family is set to PF_INET6. 
+ */ + if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) && + (ifp->if_type == IFT_CELLULAR || + dlil_is_clat_needed(proto_family, m))) { + retval = dlil_clat46(ifp, &proto_family, &m); + /* Goto the next packet if the translation fails */ + if (retval != 0) { + m_freem(m); + m = NULL; + ip6stat.ip6s_clat464_out_drop++; + goto next; + } + } + #if CONFIG_DTRACE if (!raw && proto_family == PF_INET) { struct ip *ip = mtod(m, struct ip *); DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, - struct ip *, ip, struct ifnet *, ifp, - struct ip *, ip, struct ip6_hdr *, NULL); - + struct ip *, ip, struct ifnet *, ifp, + struct ip *, ip, struct ip6_hdr *, NULL); } else if (!raw && proto_family == PF_INET6) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, - struct ip6_hdr *, ip6, struct ifnet *, ifp, - struct ip *, NULL, struct ip6_hdr *, ip6); + struct ip6_hdr *, ip6, struct ifnet *, ifp, + struct ip *, NULL, struct ip6_hdr *, ip6); } #endif /* CONFIG_DTRACE */ @@ -4341,12 +5196,13 @@ preout_again: m->m_pkthdr.rcvif = ifp; rcvif_set = 1; } - + m_loop_set = m->m_flags & M_LOOP; retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type, &pre, &post); if (retval != 0) { - if (retval != EJUSTRETURN) + if (retval != EJUSTRETURN) { m_freem(m); + } goto next; } @@ -4361,9 +5217,10 @@ preout_again: m->m_pkthdr.csum_tx_start += pre; } - if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) + if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) { dlil_output_cksum_dbg(ifp, m, pre, proto_family); + } /* * Clear the ifp if it was set above, and to be @@ -4374,22 +5231,20 @@ preout_again: * are clearing the one that will go down to the * layer below. */ - if (rcvif_set && m->m_pkthdr.rcvif == ifp) + if (rcvif_set && m->m_pkthdr.rcvif == ifp) { m->m_pkthdr.rcvif = NULL; + } } /* * Let interface filters (if any) do their thing ... 
*/ - /* Do not pass VLAN tagged packets to filters PR-3586856 */ - if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { - retval = dlil_interface_filters_output(ifp, - &m, proto_family); - if (retval != 0) { - if (retval != EJUSTRETURN) - m_freem(m); - goto next; + retval = dlil_interface_filters_output(ifp, &m, proto_family); + if (retval != 0) { + if (retval != EJUSTRETURN) { + m_freem(m); } + goto next; } /* * Strip away M_PROTO1 bit prior to sending packet @@ -4406,8 +5261,9 @@ preout_again: * not cross page(s), the following is a no-op. */ if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) { - if ((m = m_normalize(m)) == NULL) + if ((m = m_normalize(m)) == NULL) { goto next; + } } /* @@ -4447,8 +5303,8 @@ preout_again: * final checksum and we shouldn't recompute it. */ if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) && - (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) == - (CSUM_DATA_VALID|CSUM_PARTIAL)) { + (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS; m->m_pkthdr.csum_data = 0; } @@ -4486,97 +5342,429 @@ preout_again: fpkts++; } if (retval != 0 && dlil_verbose) { - printf("%s: output error on %s retval = %d\n", + DLIL_PRINTF("%s: output error on %s retval = %d\n", __func__, if_name(ifp), retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); } - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); + +next: + m = packetlist; + if (m != NULL) { + m->m_flags |= m_loop_set; + packetlist = packetlist->m_nextpkt; + m->m_nextpkt = NULL; + } + /* Reset the proto family to old proto family for CLAT */ + if (did_clat46) { + proto_family = old_proto_family; + } + } while (m != NULL); + + if (send_head != NULL) { + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, + 0, 0, 0, 0, 0); + if (ifp->if_eflags & IFEF_SENDLIST) { + retval = (*ifp->if_output_dlil)(ifp, send_head); + 
if (retval == EQFULL || retval == EQSUSPENDED) { + if (adv != NULL) { + adv->code = (retval == EQFULL ? + FADV_FLOW_CONTROLLED : + FADV_SUSPENDED); + } + retval = 0; + } + if (retval == 0 && flen > 0) { + fbytes += flen; + fpkts++; + } + if (retval != 0 && dlil_verbose) { + DLIL_PRINTF("%s: output error on %s retval = %d\n", + __func__, if_name(ifp), retval); + } + } else { + struct mbuf *send_m; + int enq_cnt = 0; + VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI); + while (send_head != NULL) { + send_m = send_head; + send_head = send_m->m_nextpkt; + send_m->m_nextpkt = NULL; + retval = (*ifp->if_output_dlil)(ifp, send_m); + if (retval == EQFULL || retval == EQSUSPENDED) { + if (adv != NULL) { + adv->code = (retval == EQFULL ? + FADV_FLOW_CONTROLLED : + FADV_SUSPENDED); + } + retval = 0; + } + if (retval == 0) { + enq_cnt++; + if (flen > 0) { + fpkts++; + } + } + if (retval != 0 && dlil_verbose) { + DLIL_PRINTF("%s: output error on %s " + "retval = %d\n", + __func__, if_name(ifp), retval); + } + } + if (enq_cnt > 0) { + fbytes += flen; + ifnet_start(ifp); + } + } + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); + } + + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0); + +cleanup: + if (fbytes > 0) { + ifp->if_fbytes += fbytes; + } + if (fpkts > 0) { + ifp->if_fpackets += fpkts; + } + if (proto != NULL) { + if_proto_free(proto); + } + if (packetlist) { /* if any packets are left, clean up */ + mbuf_freem_list(packetlist); + } + if (retval == EJUSTRETURN) { + retval = 0; + } + if (iorefcnt == 1) { + ifnet_datamov_end(ifp); + } + if (rt != NULL) { + rtfree(rt); + rt = NULL; + } + + return retval; +} + +/* + * This routine checks if the destination address is not a loopback, link-local, + * multicast or broadcast address. 
+ */ +static int +dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m) +{ + int ret = 0; + switch (proto_family) { + case PF_INET: { + struct ip *iph = mtod(m, struct ip *); + if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) { + ret = 1; + } + break; + } + case PF_INET6: { + struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *); + if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) && + CLAT64_NEEDED(&ip6h->ip6_dst)) { + ret = 1; + } + break; + } + } + + return ret; +} +/* + * @brief This routine translates IPv4 packet to IPv6 packet, + * updates protocol checksum and also translates ICMP for code + * along with inner header translation. + * + * @param ifp Pointer to the interface + * @param proto_family pointer to protocol family. It is updated if function + * performs the translation successfully. + * @param m Pointer to the pointer pointing to the packet. Needed because this + * routine can end up changing the mbuf to a different one. + * + * @return 0 on success or else a negative value. + */ +static errno_t +dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m) +{ + VERIFY(*proto_family == PF_INET); + VERIFY(IS_INTF_CLAT46(ifp)); + + pbuf_t pbuf_store, *pbuf = NULL; + struct ip *iph = NULL; + struct in_addr osrc, odst; + uint8_t proto = 0; + struct in6_ifaddr *ia6_clat_src = NULL; + struct in6_addr *src = NULL; + struct in6_addr dst; + int error = 0; + uint32_t off = 0; + uint64_t tot_len = 0; + uint16_t ip_id_val = 0; + uint16_t ip_frag_off = 0; + + boolean_t is_frag = FALSE; + boolean_t is_first_frag = TRUE; + boolean_t is_last_frag = TRUE; + + pbuf_init_mbuf(&pbuf_store, *m, ifp); + pbuf = &pbuf_store; + iph = pbuf->pb_data; + + osrc = iph->ip_src; + odst = iph->ip_dst; + proto = iph->ip_p; + off = iph->ip_hl << 2; + ip_id_val = iph->ip_id; + ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK; + + tot_len = ntohs(iph->ip_len); + + /* + * For packets that are not first frags + * we only need to adjust CSUM. 
+ * For 4 to 6, Fragmentation header gets appended + * after proto translation. + */ + if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) { + is_frag = TRUE; + + /* If the offset is not zero, it is not first frag */ + if (ip_frag_off != 0) { + is_first_frag = FALSE; + } + + /* If IP_MF is set, then it is not last frag */ + if (ntohs(iph->ip_off) & IP_MF) { + is_last_frag = FALSE; + } + } + + /* + * Retrieve the local IPv6 CLAT46 address reserved for stateless + * translation. + */ + ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46); + if (ia6_clat_src == NULL) { + ip6stat.ip6s_clat464_out_nov6addr_drop++; + error = -1; + goto cleanup; + } + + src = &ia6_clat_src->ia_addr.sin6_addr; + + /* + * Translate IPv4 destination to IPv6 destination by using the + * prefixes learned through prior PLAT discovery. + */ + if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) { + ip6stat.ip6s_clat464_out_v6synthfail_drop++; + goto cleanup; + } + + /* Translate the IP header part first */ + error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p, + iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1; + + iph = NULL; /* Invalidate iph as pbuf has been modified */ + + if (error != 0) { + ip6stat.ip6s_clat464_out_46transfail_drop++; + goto cleanup; + } + + /* + * Translate protocol header, update checksum, checksum flags + * and related fields. + */ + error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst, + proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 
0 : -1; + + if (error != 0) { + ip6stat.ip6s_clat464_out_46proto_transfail_drop++; + goto cleanup; + } + + /* Now insert the IPv6 fragment header */ + if (is_frag) { + error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag); + + if (error != 0) { + ip6stat.ip6s_clat464_out_46frag_transfail_drop++; + goto cleanup; + } + } + +cleanup: + if (ia6_clat_src != NULL) { + IFA_REMREF(&ia6_clat_src->ia_ifa); + } + + if (pbuf_is_valid(pbuf)) { + *m = pbuf->pb_mbuf; + pbuf->pb_mbuf = NULL; + pbuf_destroy(pbuf); + } else { + error = -1; + ip6stat.ip6s_clat464_out_invalpbuf_drop++; + } + + if (error == 0) { + *proto_family = PF_INET6; + ip6stat.ip6s_clat464_out_success++; + } + + return error; +} + +/* + * @brief This routine translates incoming IPv6 to IPv4 packet, + * updates protocol checksum and also translates ICMPv6 outer + * and inner headers + * + * @return 0 on success or else a negative value. + */ +static errno_t +dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m) +{ + VERIFY(*proto_family == PF_INET6); + VERIFY(IS_INTF_CLAT46(ifp)); + + struct ip6_hdr *ip6h = NULL; + struct in6_addr osrc, odst; + uint8_t proto = 0; + struct in6_ifaddr *ia6_clat_dst = NULL; + struct in_ifaddr *ia4_clat_dst = NULL; + struct in_addr *dst = NULL; + struct in_addr src; + int error = 0; + uint32_t off = 0; + u_int64_t tot_len = 0; + uint8_t tos = 0; + boolean_t is_first_frag = TRUE; + + /* Incoming mbuf does not contain valid IP6 header */ + if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) || + ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) && + (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) { + ip6stat.ip6s_clat464_in_tooshort_drop++; + return -1; + } + + ip6h = mtod(*m, struct ip6_hdr *); + /* Validate that mbuf contains IP payload equal to ip6_plen */ + if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) { + ip6stat.ip6s_clat464_in_tooshort_drop++; + return -1; + } + + osrc = ip6h->ip6_src; + odst = 
ip6h->ip6_dst; + + /* + * Retrieve the local CLAT46 reserved IPv6 address. + * Let the packet pass if we don't find one, as the flag + * may get set before IPv6 configuration has taken place. + */ + ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46); + if (ia6_clat_dst == NULL) { + goto done; + } + + /* + * Check if the original dest in the packet is same as the reserved + * CLAT46 IPv6 address + */ + if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) { + pbuf_t pbuf_store, *pbuf = NULL; + pbuf_init_mbuf(&pbuf_store, *m, ifp); + pbuf = &pbuf_store; + + /* + * Retrieve the local CLAT46 IPv4 address reserved for stateless + * translation. + */ + ia4_clat_dst = inifa_ifpclatv4(ifp); + if (ia4_clat_dst == NULL) { + IFA_REMREF(&ia6_clat_dst->ia_ifa); + ip6stat.ip6s_clat464_in_nov4addr_drop++; + error = -1; + goto cleanup; + } + IFA_REMREF(&ia6_clat_dst->ia_ifa); -next: - m = packetlist; - if (m != NULL) { - packetlist = packetlist->m_nextpkt; - m->m_nextpkt = NULL; + /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */ + dst = &ia4_clat_dst->ia_addr.sin_addr; + if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) { + ip6stat.ip6s_clat464_in_v4synthfail_drop++; + error = -1; + goto cleanup; } - } while (m != NULL); - if (send_head != NULL) { - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, - 0, 0, 0, 0, 0); - if (ifp->if_eflags & IFEF_SENDLIST) { - retval = (*ifp->if_output_dlil)(ifp, send_head); - if (retval == EQFULL || retval == EQSUSPENDED) { - if (adv != NULL) { - adv->code = (retval == EQFULL ? 
- FADV_FLOW_CONTROLLED : - FADV_SUSPENDED); - } - retval = 0; - } - if (retval == 0 && flen > 0) { - fbytes += flen; - fpkts++; - } - if (retval != 0 && dlil_verbose) { - printf("%s: output error on %s retval = %d\n", - __func__, if_name(ifp), retval); - } - } else { - struct mbuf *send_m; - int enq_cnt = 0; - VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI); - while (send_head != NULL) { - send_m = send_head; - send_head = send_m->m_nextpkt; - send_m->m_nextpkt = NULL; - retval = (*ifp->if_output_dlil)(ifp, send_m); - if (retval == EQFULL || retval == EQSUSPENDED) { - if (adv != NULL) { - adv->code = (retval == EQFULL ? - FADV_FLOW_CONTROLLED : - FADV_SUSPENDED); - } - retval = 0; - } - if (retval == 0) { - enq_cnt++; - if (flen > 0) - fpkts++; - } - if (retval != 0 && dlil_verbose) { - printf("%s: output error on %s " - "retval = %d\n", - __func__, if_name(ifp), retval); - } - } - if (enq_cnt > 0) { - fbytes += flen; - ifnet_start(ifp); - } + ip6h = pbuf->pb_data; + off = sizeof(struct ip6_hdr); + proto = ip6h->ip6_nxt; + tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff; + tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr); + + /* + * Translate the IP header and update the fragmentation + * header if needed + */ + error = (nat464_translate_64(pbuf, off, tos, &proto, + ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ? + 0 : -1; + + ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */ + + if (error != 0) { + ip6stat.ip6s_clat464_in_64transfail_drop++; + goto cleanup; } - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); - } - KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0); + /* + * Translate protocol header, update checksum, checksum flags + * and related fields. + */ + error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, + (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET, + NT_IN, !is_first_frag) == NT_NAT64) ? 
0 : -1; + + if (error != 0) { + ip6stat.ip6s_clat464_in_64proto_transfail_drop++; + goto cleanup; + } cleanup: - if (fbytes > 0) - ifp->if_fbytes += fbytes; - if (fpkts > 0) - ifp->if_fpackets += fpkts; - if (proto != NULL) - if_proto_free(proto); - if (packetlist) /* if any packets are left, clean up */ - mbuf_freem_list(packetlist); - if (retval == EJUSTRETURN) - retval = 0; - if (iorefcnt == 1) - ifnet_decr_iorefcnt(ifp); + if (ia4_clat_dst != NULL) { + IFA_REMREF(&ia4_clat_dst->ia_ifa); + } + + if (pbuf_is_valid(pbuf)) { + *m = pbuf->pb_mbuf; + pbuf->pb_mbuf = NULL; + pbuf_destroy(pbuf); + } else { + error = -1; + ip6stat.ip6s_clat464_in_invalpbuf_drop++; + } + + if (error == 0) { + *proto_family = PF_INET; + ip6stat.ip6s_clat464_in_success++; + } + } /* CLAT traffic */ - return (retval); +done: + return error; } errno_t @@ -4587,12 +5775,14 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, int retval = EOPNOTSUPP; int result = 0; - if (ifp == NULL || ioctl_code == 0) - return (EINVAL); + if (ifp == NULL || ioctl_code == 0) { + return EINVAL; + } /* Get an io ref count if the interface is attached */ - if (!ifnet_is_attached(ifp, 1)) - return (EOPNOTSUPP); + if (!ifnet_is_attached(ifp, 1)) { + return EOPNOTSUPP; + } /* * Run the interface filters first. 
@@ -4614,8 +5804,9 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, /* Only update retval if no one has handled the ioctl */ if (retval == EOPNOTSUPP || result == EJUSTRETURN) { - if (result == ENOTSUP) + if (result == ENOTSUP) { result = EOPNOTSUPP; + } retval = result; if (retval != 0 && retval != EOPNOTSUPP) { /* we're done with the filter list */ @@ -4632,7 +5823,7 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, /* Allow the protocol to handle the ioctl */ if (proto_fam != 0) { - struct if_proto *proto; + struct if_proto *proto; /* callee holds a proto refcnt upon success */ ifnet_lock_shared(ifp); @@ -4643,18 +5834,21 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, (proto->proto_kpi == kProtoKPI_v1 ? proto->kpi.v1.ioctl : proto->kpi.v2.ioctl); result = EOPNOTSUPP; - if (ioctlp != NULL) + if (ioctlp != NULL) { result = ioctlp(ifp, proto_fam, ioctl_code, ioctl_arg); + } if_proto_free(proto); /* Only update retval if no one has handled the ioctl */ if (retval == EOPNOTSUPP || result == EJUSTRETURN) { - if (result == ENOTSUP) + if (result == ENOTSUP) { result = EOPNOTSUPP; + } retval = result; - if (retval && retval != EOPNOTSUPP) + if (retval && retval != EOPNOTSUPP) { goto cleanup; + } } } } @@ -4666,13 +5860,15 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, * If it returns EOPNOTSUPP, ignore that, we may have * already handled this in the protocol or family. 
*/ - if (ifp->if_ioctl) + if (ifp->if_ioctl) { result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg); + } /* Only update retval if no one has handled the ioctl */ if (retval == EOPNOTSUPP || result == EJUSTRETURN) { - if (result == ENOTSUP) + if (result == ENOTSUP) { result = EOPNOTSUPP; + } retval = result; if (retval && retval != EOPNOTSUPP) { goto cleanup; @@ -4680,41 +5876,44 @@ ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, } cleanup: - if (retval == EJUSTRETURN) + if (retval == EJUSTRETURN) { retval = 0; + } ifnet_decr_iorefcnt(ifp); - return (retval); + return retval; } __private_extern__ errno_t dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback) { - errno_t error = 0; + errno_t error = 0; if (ifp->if_set_bpf_tap) { /* Get an io reference on the interface if it is attached */ - if (!ifnet_is_attached(ifp, 1)) - return (ENXIO); + if (!ifnet_is_attached(ifp, 1)) { + return ENXIO; + } error = ifp->if_set_bpf_tap(ifp, mode, callback); ifnet_decr_iorefcnt(ifp); } - return (error); + return error; } errno_t dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr, struct sockaddr *ll_addr, size_t ll_len) { - errno_t result = EOPNOTSUPP; + errno_t result = EOPNOTSUPP; struct if_proto *proto; const struct sockaddr *verify; proto_media_resolve_multi resolvep; - if (!ifnet_is_attached(ifp, 1)) - return (result); + if (!ifnet_is_attached(ifp, 1)) { + return result; + } bzero(ll_addr, ll_len); @@ -4725,23 +5924,25 @@ dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr, if (proto != NULL) { resolvep = (proto->proto_kpi == kProtoKPI_v1 ? 
proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi); - if (resolvep != NULL) + if (resolvep != NULL) { result = resolvep(ifp, proto_addr, (struct sockaddr_dl *)(void *)ll_addr, ll_len); + } if_proto_free(proto); } /* Let the interface verify the multicast address */ if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) { - if (result == 0) + if (result == 0) { verify = ll_addr; - else + } else { verify = proto_addr; + } result = ifp->if_check_multi(ifp, verify); } ifnet_decr_iorefcnt(ifp); - return (result); + return result; } __private_extern__ errno_t @@ -4750,7 +5951,7 @@ dlil_send_arp_internal(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto) { struct if_proto *proto; - errno_t result = 0; + errno_t result = 0; /* callee holds a proto refcnt upon success */ ifnet_lock_shared(ifp); @@ -4759,7 +5960,7 @@ dlil_send_arp_internal(ifnet_t ifp, u_short arpop, if (proto == NULL) { result = ENOTSUP; } else { - proto_media_send_arp arpp; + proto_media_send_arp arpp; arpp = (proto->proto_kpi == kProtoKPI_v1 ? 
proto->kpi.v1.send_arp : proto->kpi.v2.send_arp); if (arpp == NULL) { @@ -4768,8 +5969,9 @@ dlil_send_arp_internal(ifnet_t ifp, u_short arpop, switch (arpop) { case ARPOP_REQUEST: arpstat.txrequests++; - if (target_hw != NULL) + if (target_hw != NULL) { arpstat.txurequests++; + } break; case ARPOP_REPLY: arpstat.txreplies++; @@ -4781,14 +5983,14 @@ dlil_send_arp_internal(ifnet_t ifp, u_short arpop, if_proto_free(proto); } - return (result); + return result; } struct net_thread_marks { }; static const struct net_thread_marks net_thread_marks_base = { }; __private_extern__ const net_thread_marks_t net_thread_marks_none = - &net_thread_marks_base; + &net_thread_marks_base; __private_extern__ net_thread_marks_t net_thread_marks_push(u_int32_t push) @@ -4800,11 +6002,12 @@ net_thread_marks_push(u_int32_t push) struct uthread *uth = get_bsdthread_info(current_thread()); pop = push & ~uth->uu_network_marks; - if (pop != 0) + if (pop != 0) { uth->uu_network_marks |= pop; + } } - return ((net_thread_marks_t)&base[pop]); + return (net_thread_marks_t)&base[pop]; } __private_extern__ net_thread_marks_t @@ -4817,11 +6020,12 @@ net_thread_unmarks_push(u_int32_t unpush) struct uthread *uth = get_bsdthread_info(current_thread()); unpop = unpush & uth->uu_network_marks; - if (unpop != 0) + if (unpop != 0) { uth->uu_network_marks &= ~unpop; + } } - return ((net_thread_marks_t)&base[unpop]); + return (net_thread_marks_t)&base[unpop]; } __private_extern__ void @@ -4861,10 +6065,10 @@ net_thread_is_marked(u_int32_t check) { if (check != 0) { struct uthread *uth = get_bsdthread_info(current_thread()); - return (uth->uu_network_marks & check); + return uth->uu_network_marks & check; + } else { + return 0; } - else - return (0); } __private_extern__ u_int32_t @@ -4872,20 +6076,21 @@ net_thread_is_unmarked(u_int32_t check) { if (check != 0) { struct uthread *uth = get_bsdthread_info(current_thread()); - return (~uth->uu_network_marks & check); + return ~uth->uu_network_marks & check; + } 
else { + return 0; } - else - return (0); } static __inline__ int _is_announcement(const struct sockaddr_in * sender_sin, const struct sockaddr_in * target_sin) { - if (sender_sin == NULL) { - return (FALSE); + if (target_sin == NULL || sender_sin == NULL) { + return FALSE; } - return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr); + + return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr; } __private_extern__ errno_t @@ -4893,15 +6098,19 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto0, u_int32_t rtflags) { - errno_t result = 0; + errno_t result = 0; const struct sockaddr_in * sender_sin; const struct sockaddr_in * target_sin; struct sockaddr_inarp target_proto_sinarp; struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0; - if (target_proto == NULL || (sender_proto != NULL && - sender_proto->sa_family != target_proto->sa_family)) - return (EINVAL); + if (target_proto == NULL || sender_proto == NULL) { + return EINVAL; + } + + if (sender_proto->sa_family != target_proto->sa_family) { + return EINVAL; + } /* * If the target is a (default) router, provide that @@ -4909,7 +6118,7 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw, */ if (rtflags & RTF_ROUTER) { bcopy(target_proto, &target_proto_sinarp, - sizeof (struct sockaddr_in)); + sizeof(struct sockaddr_in)); target_proto_sinarp.sin_other |= SIN_ROUTER; target_proto = (struct sockaddr *)&target_proto_sinarp; } @@ -4925,10 +6134,10 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw, if (target_proto->sa_family == AF_INET && IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) && ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST && - !_is_announcement(target_sin, sender_sin)) { - ifnet_t *ifp_list; - u_int32_t count; - u_int32_t ifp_on; + !_is_announcement(sender_sin, target_sin)) { 
+ ifnet_t *ifp_list; + u_int32_t count; + u_int32_t ifp_on; result = ENOTSUP; @@ -4945,8 +6154,9 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw, * ARPing. This may mean that we don't ARP on * the interface the subnet route points to. */ - if (!(cur_ifp->if_eflags & IFEF_ARPLL)) + if (!(cur_ifp->if_eflags & IFEF_ARPLL)) { continue; + } /* Find the source IP address */ ifnet_lock_shared(cur_ifp); @@ -4995,7 +6205,7 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw, sender_proto, target_hw, target_proto); } - return (result); + return result; } /* @@ -5008,10 +6218,11 @@ ifnet_lookup(struct ifnet *ifp) LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD); TAILQ_FOREACH(_ifp, &ifnet_head, if_link) { - if (_ifp == ifp) + if (_ifp == ifp) { break; + } } - return (_ifp != NULL); + return _ifp != NULL; } /* @@ -5026,18 +6237,39 @@ ifnet_is_attached(struct ifnet *ifp, int refio) lck_mtx_lock_spin(&ifp->if_ref_lock); if ((ret = IF_FULLY_ATTACHED(ifp))) { - if (refio > 0) + if (refio > 0) { ifp->if_refio++; + } } lck_mtx_unlock(&ifp->if_ref_lock); - return (ret); + return ret; +} + +void +ifnet_incr_pending_thread_count(struct ifnet *ifp) +{ + lck_mtx_lock_spin(&ifp->if_ref_lock); + ifp->if_threads_pending++; + lck_mtx_unlock(&ifp->if_ref_lock); +} + +void +ifnet_decr_pending_thread_count(struct ifnet *ifp) +{ + lck_mtx_lock_spin(&ifp->if_ref_lock); + VERIFY(ifp->if_threads_pending > 0); + ifp->if_threads_pending--; + if (ifp->if_threads_pending == 0) { + wakeup(&ifp->if_threads_pending); + } + lck_mtx_unlock(&ifp->if_ref_lock); } /* * Caller must ensure the interface is attached; the assumption is that * there is at least an outstanding IO reference count held already. - * Most callers would call ifnet_is_attached() instead. + * Most callers would call ifnet_is_{attached,data_ready}() instead. 
*/ void ifnet_incr_iorefcnt(struct ifnet *ifp) @@ -5049,21 +6281,114 @@ ifnet_incr_iorefcnt(struct ifnet *ifp) lck_mtx_unlock(&ifp->if_ref_lock); } -void -ifnet_decr_iorefcnt(struct ifnet *ifp) +__attribute__((always_inline)) +static void +ifnet_decr_iorefcnt_locked(struct ifnet *ifp) { - lck_mtx_lock_spin(&ifp->if_ref_lock); + LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED); + VERIFY(ifp->if_refio > 0); VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)); + ifp->if_refio--; + VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0); /* * if there are no more outstanding io references, wakeup the * ifnet_detach thread if detaching flag is set. */ - if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) + if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) { wakeup(&(ifp->if_refio)); + } +} + +void +ifnet_decr_iorefcnt(struct ifnet *ifp) +{ + lck_mtx_lock_spin(&ifp->if_ref_lock); + ifnet_decr_iorefcnt_locked(ifp); + lck_mtx_unlock(&ifp->if_ref_lock); +} + +boolean_t +ifnet_datamov_begin(struct ifnet *ifp) +{ + boolean_t ret; + + lck_mtx_lock_spin(&ifp->if_ref_lock); + if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) { + ifp->if_refio++; + ifp->if_datamov++; + } + lck_mtx_unlock(&ifp->if_ref_lock); + return ret; +} + +void +ifnet_datamov_end(struct ifnet *ifp) +{ + lck_mtx_lock_spin(&ifp->if_ref_lock); + VERIFY(ifp->if_datamov > 0); + /* + * if there are no more threads moving data, wakeup any + * drainers that are blocked waiting for this. 
+ */ + if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) { + wakeup(&(ifp->if_datamov)); + } + ifnet_decr_iorefcnt_locked(ifp); + lck_mtx_unlock(&ifp->if_ref_lock); +} + +void +ifnet_datamov_suspend(struct ifnet *ifp) +{ + lck_mtx_lock_spin(&ifp->if_ref_lock); + VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)); + ifp->if_refio++; + if (ifp->if_suspend++ == 0) { + VERIFY(ifp->if_refflags & IFRF_READY); + ifp->if_refflags &= ~IFRF_READY; + } + lck_mtx_unlock(&ifp->if_ref_lock); +} + +void +ifnet_datamov_drain(struct ifnet *ifp) +{ + lck_mtx_lock(&ifp->if_ref_lock); + VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)); + /* data movement must already be suspended */ + VERIFY(ifp->if_suspend > 0); + VERIFY(!(ifp->if_refflags & IFRF_READY)); + ifp->if_drainers++; + while (ifp->if_datamov != 0) { + (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock, + (PZERO - 1), __func__, NULL); + } + VERIFY(!(ifp->if_refflags & IFRF_READY)); + VERIFY(ifp->if_drainers > 0); + ifp->if_drainers--; + lck_mtx_unlock(&ifp->if_ref_lock); + + /* purge the interface queues */ + if ((ifp->if_eflags & IFEF_TXSTART) != 0) { + if_qflush(ifp, 0); + } +} + +void +ifnet_datamov_resume(struct ifnet *ifp) +{ + lck_mtx_lock(&ifp->if_ref_lock); + /* data movement must already be suspended */ + VERIFY(ifp->if_suspend > 0); + if (--ifp->if_suspend == 0) { + VERIFY(!(ifp->if_refflags & IFRF_READY)); + ifp->if_refflags |= IFRF_READY; + } + ifnet_decr_iorefcnt_locked(ifp); lck_mtx_unlock(&ifp->if_ref_lock); } @@ -5097,8 +6422,9 @@ dlil_if_ref(struct ifnet *ifp) { struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; - if (dl_if == NULL) - return (EINVAL); + if (dl_if == NULL) { + return EINVAL; + } lck_mtx_lock_spin(&dl_if->dl_if_lock); ++dl_if->dl_if_refcnt; @@ -5106,11 +6432,12 @@ dlil_if_ref(struct ifnet *ifp) panic("%s: wraparound refcnt for ifp=%p", __func__, ifp); /* NOTREACHED */ } - if (dl_if->dl_if_trace != NULL) + if (dl_if->dl_if_trace != NULL) { (*dl_if->dl_if_trace)(dl_if, 
TRUE); + } lck_mtx_unlock(&dl_if->dl_if_lock); - return (0); + return 0; } errno_t @@ -5119,8 +6446,9 @@ dlil_if_free(struct ifnet *ifp) struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; bool need_release = FALSE; - if (dl_if == NULL) - return (EINVAL); + if (dl_if == NULL) { + return EINVAL; + } lck_mtx_lock_spin(&dl_if->dl_if_lock); switch (dl_if->dl_if_refcnt) { @@ -5137,13 +6465,14 @@ dlil_if_free(struct ifnet *ifp) break; } --dl_if->dl_if_refcnt; - if (dl_if->dl_if_trace != NULL) + if (dl_if->dl_if_trace != NULL) { (*dl_if->dl_if_trace)(dl_if, FALSE); + } lck_mtx_unlock(&dl_if->dl_if_lock); if (need_release) { dlil_if_release(ifp); } - return (0); + return 0; } static errno_t @@ -5164,7 +6493,7 @@ dlil_attach_protocol_internal(struct if_proto *proto, if (_proto != NULL) { ifnet_lock_done(ifp); if_proto_free(_proto); - return (EEXIST); + return EEXIST; } /* @@ -5175,20 +6504,22 @@ dlil_attach_protocol_internal(struct if_proto *proto, demux_count); if (retval) { ifnet_lock_done(ifp); - return (retval); + return retval; } /* * Insert the protocol in the hash */ prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]); - while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) + while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) { prev_proto = SLIST_NEXT(prev_proto, next_hash); - if (prev_proto) + } + if (prev_proto) { SLIST_INSERT_AFTER(prev_proto, proto, next_hash); - else + } else { SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value], proto, next_hash); + } /* hold a proto refcnt for attach */ if_proto_ref(proto); @@ -5198,16 +6529,17 @@ dlil_attach_protocol_internal(struct if_proto *proto, * (subject to change) */ ev_pr_data.proto_family = proto->protocol_family; - ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp); + ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0); + ifnet_lock_done(ifp); dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED, (struct net_event_data *)&ev_pr_data, - sizeof 
(struct kev_dl_proto_data)); + sizeof(struct kev_dl_proto_data)); if (proto_count != NULL) { *proto_count = ev_pr_data.proto_remaining_count; } - return (retval); + return retval; } errno_t @@ -5249,26 +6581,34 @@ ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol, ifproto->kpi.v1.send_arp = proto_details->send_arp; retval = dlil_attach_protocol_internal(ifproto, - proto_details->demux_list, proto_details->demux_count, - &proto_count); + proto_details->demux_list, proto_details->demux_count, + &proto_count); end: - if (retval != 0 && retval != EEXIST && ifp != NULL) { + if (retval != 0 && retval != EEXIST) { DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n", - if_name(ifp), protocol, retval); + ifp != NULL ? if_name(ifp) : "N/A", protocol, retval); } else { if (dlil_verbose) { - printf("%s: attached v1 protocol %d (count = %d)\n", - if_name(ifp), - protocol, proto_count); + DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n", + ifp != NULL ? if_name(ifp) : "N/A", + protocol, proto_count); } } ifnet_head_done(); if (retval == 0) { + /* + * A protocol has been attached, mark the interface up. + * This used to be done by configd.KernelEventMonitor, but that + * is inherently prone to races (rdar://problem/30810208). 
+ */ + (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP); + (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL); + dlil_post_sifflags_msg(ifp); } else if (ifproto != NULL) { zfree(dlif_proto_zone, ifproto); } - return (retval); + return retval; } errno_t @@ -5310,33 +6650,41 @@ ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol, ifproto->kpi.v2.send_arp = proto_details->send_arp; retval = dlil_attach_protocol_internal(ifproto, - proto_details->demux_list, proto_details->demux_count, - &proto_count); + proto_details->demux_list, proto_details->demux_count, + &proto_count); end: - if (retval != 0 && retval != EEXIST && ifp != NULL) { + if (retval != 0 && retval != EEXIST) { DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n", - if_name(ifp), protocol, retval); + ifp != NULL ? if_name(ifp) : "N/A", protocol, retval); } else { if (dlil_verbose) { - printf("%s: attached v2 protocol %d (count = %d)\n", - if_name(ifp), - protocol, proto_count); + DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n", + ifp != NULL ? if_name(ifp) : "N/A", + protocol, proto_count); } } ifnet_head_done(); if (retval == 0) { + /* + * A protocol has been attached, mark the interface up. + * This used to be done by configd.KernelEventMonitor, but that + * is inherently prone to races (rdar://problem/30810208). 
+ */ + (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP); + (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL); + dlil_post_sifflags_msg(ifp); } else if (ifproto != NULL) { zfree(dlif_proto_zone, ifproto); } - return (retval); + return retval; } errno_t ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family) { struct if_proto *proto = NULL; - int retval = 0; + int retval = 0; if (ifp == NULL || proto_family == 0) { retval = EINVAL; @@ -5353,8 +6701,9 @@ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family) } /* call family module del_proto */ - if (ifp->if_del_proto) + if (ifp->if_del_proto) { ifp->if_del_proto(ifp, proto->protocol_family); + } SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)], proto, if_proto, next_hash); @@ -5378,7 +6727,7 @@ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family) ifnet_lock_done(ifp); if (dlil_verbose) { - printf("%s: detached %s protocol %d\n", if_name(ifp), + DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp), (proto->proto_kpi == kProtoKPI_v1) ? 
"v1" : "v2", proto_family); } @@ -5394,7 +6743,7 @@ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family) if_proto_free(proto); end: - return (retval); + return retval; } @@ -5403,7 +6752,7 @@ ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol, struct mbuf *packet, char *header) { #pragma unused(ifp, protocol, packet, header) - return (ENXIO); + return ENXIO; } static errno_t @@ -5411,8 +6760,7 @@ ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol, struct mbuf *packet) { #pragma unused(ifp, protocol, packet) - return (ENXIO); - + return ENXIO; } static errno_t @@ -5421,8 +6769,7 @@ ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol, char *link_layer_dest) { #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest) - return (ENXIO); - + return ENXIO; } static void @@ -5437,7 +6784,7 @@ ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol, unsigned long command, void *argument) { #pragma unused(ifp, protocol, command, argument) - return (ENXIO); + return ENXIO; } static errno_t @@ -5445,7 +6792,7 @@ ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr, struct sockaddr_dl *out_ll, size_t ll_len) { #pragma unused(ifp, proto_addr, out_ll, ll_len) - return (ENXIO); + return ENXIO; } static errno_t @@ -5454,7 +6801,7 @@ ifproto_media_send_arp(struct ifnet *ifp, u_short arpop, const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto) { #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto) - return (ENXIO); + return ENXIO; } extern int if_next_index(void); @@ -5471,8 +6818,9 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) u_int32_t sflags = 0; int err; - if (ifp == NULL) - return (EINVAL); + if (ifp == NULL) { + return EINVAL; + } /* * Serialize ifnet attach using dlil_ifnet_lock, in order to @@ -5487,7 +6835,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) if (tmp_if == ifp) { 
ifnet_head_done(); dlil_if_unlock(); - return (EEXIST); + return EEXIST; } } @@ -5504,6 +6852,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* Sanity check */ VERIFY(ifp->if_detaching_link.tqe_next == NULL); VERIFY(ifp->if_detaching_link.tqe_prev == NULL); + VERIFY(ifp->if_threads_pending == 0); if (ll_addr != NULL) { if (ifp->if_addrlen == 0) { @@ -5512,7 +6861,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifnet_lock_done(ifp); ifnet_head_done(); dlil_if_unlock(); - return (EINVAL); + return EINVAL; } } @@ -5526,7 +6875,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifnet_lock_done(ifp); ifnet_head_done(); dlil_if_unlock(); - return (ENODEV); + return ENODEV; } /* Allocate protocol hash table */ @@ -5536,7 +6885,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifnet_lock_done(ifp); ifnet_head_done(); dlil_if_unlock(); - return (ENOBUFS); + return ENOBUFS; } bzero(ifp->if_proto_hash, dlif_phash_size); @@ -5564,7 +6913,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifnet_lock_done(ifp); ifnet_head_done(); dlil_if_unlock(); - return (ENOBUFS); + return ENOBUFS; } ifp->if_index = idx; } @@ -5577,7 +6926,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifnet_lock_done(ifp); ifnet_head_done(); dlil_if_unlock(); - return (ENOBUFS); + return ENOBUFS; } VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL); @@ -5603,7 +6952,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* Clear stats (save and restore other fields that we care) */ if_data_saved = ifp->if_data; - bzero(&ifp->if_data, sizeof (ifp->if_data)); + bzero(&ifp->if_data, sizeof(ifp->if_data)); ifp->if_data.ifi_type = if_data_saved.ifi_type; ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen; ifp->if_data.ifi_physical = if_data_saved.ifi_physical; @@ -5622,15 +6971,18 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* By default, use SFB and enable flow advisory */ sflags = 
PKTSCHEDF_QALG_SFB; - if (if_flowadv) + if (if_flowadv) { sflags |= PKTSCHEDF_QALG_FLOWCTL; + } - if (if_delaybased_queue) + if (if_delaybased_queue) { sflags |= PKTSCHEDF_QALG_DELAYBASED; + } if (ifp->if_output_sched_model == - IFNET_SCHED_MODEL_DRIVER_MANAGED) + IFNET_SCHED_MODEL_DRIVER_MANAGED) { sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED; + } /* Initialize transmit queue(s) */ err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE)); @@ -5642,7 +6994,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* Sanity checks on the input thread storage */ dl_inp = &dl_if->dl_if_inpstorage; - bzero(&dl_inp->stats, sizeof (dl_inp->stats)); + bzero(&dl_inp->stats, sizeof(dl_inp->stats)); VERIFY(dl_inp->input_waiting == 0); VERIFY(dl_inp->wtot == 0); VERIFY(dl_inp->ifp == NULL); @@ -5654,23 +7006,19 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) VERIFY(dl_inp->wloop_thr == THREAD_NULL); VERIFY(dl_inp->poll_thr == THREAD_NULL); VERIFY(dl_inp->tag == 0); - VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF); - bzero(&dl_inp->tstats, sizeof (dl_inp->tstats)); - bzero(&dl_inp->pstats, sizeof (dl_inp->pstats)); - bzero(&dl_inp->sstats, sizeof (dl_inp->sstats)); + #if IFNET_INPUT_SANITY_CHK VERIFY(dl_inp->input_mbuf_cnt == 0); #endif /* IFNET_INPUT_SANITY_CHK */ + VERIFY(ifp->if_poll_thread == THREAD_NULL); + dlil_reset_rxpoll_params(ifp); /* - * A specific DLIL input thread is created per Ethernet/cellular - * interface or for an interface which supports opportunistic - * input polling. Pseudo interfaces or other types of interfaces - * use the main input thread instead. + * A specific DLIL input thread is created per non-loopback interface. 
*/ - if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) || - ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) { + if (ifp->if_family != IFNET_FAMILY_LOOPBACK) { ifp->if_inp = dl_inp; + ifnet_incr_pending_thread_count(ifp); err = dlil_create_input_thread(ifp, ifp->if_inp); if (err != 0) { panic_plain("%s: ifp=%p couldn't get an input thread; " @@ -5678,13 +7026,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* NOTREACHED */ } } - - if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) { - ifp->if_inp->input_mit_tcall = - thread_call_allocate_with_priority(dlil_mit_tcall_fn, - ifp, THREAD_CALL_PRIORITY_KERNEL); - } - /* * If the driver supports the new transmit model, calculate flow hash * and create a workloop starter thread to invoke the if_start callback @@ -5700,41 +7041,46 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifp->if_start_req = 0; ifp->if_start_flags = 0; VERIFY(ifp->if_start != NULL); - if ((err = kernel_thread_start(ifnet_start_thread_fn, + ifnet_incr_pending_thread_count(ifp); + if ((err = kernel_thread_start(ifnet_start_thread_func, ifp, &ifp->if_start_thread)) != KERN_SUCCESS) { panic_plain("%s: " "ifp=%p couldn't get a start thread; " "err=%d", __func__, ifp, err); - /* NOTREACHED */ + /* NOTREACHED */ } ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP, - (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP)); + (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP)); } else { ifp->if_flowhash = 0; } + /* Reset polling parameters */ + ifnet_set_poll_cycle(ifp, NULL); + ifp->if_poll_update = 0; + ifp->if_poll_flags = 0; + ifp->if_poll_req = 0; + VERIFY(ifp->if_poll_thread == THREAD_NULL); + /* * If the driver supports the new receive model, create a poller * thread to invoke if_input_poll callback where the packets may * be dequeued from the driver and processed for reception. + * if the interface is netif compat then the poller thread is managed by netif. 
*/ - if (ifp->if_eflags & IFEF_RXPOLL) { + if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) && + (ifp->if_xflags & IFXF_LEGACY)) { VERIFY(ifp->if_input_poll != NULL); VERIFY(ifp->if_input_ctl != NULL); - VERIFY(ifp->if_poll_thread == THREAD_NULL); - - ifnet_set_poll_cycle(ifp, NULL); - ifp->if_poll_update = 0; - ifp->if_poll_active = 0; - ifp->if_poll_req = 0; - if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp, + ifnet_incr_pending_thread_count(ifp); + if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp, &ifp->if_poll_thread)) != KERN_SUCCESS) { panic_plain("%s: ifp=%p couldn't get a poll thread; " "err=%d", __func__, ifp, err); /* NOTREACHED */ } ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP, - (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP)); + (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP)); } VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE); @@ -5750,18 +7096,19 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { IFMA_LOCK(ifma); if (ifma->ifma_addr->sa_family == AF_LINK || - ifma->ifma_addr->sa_family == AF_UNSPEC) + ifma->ifma_addr->sa_family == AF_UNSPEC) { ifp->if_updatemcasts++; + } IFMA_UNLOCK(ifma); } - printf("%s: attached with %d suspended link-layer multicast " + DLIL_PRINTF("%s: attached with %d suspended link-layer multicast " "membership(s)\n", if_name(ifp), ifp->if_updatemcasts); } /* Clear logging parameters */ - bzero(&ifp->if_log, sizeof (ifp->if_log)); + bzero(&ifp->if_log, sizeof(ifp->if_log)); /* Clear foreground/realtime activity timestamps */ ifp->if_fg_sendts = 0; @@ -5772,6 +7119,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) VERIFY(ifp->if_delegated.family == 0); VERIFY(ifp->if_delegated.subfamily == 0); VERIFY(ifp->if_delegated.expensive == 0); + VERIFY(ifp->if_delegated.constrained == 0); VERIFY(ifp->if_agentids == NULL); VERIFY(ifp->if_agentcount == 0); @@ -5779,9 +7127,9 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl 
*ll_addr) /* Reset interface state */ bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state)); ifp->if_interface_state.valid_bitmask |= - IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; + IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID; ifp->if_interface_state.interface_availability = - IF_INTERFACE_STATE_INTERFACE_AVAILABLE; + IF_INTERFACE_STATE_INTERFACE_AVAILABLE; /* Initialize Link Quality Metric (loopback [lo0] is always good) */ if (ifp == lo_ifp) { @@ -5810,13 +7158,13 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) error = if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_FASTLANE); if (error != 0) { - printf("%s if_set_qosmarking_mode(%s) error %d\n", + DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n", __func__, ifp->if_xname, error); } else { ifp->if_eflags |= IFEF_QOSMARKING_ENABLED; #if (DEVELOPMENT || DEBUG) - printf("%s fastlane enabled on %s\n", - __func__, ifp->if_xname); + DLIL_PRINTF("%s fastlane enabled on %s\n", + __func__, ifp->if_xname); #endif /* (DEVELOPMENT || DEBUG) */ } } @@ -5830,11 +7178,11 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifp->if_fwd_cacheok = 1; /* Clean up any existing cached routes */ ROUTE_RELEASE(&ifp->if_fwd_route); - bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); + bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route)); ROUTE_RELEASE(&ifp->if_src_route); - bzero(&ifp->if_src_route, sizeof (ifp->if_src_route)); + bzero(&ifp->if_src_route, sizeof(ifp->if_src_route)); ROUTE_RELEASE(&ifp->if_src_route6); - bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6)); + bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6)); lck_mtx_unlock(&ifp->if_cached_route_lock); ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE)); @@ -5871,12 +7219,28 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) VERIFY(ifp->if_dt_tcall != NULL); /* - * Finally, mark this ifnet as attached. 
+ * Wait for the created kernel threads for I/O to get + * scheduled and run at least once before we proceed + * to mark interface as attached. */ + lck_mtx_lock(&ifp->if_ref_lock); + while (ifp->if_threads_pending != 0) { + DLIL_PRINTF("%s: Waiting for all kernel threads created for " + "interface %s to get scheduled at least once.\n", + __func__, ifp->if_xname); + (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1), + __func__, NULL); + LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED); + } + lck_mtx_unlock(&ifp->if_ref_lock); + DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled " + "at least once. Proceeding.\n", __func__, ifp->if_xname); + + /* Finally, mark this ifnet as attached. */ lck_mtx_lock(rnh_lock); ifnet_lock_exclusive(ifp); lck_mtx_lock_spin(&ifp->if_ref_lock); - ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */ + ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */ lck_mtx_unlock(&ifp->if_ref_lock); if (net_rtref) { /* boot-args override; enable idle notification */ @@ -5886,7 +7250,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* apply previous request(s) to set the idle flags, if any */ (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags, ifp->if_idle_new_flags_mask); - } ifnet_lock_done(ifp); lck_mtx_unlock(rnh_lock); @@ -5902,11 +7265,11 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0); if (dlil_verbose) { - printf("%s: attached%s\n", if_name(ifp), + DLIL_PRINTF("%s: attached%s\n", if_name(ifp), (dl_if->dl_if_flags & DLIF_REUSE) ? 
" (recycled)" : ""); } - return (0); + return 0; } /* @@ -5924,21 +7287,22 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) { struct ifaddr *ifa, *oifa; struct sockaddr_dl *asdl, *msdl; - char workbuf[IFNAMSIZ*2]; + char workbuf[IFNAMSIZ * 2]; int namelen, masklen, socksize; struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE); VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen); - namelen = snprintf(workbuf, sizeof (workbuf), "%s", + namelen = scnprintf(workbuf, sizeof(workbuf), "%s", if_name(ifp)); masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + ((namelen > 0) ? namelen : 0); socksize = masklen + ifp->if_addrlen; -#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1))) - if ((u_int32_t)socksize < sizeof (struct sockaddr_dl)) +#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1))) + if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) { socksize = sizeof(struct sockaddr_dl); + } socksize = ROUNDUP(socksize); #undef ROUNDUP @@ -5953,10 +7317,11 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) * This same space will be used when if_addrlen shrinks. 
*/ if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) { - int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN; + int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN; ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO); - if (ifa == NULL) - return (NULL); + if (ifa == NULL) { + return NULL; + } ifa_lock_init(ifa); /* Don't set IFD_ALLOC, as this is permanent */ ifa->ifa_debug = IFD_LINK; @@ -5983,9 +7348,9 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) IFA_LOCK(ifa); /* address and mask sockaddr_dl locations */ asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl; - bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl)); + bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl)); msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl; - bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl)); + bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl)); } /* hold a permanent reference for the ifnet itself */ @@ -6001,7 +7366,7 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) asdl->sdl_family = AF_LINK; if (namelen > 0) { bcopy(workbuf, asdl->sdl_data, min(namelen, - sizeof (asdl->sdl_data))); + sizeof(asdl->sdl_data))); asdl->sdl_nlen = namelen; } else { asdl->sdl_nlen = 0; @@ -6016,14 +7381,16 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) } ifa->ifa_netmask = (struct sockaddr *)msdl; msdl->sdl_len = masklen; - while (namelen > 0) + while (namelen > 0) { msdl->sdl_data[--namelen] = 0xff; + } IFA_UNLOCK(ifa); - if (oifa != NULL) + if (oifa != NULL) { IFA_REMREF(oifa); + } - return (ifa); + return ifa; } static void @@ -6043,17 +7410,24 @@ ifnet_detach(ifnet_t ifp) struct ifnet *delegated_ifp; struct nd_ifinfo *ndi = NULL; - if (ifp == NULL) - return (EINVAL); + if (ifp == NULL) { + return EINVAL; + } ndi = ND_IFINFO(ifp); - if (NULL != ndi) + if (NULL != ndi) { ndi->cga_initialized = FALSE; + } lck_mtx_lock(rnh_lock); ifnet_head_lock_exclusive(); ifnet_lock_exclusive(ifp); + if (ifp->if_output_netem != NULL) { + 
netem_destroy(ifp->if_output_netem); + ifp->if_output_netem = NULL; + } + /* * Check to see if this interface has previously triggered * aggressive protocol draining; if so, decrement the global @@ -6068,14 +7442,14 @@ ifnet_detach(ifnet_t ifp) ifnet_lock_done(ifp); ifnet_head_done(); lck_mtx_unlock(rnh_lock); - return (EINVAL); + return EINVAL; } else if (ifp->if_refflags & IFRF_DETACHING) { /* Interface has already been detached */ lck_mtx_unlock(&ifp->if_ref_lock); ifnet_lock_done(ifp); ifnet_head_done(); lck_mtx_unlock(rnh_lock); - return (ENXIO); + return ENXIO; } VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC)); /* Indicate this interface is being detached */ @@ -6084,7 +7458,7 @@ ifnet_detach(ifnet_t ifp) lck_mtx_unlock(&ifp->if_ref_lock); if (dlil_verbose) { - printf("%s: detaching\n", if_name(ifp)); + DLIL_PRINTF("%s: detaching\n", if_name(ifp)); } /* clean up flow control entry object if there's any */ @@ -6096,6 +7470,20 @@ ifnet_detach(ifnet_t ifp) ifp->if_eflags &= ~IFEF_ECN_DISABLE; ifp->if_eflags &= ~IFEF_ECN_ENABLE; + /* Reset CLAT46 flag */ + ifp->if_eflags &= ~IFEF_CLAT46; + + /* + * We do not reset the TCP keep alive counters in case + * a TCP connection stays connected after the interface + * went down + */ + if (ifp->if_tcp_kao_cnt > 0) { + os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero", + __func__, if_name(ifp), ifp->if_tcp_kao_cnt); + } + ifp->if_tcp_kao_max = 0; + /* * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will * no longer be visible during lookups from this point. 
@@ -6105,7 +7493,7 @@ ifnet_detach(ifnet_t ifp) ifp->if_link.tqe_next = NULL; ifp->if_link.tqe_prev = NULL; if (ifp->if_ordered_link.tqe_next != NULL || - ifp->if_ordered_link.tqe_prev != NULL) { + ifp->if_ordered_link.tqe_prev != NULL) { ifnet_remove_from_ordered_list(ifp); } ifindex2ifnet[ifp->if_index] = NULL; @@ -6117,11 +7505,11 @@ ifnet_detach(ifnet_t ifp) ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach); /* Clear logging parameters */ - bzero(&ifp->if_log, sizeof (ifp->if_log)); + bzero(&ifp->if_log, sizeof(ifp->if_log)); /* Clear delegated interface info (reference released below) */ delegated_ifp = ifp->if_delegated.ifp; - bzero(&ifp->if_delegated, sizeof (ifp->if_delegated)); + bzero(&ifp->if_delegated, sizeof(ifp->if_delegated)); /* Reset interface state */ bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state)); @@ -6132,28 +7520,34 @@ ifnet_detach(ifnet_t ifp) /* Release reference held on the delegated interface */ - if (delegated_ifp != NULL) + if (delegated_ifp != NULL) { ifnet_release(delegated_ifp); + } /* Reset Link Quality Metric (unless loopback [lo0]) */ - if (ifp != lo_ifp) + if (ifp != lo_ifp) { if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0); + } /* Reset TCP local statistics */ - if (ifp->if_tcp_stat != NULL) + if (ifp->if_tcp_stat != NULL) { bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat)); + } /* Reset UDP local statistics */ - if (ifp->if_udp_stat != NULL) + if (ifp->if_udp_stat != NULL) { bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat)); + } /* Reset ifnet IPv4 stats */ - if (ifp->if_ipv4_stat != NULL) + if (ifp->if_ipv4_stat != NULL) { bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat)); + } /* Reset ifnet IPv6 stats */ - if (ifp->if_ipv6_stat != NULL) + if (ifp->if_ipv6_stat != NULL) { bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat)); + } /* Release memory held for interface link status report */ if (ifp->if_link_status != NULL) { @@ -6204,7 +7598,7 @@ ifnet_detach(ifnet_t ifp) ifnet_detaching_enqueue(ifp); 
dlil_if_unlock(); - return (0); + return 0; } static void @@ -6234,7 +7628,7 @@ ifnet_detaching_dequeue(void) ifp->if_detaching_link.tqe_next = NULL; ifp->if_detaching_link.tqe_prev = NULL; } - return (ifp); + return ifp; } static int @@ -6252,6 +7646,8 @@ ifnet_detacher_thread_cont(int err) /* NOTREACHED */ } + net_update_uptime(); + VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL); /* Take care of detaching ifnet */ @@ -6264,10 +7660,12 @@ ifnet_detacher_thread_cont(int err) } } +__dead2 static void ifnet_detacher_thread_func(void *v, wait_result_t w) { #pragma unused(v, w) + dlil_decr_pending_thread_count(); dlil_if_lock(); (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock, (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont); @@ -6302,11 +7700,16 @@ ifnet_detach_final(struct ifnet *ifp) * common case, so block without using a continuation. */ while (ifp->if_refio > 0) { - printf("%s: Waiting for IO references on %s interface " + DLIL_PRINTF("%s: Waiting for IO references on %s interface " "to be released\n", __func__, if_name(ifp)); (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock, - (PZERO - 1), "ifnet_ioref_wait", NULL); + (PZERO - 1), "ifnet_ioref_wait", NULL); } + + VERIFY(ifp->if_datamov == 0); + VERIFY(ifp->if_drainers == 0); + VERIFY(ifp->if_suspend == 0); + ifp->if_refflags &= ~IFRF_READY; lck_mtx_unlock(&ifp->if_ref_lock); /* Drain and destroy send queue */ @@ -6416,7 +7819,7 @@ ifnet_detach_final(struct ifnet *ifp) inp->wloop_thr = THREAD_NULL; ptp = inp->poll_thr; inp->poll_thr = THREAD_NULL; - tp = inp->input_thr; /* don't nullify now */ + tp = inp->input_thr; /* don't nullify now */ inp->tag = 0; inp->net_affinity = FALSE; lck_mtx_unlock(&inp->input_lck); @@ -6424,6 +7827,7 @@ ifnet_detach_final(struct ifnet *ifp) /* Tear down poll thread affinity */ if (ptp != NULL) { VERIFY(ifp->if_eflags & IFEF_RXPOLL); + VERIFY(ifp->if_xflags & IFXF_LEGACY); (void) dlil_affinity_set(ptp, THREAD_AFFINITY_TAG_NULL); thread_deallocate(ptp); @@ 
-6456,7 +7860,7 @@ ifnet_detach_final(struct ifnet *ifp) /* wait for the input thread to terminate */ lck_mtx_lock_spin(&inp->input_lck); while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE) - == 0) { + == 0) { (void) msleep(&inp->input_waiting, &inp->input_lck, (PZERO - 1) | PSPIN, inp->input_name, NULL); } @@ -6465,7 +7869,9 @@ ifnet_detach_final(struct ifnet *ifp) /* clean-up input thread state */ dlil_clean_threading_info(inp); - + /* clean-up poll parameters */ + VERIFY(ifp->if_poll_thread == THREAD_NULL); + dlil_reset_rxpoll_params(ifp); } /* The driver might unload, so point these to ourselves */ @@ -6501,6 +7907,7 @@ ifnet_detach_final(struct ifnet *ifp) VERIFY(ifp->if_delegated.family == 0); VERIFY(ifp->if_delegated.subfamily == 0); VERIFY(ifp->if_delegated.expensive == 0); + VERIFY(ifp->if_delegated.constrained == 0); /* QoS marking get cleared */ ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED; @@ -6530,11 +7937,11 @@ ifnet_detach_final(struct ifnet *ifp) lck_mtx_lock(&ifp->if_cached_route_lock); VERIFY(!ifp->if_fwd_cacheok); ROUTE_RELEASE(&ifp->if_fwd_route); - bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); + bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route)); ROUTE_RELEASE(&ifp->if_src_route); - bzero(&ifp->if_src_route, sizeof (ifp->if_src_route)); + bzero(&ifp->if_src_route, sizeof(ifp->if_src_route)); ROUTE_RELEASE(&ifp->if_src_route6); - bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6)); + bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6)); lck_mtx_unlock(&ifp->if_cached_route_lock); VERIFY(ifp->if_data_threshold == 0); @@ -6556,11 +7963,13 @@ ifnet_detach_final(struct ifnet *ifp) } ifp->if_refflags &= ~IFRF_DETACHING; lck_mtx_unlock(&ifp->if_ref_lock); - if (if_free != NULL) + if (if_free != NULL) { if_free(ifp); + } - if (dlil_verbose) - printf("%s: detached\n", if_name(ifp)); + if (dlil_verbose) { + DLIL_PRINTF("%s: detached\n", if_name(ifp)); + } /* Release reference held during ifnet attach */ ifnet_release(ifp); @@ 
-6571,7 +7980,7 @@ ifp_if_output(struct ifnet *ifp, struct mbuf *m) { #pragma unused(ifp) m_freem_list(m); - return (0); + return 0; } void @@ -6587,7 +7996,7 @@ ifp_if_input(struct ifnet *ifp, struct mbuf *m_head, { #pragma unused(ifp, m_tail, s, poll, tp) m_freem_list(m_head); - return (ENXIO); + return ENXIO; } static void @@ -6595,21 +8004,25 @@ ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt, struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len) { #pragma unused(ifp, flags, max_cnt) - if (m_head != NULL) + if (m_head != NULL) { *m_head = NULL; - if (m_tail != NULL) + } + if (m_tail != NULL) { *m_tail = NULL; - if (cnt != NULL) + } + if (cnt != NULL) { *cnt = 0; - if (len != NULL) + } + if (len != NULL) { *len = 0; + } } static errno_t ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg) { #pragma unused(ifp, cmd, arglen, arg) - return (EOPNOTSUPP); + return EOPNOTSUPP; } static errno_t @@ -6617,7 +8030,7 @@ ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf) { #pragma unused(ifp, fh, pf) m_freem(m); - return (EJUSTRETURN); + return EJUSTRETURN; } static errno_t @@ -6625,21 +8038,21 @@ ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf, const struct ifnet_demux_desc *da, u_int32_t dc) { #pragma unused(ifp, pf, da, dc) - return (EINVAL); + return EINVAL; } static errno_t ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf) { #pragma unused(ifp, pf) - return (EINVAL); + return EINVAL; } static errno_t ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa) { #pragma unused(ifp, sa) - return (EOPNOTSUPP); + return EOPNOTSUPP; } #if CONFIG_EMBEDDED @@ -6655,9 +8068,9 @@ ifp_if_framer(struct ifnet *ifp, struct mbuf **m, { #pragma unused(ifp, m, sa, ll, t) #if CONFIG_EMBEDDED - return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post)); + return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post); #else - return (ifp_if_framer_extended(ifp, m, 
sa, ll, t, NULL, NULL)); + return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL); #endif /* !CONFIG_EMBEDDED */ } @@ -6670,19 +8083,21 @@ ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m, m_freem(*m); *m = NULL; - if (pre != NULL) + if (pre != NULL) { *pre = 0; - if (post != NULL) + } + if (post != NULL) { *post = 0; + } - return (EJUSTRETURN); + return EJUSTRETURN; } errno_t ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg) { #pragma unused(ifp, cmd, arg) - return (EOPNOTSUPP); + return EOPNOTSUPP; } static errno_t @@ -6690,7 +8105,7 @@ ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f) { #pragma unused(ifp, tm, f) /* XXX not sure what to do here */ - return (0); + return 0; } static void @@ -6705,43 +8120,77 @@ ifp_if_event(struct ifnet *ifp, const struct kev_msg *e) #pragma unused(ifp, e) } -__private_extern__ -int dlil_if_acquire(u_int32_t family, const void *uniqueid, - size_t uniqueid_len, struct ifnet **ifp) +int +dlil_if_acquire(u_int32_t family, const void *uniqueid, + size_t uniqueid_len, const char *ifxname, struct ifnet **ifp) { struct ifnet *ifp1 = NULL; struct dlil_ifnet *dlifp1 = NULL; + struct dlil_ifnet *dlifp1_saved = NULL; void *buf, *base, **pbuf; int ret = 0; + VERIFY(*ifp == NULL); dlil_if_lock(); + /* + * We absolutely can't have an interface with the same name + * in in-use state. 
+ * To make sure of that, the list has to be traversed completely + */ TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) { ifp1 = (struct ifnet *)dlifp1; - if (ifp1->if_family != family) + if (ifp1->if_family != family) { continue; + } + /* + * If interface is in use, return EBUSY if either unique id + * or interface extended names are the same + */ lck_mtx_lock(&dlifp1->dl_if_lock); - /* same uniqueid and same len or no unique id specified */ - if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) && - bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) { - /* check for matching interface in use */ + if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) { if (dlifp1->dl_if_flags & DLIF_INUSE) { - if (uniqueid_len) { - ret = EBUSY; + lck_mtx_unlock(&dlifp1->dl_if_lock); + ret = EBUSY; + goto end; + } + } + + if (uniqueid_len) { + if (uniqueid_len == dlifp1->dl_if_uniqueid_len && + bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) { + if (dlifp1->dl_if_flags & DLIF_INUSE) { lck_mtx_unlock(&dlifp1->dl_if_lock); + ret = EBUSY; goto end; + } else { + /* Cache the first interface that can be recycled */ + if (*ifp == NULL) { + *ifp = ifp1; + dlifp1_saved = dlifp1; + } + /* + * XXX Do not break or jump to end as we have to traverse + * the whole list to ensure there are no name collisions + */ } - } else { - dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE); - lck_mtx_unlock(&dlifp1->dl_if_lock); - *ifp = ifp1; - goto end; } } lck_mtx_unlock(&dlifp1->dl_if_lock); } + /* If there's an interface that can be recycled, use that */ + if (*ifp != NULL) { + if (dlifp1_saved != NULL) { + lck_mtx_lock(&dlifp1_saved->dl_if_lock); + dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE); + lck_mtx_unlock(&dlifp1_saved->dl_if_lock); + dlifp1_saved = NULL; + } + goto end; + } + /* no interface found, allocate a new one */ buf = zalloc(dlif_zone); if (buf == NULL) { @@ -6751,15 +8200,15 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid, bzero(buf, dlif_bufsize); /* 
Get the 64-bit aligned base address for this object */ - base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), - sizeof (u_int64_t)); + base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t), + sizeof(u_int64_t)); VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize)); /* * Wind back a pointer size from the aligned base and * save the original address so we can free it later. */ - pbuf = (void **)((intptr_t)base - sizeof (void *)); + pbuf = (void **)((intptr_t)base - sizeof(void *)); *pbuf = buf; dlifp1 = base; @@ -6849,14 +8298,14 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid, end: dlil_if_unlock(); - VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) && - IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t)))); + VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) && + IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t)))); - return (ret); + return ret; } __private_extern__ void -dlil_if_release(ifnet_t ifp) +dlil_if_release(ifnet_t ifp) { struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp; @@ -6929,7 +8378,7 @@ ifp_src_route_copyout(struct ifnet *ifp, struct route *dst) lck_mtx_lock_spin(&ifp->if_cached_route_lock); lck_mtx_convert_spin(&ifp->if_cached_route_lock); - route_copyout(dst, &ifp->if_src_route, sizeof (*dst)); + route_copyout(dst, &ifp->if_src_route, sizeof(*dst)); lck_mtx_unlock(&ifp->if_cached_route_lock); } @@ -6941,7 +8390,7 @@ ifp_src_route_copyin(struct ifnet *ifp, struct route *src) lck_mtx_convert_spin(&ifp->if_cached_route_lock); if (ifp->if_fwd_cacheok) { - route_copyin(src, &ifp->if_src_route, sizeof (*src)); + route_copyin(src, &ifp->if_src_route, sizeof(*src)); } else { ROUTE_RELEASE(src); } @@ -6956,7 +8405,7 @@ ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst) lck_mtx_convert_spin(&ifp->if_cached_route_lock); route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6, - sizeof (*dst)); + sizeof(*dst)); lck_mtx_unlock(&ifp->if_cached_route_lock); } 
@@ -6969,7 +8418,7 @@ ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src) if (ifp->if_fwd_cacheok) { route_copyin((struct route *)src, - (struct route *)&ifp->if_src_route6, sizeof (*src)); + (struct route *)&ifp->if_src_route6, sizeof(*src)); } else { ROUTE_RELEASE(src); } @@ -6978,10 +8427,10 @@ ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src) #endif /* INET6 */ struct rtentry * -ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) +ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) { - struct route src_rt; - struct sockaddr_in *dst; + struct route src_rt; + struct sockaddr_in *dst; dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst); @@ -6990,8 +8439,8 @@ ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) { ROUTE_RELEASE(&src_rt); if (dst->sin_family != AF_INET) { - bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst)); - dst->sin_len = sizeof (src_rt.ro_dst); + bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst)); + dst->sin_len = sizeof(src_rt.ro_dst); dst->sin_family = AF_INET; } dst->sin_addr = src_ip; @@ -7002,14 +8451,14 @@ ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) if (src_rt.ro_rt != NULL) { /* retain a ref, copyin consumes one */ - struct rtentry *rte = src_rt.ro_rt; + struct rtentry *rte = src_rt.ro_rt; RT_ADDREF(rte); ifp_src_route_copyin(ifp, &src_rt); src_rt.ro_rt = rte; } } - return (src_rt.ro_rt); + return src_rt.ro_rt; } #if INET6 @@ -7024,22 +8473,22 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) { ROUTE_RELEASE(&src_rt); if (src_rt.ro_dst.sin6_family != AF_INET6) { - bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst)); - src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst); + bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst)); + src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst); src_rt.ro_dst.sin6_family = AF_INET6; } 
src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6); bcopy(src_ip6, &src_rt.ro_dst.sin6_addr, - sizeof (src_rt.ro_dst.sin6_addr)); + sizeof(src_rt.ro_dst.sin6_addr)); if (src_rt.ro_rt == NULL) { src_rt.ro_rt = rtalloc1_scoped( - (struct sockaddr *)&src_rt.ro_dst, 0, 0, - ifp->if_index); + (struct sockaddr *)&src_rt.ro_dst, 0, 0, + ifp->if_index); if (src_rt.ro_rt != NULL) { /* retain a ref, copyin consumes one */ - struct rtentry *rte = src_rt.ro_rt; + struct rtentry *rte = src_rt.ro_rt; RT_ADDREF(rte); ifp_src_route6_copyin(ifp, &src_rt); src_rt.ro_rt = rte; @@ -7047,7 +8496,7 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) } } - return (src_rt.ro_rt); + return src_rt.ro_rt; } #endif /* INET6 */ @@ -7078,8 +8527,9 @@ if_lqm_update(struct ifnet *ifp, int lqm, int locked) /* * Take the lock if needed */ - if (!locked) + if (!locked) { ifnet_lock_exclusive(ifp); + } if (lqm == ifp->if_interface_state.lqm_state && (ifp->if_interface_state.valid_bitmask & @@ -7087,12 +8537,13 @@ if_lqm_update(struct ifnet *ifp, int lqm, int locked) /* * Release the lock if was not held by the caller */ - if (!locked) + if (!locked) { ifnet_lock_done(ifp); - return; /* nothing to update */ + } + return; /* nothing to update */ } ifp->if_interface_state.valid_bitmask |= - IF_INTERFACE_STATE_LQM_STATE_VALID; + IF_INTERFACE_STATE_LQM_STATE_VALID; ifp->if_interface_state.lqm_state = lqm; /* @@ -7100,17 +8551,18 @@ if_lqm_update(struct ifnet *ifp, int lqm, int locked) */ ifnet_lock_done(ifp); - bzero(&ev_lqm_data, sizeof (ev_lqm_data)); + bzero(&ev_lqm_data, sizeof(ev_lqm_data)); ev_lqm_data.link_quality_metric = lqm; dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED, - (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data)); + (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data)); /* * Reacquire the lock for the caller */ - if (locked) + if (locked) { ifnet_lock_exclusive(ifp); + } } static void @@ -7120,8 +8572,9 @@ 
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state) if (rrc_state == ifp->if_interface_state.rrc_state && (ifp->if_interface_state.valid_bitmask & - IF_INTERFACE_STATE_RRC_STATE_VALID)) + IF_INTERFACE_STATE_RRC_STATE_VALID)) { return; + } ifp->if_interface_state.valid_bitmask |= IF_INTERFACE_STATE_RRC_STATE_VALID; @@ -7154,14 +8607,14 @@ if_state_update(struct ifnet *ifp, (if_interface_state->valid_bitmask & IF_INTERFACE_STATE_RRC_STATE_VALID)) { ifnet_lock_done(ifp); - return (ENOTSUP); + return ENOTSUP; } if ((if_interface_state->valid_bitmask & IF_INTERFACE_STATE_LQM_STATE_VALID) && (if_interface_state->lqm_state < IFNET_LQM_MIN || if_interface_state->lqm_state > IFNET_LQM_MAX)) { ifnet_lock_done(ifp); - return (EINVAL); + return EINVAL; } if ((if_interface_state->valid_bitmask & IF_INTERFACE_STATE_RRC_STATE_VALID) && @@ -7170,7 +8623,7 @@ if_state_update(struct ifnet *ifp, if_interface_state->rrc_state != IF_INTERFACE_STATE_RRC_STATE_CONNECTED) { ifnet_lock_done(ifp); - return (EINVAL); + return EINVAL; } if (if_interface_state->valid_bitmask & @@ -7190,7 +8643,12 @@ if_state_update(struct ifnet *ifp, if (ifp->if_interface_state.interface_availability == IF_INTERFACE_STATE_INTERFACE_AVAILABLE) { + os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n", + __func__, if_name(ifp), ifp->if_index); if_index_available = ifp->if_index; + } else { + os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n", + __func__, if_name(ifp), ifp->if_index); } } ifnet_lock_done(ifp); @@ -7198,13 +8656,14 @@ if_state_update(struct ifnet *ifp, /* * Check if the TCP connections going on this interface should be * forced to send probe packets instead of waiting for TCP timers - * to fire. This will be done when there is an explicit - * notification that the interface became available. + * to fire. This is done on an explicit notification such as + * SIOCSIFINTERFACESTATE which marks the interface as available. 
*/ - if (if_index_available > 0) + if (if_index_available > 0) { tcp_interface_send_probe(if_index_available); + } - return (0); + return 0; } void @@ -7246,12 +8705,13 @@ if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe) ifnet_lock_exclusive(ifp); if (conn_probe > 1) { ifnet_lock_done(ifp); - return (EINVAL); + return EINVAL; } - if (conn_probe == 0) + if (conn_probe == 0) { ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY; - else + } else { ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY; + } ifnet_lock_done(ifp); #if NECP @@ -7259,34 +8719,80 @@ if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe) #endif /* NECP */ tcp_probe_connectivity(ifp, conn_probe); - return (0); + return 0; } /* for uuid.c */ -int -uuid_get_ethernet(u_int8_t *node) +static int +get_ether_index(int * ret_other_index) { struct ifnet *ifp; - struct sockaddr_dl *sdl; + int en0_index = 0; + int other_en_index = 0; + int any_ether_index = 0; + short best_unit = 0; - ifnet_head_lock_shared(); + *ret_other_index = 0; TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + /* + * find en0, or if not en0, the lowest unit en*, and if not + * that, any ethernet + */ ifnet_lock_shared(ifp); - IFA_LOCK_SPIN(ifp->if_lladdr); - sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr; - if (sdl->sdl_type == IFT_ETHER) { - memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN); - IFA_UNLOCK(ifp->if_lladdr); - ifnet_lock_done(ifp); - ifnet_head_done(); - return (0); + if (strcmp(ifp->if_name, "en") == 0) { + if (ifp->if_unit == 0) { + /* found en0, we're done */ + en0_index = ifp->if_index; + ifnet_lock_done(ifp); + break; + } + if (other_en_index == 0 || ifp->if_unit < best_unit) { + other_en_index = ifp->if_index; + best_unit = ifp->if_unit; + } + } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) { + any_ether_index = ifp->if_index; } - IFA_UNLOCK(ifp->if_lladdr); ifnet_lock_done(ifp); } - ifnet_head_done(); + if (en0_index == 0) { + if (other_en_index != 0) { + *ret_other_index = other_en_index; + } 
else if (any_ether_index != 0) { + *ret_other_index = any_ether_index; + } + } + return en0_index; +} + +int +uuid_get_ethernet(u_int8_t *node) +{ + static int en0_index; + struct ifnet *ifp; + int other_index = 0; + int the_index = 0; + int ret; - return (-1); + ifnet_head_lock_shared(); + if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) { + en0_index = get_ether_index(&other_index); + } + if (en0_index != 0) { + the_index = en0_index; + } else if (other_index != 0) { + the_index = other_index; + } + if (the_index != 0) { + ifp = ifindex2ifnet[the_index]; + VERIFY(ifp != NULL); + memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN); + ret = 0; + } else { + ret = -1; + } + ifnet_head_done(); + return ret; } static int @@ -7299,14 +8805,16 @@ sysctl_rxpoll SYSCTL_HANDLER_ARGS i = if_rxpoll; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (net_rxpoll == 0) - return (ENXIO); + if (net_rxpoll == 0) { + return ENXIO; + } if_rxpoll = i; - return (err); + return err; } static int @@ -7319,15 +8827,17 @@ sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS q = if_rxpoll_mode_holdtime; err = sysctl_handle_quad(oidp, &q, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) + if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) { q = IF_RXPOLL_MODE_HOLDTIME_MIN; + } if_rxpoll_mode_holdtime = q; - return (err); + return err; } static int @@ -7340,15 +8850,17 @@ sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS q = if_rxpoll_sample_holdtime; err = sysctl_handle_quad(oidp, &q, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (q < IF_RXPOLL_SAMPLETIME_MIN) + if (q < IF_RXPOLL_SAMPLETIME_MIN) { q = IF_RXPOLL_SAMPLETIME_MIN; + } 
if_rxpoll_sample_holdtime = q; - return (err); + return err; } static int @@ -7361,15 +8873,17 @@ sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS q = if_rxpoll_interval_time; err = sysctl_handle_quad(oidp, &q, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (q < IF_RXPOLL_INTERVALTIME_MIN) + if (q < IF_RXPOLL_INTERVALTIME_MIN) { q = IF_RXPOLL_INTERVALTIME_MIN; + } if_rxpoll_interval_time = q; - return (err); + return err; } static int @@ -7379,17 +8893,19 @@ sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS uint32_t i; int err; - i = if_rxpoll_wlowat; + i = if_sysctl_rxpoll_wlowat; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (i == 0 || i >= if_rxpoll_whiwat) - return (EINVAL); + if (i == 0 || i >= if_sysctl_rxpoll_whiwat) { + return EINVAL; + } - if_rxpoll_wlowat = i; - return (err); + if_sysctl_rxpoll_wlowat = i; + return err; } static int @@ -7399,17 +8915,19 @@ sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS uint32_t i; int err; - i = if_rxpoll_whiwat; + i = if_sysctl_rxpoll_whiwat; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (i <= if_rxpoll_wlowat) - return (EINVAL); + if (i <= if_sysctl_rxpoll_wlowat) { + return EINVAL; + } - if_rxpoll_whiwat = i; - return (err); + if_sysctl_rxpoll_whiwat = i; + return err; } static int @@ -7421,14 +8939,16 @@ sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS i = if_sndq_maxlen; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (i < IF_SNDQ_MINLEN) + if (i < IF_SNDQ_MINLEN) { i = IF_SNDQ_MINLEN; + } if_sndq_maxlen = i; - return (err); + return err; } 
static int @@ -7440,66 +8960,134 @@ sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS i = if_rcvq_maxlen; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (i < IF_RCVQ_MINLEN) + if (i < IF_RCVQ_MINLEN) { i = IF_RCVQ_MINLEN; + } if_rcvq_maxlen = i; - return (err); + return err; } -void +int dlil_node_present(struct ifnet *ifp, struct sockaddr *sa, int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48]) { struct kev_dl_node_presence kev; struct sockaddr_dl *sdl; struct sockaddr_in6 *sin6; + int ret = 0; VERIFY(ifp); VERIFY(sa); VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6); - bzero(&kev, sizeof (kev)); + bzero(&kev, sizeof(kev)); sin6 = &kev.sin6_node_address; sdl = &kev.sdl_node_address; nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6); kev.rssi = rssi; kev.link_quality_metric = lqm; kev.node_proximity_metric = npm; - bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info)); + bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info)); - nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm); - dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE, - &kev.link_data, sizeof (kev)); + ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm); + if (ret == 0) { + int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE, + &kev.link_data, sizeof(kev)); + if (err != 0) { + log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with" + "error %d\n", __func__, err); + } + } + return ret; } void dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa) { - struct kev_dl_node_absence kev; - struct sockaddr_in6 *sin6; - struct sockaddr_dl *sdl; + struct kev_dl_node_absence kev = {}; + struct sockaddr_in6 *kev_sin6 = NULL; + struct sockaddr_dl *kev_sdl = NULL; - VERIFY(ifp); - VERIFY(sa); + VERIFY(ifp != NULL); + VERIFY(sa != NULL); VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6); - bzero(&kev, sizeof 
(kev)); - sin6 = &kev.sin6_node_address; - sdl = &kev.sdl_node_address; - nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6); + kev_sin6 = &kev.sin6_node_address; + kev_sdl = &kev.sdl_node_address; + + if (sa->sa_family == AF_INET6) { + /* + * If IPv6 address is given, get the link layer + * address from what was cached in the neighbor cache + */ + VERIFY(sa->sa_len <= sizeof(*kev_sin6)); + bcopy(sa, kev_sin6, sa->sa_len); + nd6_alt_node_absent(ifp, kev_sin6, kev_sdl); + } else { + /* + * If passed address is AF_LINK type, derive the address + * based on the link address. + */ + nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6); + nd6_alt_node_absent(ifp, kev_sin6, NULL); + } + + kev_sdl->sdl_type = ifp->if_type; + kev_sdl->sdl_index = ifp->if_index; - nd6_alt_node_absent(ifp, sin6); dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE, - &kev.link_data, sizeof (kev)); + &kev.link_data, sizeof(kev)); +} + +int +dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl, + int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48]) +{ + struct kev_dl_node_presence kev = {}; + struct sockaddr_dl *kev_sdl = NULL; + struct sockaddr_in6 *kev_sin6 = NULL; + int ret = 0; + + VERIFY(ifp != NULL); + VERIFY(sa != NULL && sdl != NULL); + VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK); + + kev_sin6 = &kev.sin6_node_address; + kev_sdl = &kev.sdl_node_address; + + VERIFY(sdl->sdl_len <= sizeof(*kev_sdl)); + bcopy(sdl, kev_sdl, sdl->sdl_len); + kev_sdl->sdl_type = ifp->if_type; + kev_sdl->sdl_index = ifp->if_index; + + VERIFY(sa->sa_len <= sizeof(*kev_sin6)); + bcopy(sa, kev_sin6, sa->sa_len); + + kev.rssi = rssi; + kev.link_quality_metric = lqm; + kev.node_proximity_metric = npm; + bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info)); + + ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm); + if (ret == 0) { + int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE, + &kev.link_data, sizeof(kev)); + 
if (err != 0) { + log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with", + "error %d\n", __func__, err); + } + } + return ret; } const void * dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, - kauth_cred_t *credp) + kauth_cred_t *credp) { const u_int8_t *bytes; size_t size; @@ -7516,11 +9104,12 @@ dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, default: credp = NULL; break; - }; + } + ; if (credp && mac_system_check_info(*credp, "net.link.addr")) { static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = { - [0] = 2 + [0] = 2 }; bytes = unspec; @@ -7530,8 +9119,10 @@ dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, #pragma unused(credp) #endif - if (sizep != NULL) *sizep = size; - return (bytes); + if (sizep != NULL) { + *sizep = size; + } + return bytes; } void @@ -7543,19 +9134,20 @@ dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN], VERIFY(ifp != NULL); VERIFY(modid != NULL); - _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN); - _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN); + _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN); + _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN); - bzero(&kev, sizeof (kev)); + bzero(&kev, sizeof(kev)); microtime(&tv); kev.timestamp = tv.tv_sec; bcopy(modid, &kev.modid, DLIL_MODIDLEN); - if (info != NULL) + if (info != NULL) { bcopy(info, &kev.info, DLIL_MODARGLEN); + } dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES, - &kev.link_data, sizeof (kev)); + &kev.link_data, sizeof(kev)); } errno_t @@ -7571,19 +9163,22 @@ ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr, /* * XXX: Use priv_check_cred() instead of root check? 
*/ - if ((result = proc_suser(p)) != 0) - return (result); + if ((result = proc_suser(p)) != 0) { + return result; + } if (ifr->ifr_opportunistic.ifo_flags == - IFRIFOF_BLOCK_OPPORTUNISTIC) + IFRIFOF_BLOCK_OPPORTUNISTIC) { level = IFNET_THROTTLE_OPPORTUNISTIC; - else if (ifr->ifr_opportunistic.ifo_flags == 0) + } else if (ifr->ifr_opportunistic.ifo_flags == 0) { level = IFNET_THROTTLE_OFF; - else + } else { result = EINVAL; + } - if (result == 0) + if (result == 0) { result = ifnet_set_throttle(ifp, level); + } } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) { ifr->ifr_opportunistic.ifo_flags = 0; if (level == IFNET_THROTTLE_OPPORTUNISTIC) { @@ -7599,18 +9194,19 @@ ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr, if (result == 0) { uint32_t flags = 0; flags |= (cmd == SIOCSIFOPPORTUNISTIC) ? - INPCB_OPPORTUNISTIC_SETCMD : 0; + INPCB_OPPORTUNISTIC_SETCMD : 0; flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ? - INPCB_OPPORTUNISTIC_THROTTLEON : 0; + INPCB_OPPORTUNISTIC_THROTTLEON : 0; ifr->ifr_opportunistic.ifo_inuse = udp_count_opportunistic(ifp->if_index, flags) + tcp_count_opportunistic(ifp->if_index, flags); } - if (result == EALREADY) + if (result == EALREADY) { result = 0; + } - return (result); + return result; } int @@ -7619,19 +9215,21 @@ ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level) struct ifclassq *ifq; int err = 0; - if (!(ifp->if_eflags & IFEF_TXSTART)) - return (ENXIO); + if (!(ifp->if_eflags & IFEF_TXSTART)) { + return ENXIO; + } *level = IFNET_THROTTLE_OFF; ifq = &ifp->if_snd; IFCQ_LOCK(ifq); /* Throttling works only for IFCQ, not ALTQ instances */ - if (IFCQ_IS_ENABLED(ifq)) + if (IFCQ_IS_ENABLED(ifq)) { IFCQ_GET_THROTTLE(ifq, *level, err); + } IFCQ_UNLOCK(ifq); - return (err); + return err; } int @@ -7640,8 +9238,9 @@ ifnet_set_throttle(struct ifnet *ifp, u_int32_t level) struct ifclassq *ifq; int err = 0; - if (!(ifp->if_eflags & IFEF_TXSTART)) - return (ENXIO); + if (!(ifp->if_eflags & IFEF_TXSTART)) { + 
return ENXIO; + } ifq = &ifp->if_snd; @@ -7650,22 +9249,27 @@ ifnet_set_throttle(struct ifnet *ifp, u_int32_t level) case IFNET_THROTTLE_OPPORTUNISTIC: break; default: - return (EINVAL); + return EINVAL; } IFCQ_LOCK(ifq); - if (IFCQ_IS_ENABLED(ifq)) + if (IFCQ_IS_ENABLED(ifq)) { IFCQ_SET_THROTTLE(ifq, level, err); + } IFCQ_UNLOCK(ifq); if (err == 0) { - printf("%s: throttling level set to %d\n", if_name(ifp), + DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp), level); - if (level == IFNET_THROTTLE_OFF) +#if NECP + necp_update_all_clients(); +#endif /* NECP */ + if (level == IFNET_THROTTLE_OFF) { ifnet_start(ifp); + } } - return (err); + return err; } errno_t @@ -7681,23 +9285,27 @@ ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr, if (cmd == SIOCSIFLOG) { if ((result = priv_check_cred(kauth_cred_get(), - PRIV_NET_INTERFACE_CONTROL, 0)) != 0) - return (result); + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) { + return result; + } level = ifr->ifr_log.ifl_level; - if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) + if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) { result = EINVAL; + } flags = ifr->ifr_log.ifl_flags; - if ((flags &= IFNET_LOGF_MASK) == 0) + if ((flags &= IFNET_LOGF_MASK) == 0) { result = EINVAL; + } category = ifr->ifr_log.ifl_category; subcategory = ifr->ifr_log.ifl_subcategory; - if (result == 0) + if (result == 0) { result = ifnet_set_log(ifp, level, flags, category, subcategory); + } } else { result = ifnet_get_log(ifp, &level, &flags, &category, &subcategory); @@ -7709,7 +9317,7 @@ ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr, } } - return (result); + return result; } int @@ -7730,7 +9338,7 @@ ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, if (ifp->if_output_ctl != NULL) { struct ifnet_log_params l; - bzero(&l, sizeof (l)); + bzero(&l, sizeof(l)); l.level = level; l.flags = flags; l.flags &= ~IFNET_LOGF_DLIL; @@ -7740,7 +9348,7 @@ ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, 
/* Send this request to lower layers */ if (l.flags != 0) { err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG, - sizeof (l), &l); + sizeof(l), &l); } } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) { /* @@ -7749,15 +9357,17 @@ ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, * silently ignore facilities other than ours. */ flags &= IFNET_LOGF_DLIL; - if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) + if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) { level = 0; + } } if (err == 0) { - if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) + if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) { ifp->if_log.flags = 0; - else + } else { ifp->if_log.flags |= flags; + } log(LOG_INFO, "%s: logging level set to %d flags=%b " "arg=%b, category=%d subcategory=%d\n", if_name(ifp), @@ -7766,23 +9376,27 @@ ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, category, subcategory); } - return (err); + return err; } int ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags, int32_t *category, int32_t *subcategory) { - if (level != NULL) + if (level != NULL) { *level = ifp->if_log.level; - if (flags != NULL) + } + if (flags != NULL) { *flags = ifp->if_log.flags; - if (category != NULL) + } + if (category != NULL) { *category = ifp->if_log.category; - if (subcategory != NULL) + } + if (subcategory != NULL) { *subcategory = ifp->if_log.subcategory; + } - return (0); + return 0; } int @@ -7794,29 +9408,30 @@ ifnet_notify_address(struct ifnet *ifp, int af) (void) pf_ifaddr_hook(ifp); #endif /* PF */ - if (ifp->if_output_ctl == NULL) - return (EOPNOTSUPP); + if (ifp->if_output_ctl == NULL) { + return EOPNOTSUPP; + } - bzero(&na, sizeof (na)); + bzero(&na, sizeof(na)); na.address_family = af; - return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS, - sizeof (na), &na)); + return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS, + sizeof(na), &na); } errno_t ifnet_flowid(struct ifnet *ifp, uint32_t 
*flowid) { if (ifp == NULL || flowid == NULL) { - return (EINVAL); + return EINVAL; } else if (!(ifp->if_eflags & IFEF_TXSTART) || !IF_FULLY_ATTACHED(ifp)) { - return (ENXIO); + return ENXIO; } *flowid = ifp->if_flowhash; - return (0); + return 0; } errno_t @@ -7825,10 +9440,10 @@ ifnet_disable_output(struct ifnet *ifp) int err; if (ifp == NULL) { - return (EINVAL); + return EINVAL; } else if (!(ifp->if_eflags & IFEF_TXSTART) || !IF_FULLY_ATTACHED(ifp)) { - return (ENXIO); + return ENXIO; } if ((err = ifnet_fc_add(ifp)) == 0) { @@ -7836,21 +9451,21 @@ ifnet_disable_output(struct ifnet *ifp) ifp->if_start_flags |= IFSF_FLOW_CONTROLLED; lck_mtx_unlock(&ifp->if_start_lock); } - return (err); + return err; } errno_t ifnet_enable_output(struct ifnet *ifp) { if (ifp == NULL) { - return (EINVAL); + return EINVAL; } else if (!(ifp->if_eflags & IFEF_TXSTART) || !IF_FULLY_ATTACHED(ifp)) { - return (ENXIO); + return ENXIO; } ifnet_start_common(ifp, TRUE); - return (0); + return 0; } void @@ -7860,16 +9475,18 @@ ifnet_flowadv(uint32_t flowhash) struct ifnet *ifp; ifce = ifnet_fc_get(flowhash); - if (ifce == NULL) + if (ifce == NULL) { return; + } VERIFY(ifce->ifce_ifp != NULL); ifp = ifce->ifce_ifp; /* flow hash gets recalculated per attach, so check */ if (ifnet_is_attached(ifp, 1)) { - if (ifp->if_flowhash == flowhash) + if (ifp->if_flowhash == flowhash) { (void) ifnet_enable_output(ifp); + } ifnet_decr_iorefcnt(ifp); } ifnet_fc_entry_free(ifce); @@ -7881,7 +9498,7 @@ ifnet_flowadv(uint32_t flowhash) static inline int ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2) { - return (fc1->ifce_flowhash - fc2->ifce_flowhash); + return fc1->ifce_flowhash - fc2->ifce_flowhash; } static int @@ -7894,7 +9511,7 @@ ifnet_fc_add(struct ifnet *ifp) VERIFY(ifp->if_flowhash != 0); flowhash = ifp->if_flowhash; - bzero(&keyfc, sizeof (keyfc)); + bzero(&keyfc, sizeof(keyfc)); keyfc.ifce_flowhash = flowhash; lck_mtx_lock_spin(&ifnet_fc_lock); @@ -7902,7 +9519,7 @@ 
ifnet_fc_add(struct ifnet *ifp) if (ifce != NULL && ifce->ifce_ifp == ifp) { /* Entry is already in ifnet_fc_tree, return */ lck_mtx_unlock(&ifnet_fc_lock); - return (0); + return 0; } if (ifce != NULL) { @@ -7913,7 +9530,7 @@ ifnet_fc_add(struct ifnet *ifp) * avoid adding a second one when there is a collision. */ lck_mtx_unlock(&ifnet_fc_lock); - return (EAGAIN); + return EAGAIN; } /* become regular mutex */ @@ -7923,7 +9540,7 @@ ifnet_fc_add(struct ifnet *ifp) if (ifce == NULL) { /* memory allocation failed */ lck_mtx_unlock(&ifnet_fc_lock); - return (ENOMEM); + return ENOMEM; } bzero(ifce, ifnet_fc_zone_size); @@ -7932,7 +9549,7 @@ ifnet_fc_add(struct ifnet *ifp) RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce); lck_mtx_unlock(&ifnet_fc_lock); - return (0); + return 0; } static struct ifnet_fc_entry * @@ -7941,7 +9558,7 @@ ifnet_fc_get(uint32_t flowhash) struct ifnet_fc_entry keyfc, *ifce; struct ifnet *ifp; - bzero(&keyfc, sizeof (keyfc)); + bzero(&keyfc, sizeof(keyfc)); keyfc.ifce_flowhash = flowhash; lck_mtx_lock_spin(&ifnet_fc_lock); @@ -7949,7 +9566,7 @@ ifnet_fc_get(uint32_t flowhash) if (ifce == NULL) { /* Entry is not present in ifnet_fc_tree, return */ lck_mtx_unlock(&ifnet_fc_lock); - return (NULL); + return NULL; } RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce); @@ -7970,7 +9587,7 @@ ifnet_fc_get(uint32_t flowhash) } lck_mtx_unlock(&ifnet_fc_lock); - return (ifce); + return ifce; } static void @@ -7985,12 +9602,13 @@ ifnet_calc_flowhash(struct ifnet *ifp) struct ifnet_flowhash_key fh __attribute__((aligned(8))); uint32_t flowhash = 0; - if (ifnet_flowhash_seed == 0) + if (ifnet_flowhash_seed == 0) { ifnet_flowhash_seed = RandomULong(); + } - bzero(&fh, sizeof (fh)); + bzero(&fh, sizeof(fh)); - (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name); + (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name); fh.ifk_unit = ifp->if_unit; fh.ifk_flags = ifp->if_flags; fh.ifk_eflags = ifp->if_eflags; @@ -8001,14 +9619,14 @@ 
ifnet_calc_flowhash(struct ifnet *ifp) fh.ifk_rand2 = RandomULong(); try_again: - flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed); + flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed); if (flowhash == 0) { /* try to get a non-zero flowhash */ ifnet_flowhash_seed = RandomULong(); goto try_again; } - return (flowhash); + return flowhash; } int @@ -8026,10 +9644,10 @@ ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len, /* Allow clearing the signature */ IN_IFEXTRA(ifp)->netsig_len = 0; bzero(IN_IFEXTRA(ifp)->netsig, - sizeof (IN_IFEXTRA(ifp)->netsig)); + sizeof(IN_IFEXTRA(ifp)->netsig)); if_inetdata_lock_done(ifp); break; - } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) { + } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) { error = EINVAL; if_inetdata_lock_done(ifp); break; @@ -8049,10 +9667,10 @@ ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len, /* Allow clearing the signature */ IN6_IFEXTRA(ifp)->netsig_len = 0; bzero(IN6_IFEXTRA(ifp)->netsig, - sizeof (IN6_IFEXTRA(ifp)->netsig)); + sizeof(IN6_IFEXTRA(ifp)->netsig)); if_inet6data_lock_done(ifp); break; - } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) { + } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) { error = EINVAL; if_inet6data_lock_done(ifp); break; @@ -8070,7 +9688,7 @@ ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len, break; } - return (error); + return error; } int @@ -8079,8 +9697,9 @@ ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len, { int error = 0; - if (ifp == NULL || len == NULL || data == NULL) - return (EINVAL); + if (ifp == NULL || len == NULL || data == NULL) { + return EINVAL; + } switch (family) { case AF_INET: @@ -8091,10 +9710,11 @@ ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len, if_inetdata_lock_done(ifp); break; } - if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) + if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) { bcopy(IN_IFEXTRA(ifp)->netsig, data, 
*len); - else + } else { error = ENOENT; + } } else { error = ENOMEM; } @@ -8109,10 +9729,11 @@ ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len, if_inet6data_lock_done(ifp); break; } - if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) + if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) { bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len); - else + } else { error = ENOENT; + } } else { error = ENOMEM; } @@ -8124,10 +9745,11 @@ ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len, break; } - if (error == 0 && flags != NULL) + if (error == 0 && flags != NULL) { *flags = 0; + } - return (error); + return error; } #if INET6 @@ -8150,6 +9772,9 @@ ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes) &prefixes[i].ipv6_prefix; if (prefix_len == 0) { + clat_log0((LOG_DEBUG, + "NAT64 prefixes purged from Interface %s\n", + if_name(ifp))); /* Allow clearing the signature */ IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0; bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix, @@ -8157,16 +9782,20 @@ ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes) continue; } else if (prefix_len != NAT64_PREFIX_LEN_32 && - prefix_len != NAT64_PREFIX_LEN_40 && - prefix_len != NAT64_PREFIX_LEN_48 && - prefix_len != NAT64_PREFIX_LEN_56 && - prefix_len != NAT64_PREFIX_LEN_64 && - prefix_len != NAT64_PREFIX_LEN_96) { + prefix_len != NAT64_PREFIX_LEN_40 && + prefix_len != NAT64_PREFIX_LEN_48 && + prefix_len != NAT64_PREFIX_LEN_56 && + prefix_len != NAT64_PREFIX_LEN_64 && + prefix_len != NAT64_PREFIX_LEN_96) { + clat_log0((LOG_DEBUG, + "NAT64 prefixlen is incorrect %d\n", prefix_len)); error = EINVAL; goto out; } if (IN6_IS_SCOPE_EMBED(prefix)) { + clat_log0((LOG_DEBUG, + "NAT64 prefix has interface/link local scope.\n")); error = EINVAL; goto out; } @@ -8174,16 +9803,20 @@ ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes) IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len; bcopy(prefix, 
&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix, sizeof(struct in6_addr)); + clat_log0((LOG_DEBUG, + "NAT64 prefix set to %s with prefixlen: %d\n", + ip6_sprintf(prefix), prefix_len)); one_set = 1; } out: if_inet6data_lock_done(ifp); - if (error == 0 && one_set != 0) + if (error == 0 && one_set != 0) { necp_update_all_clients(); + } - return (error); + return error; } int @@ -8191,8 +9824,9 @@ ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes) { int i, found_one = 0, error = 0; - if (ifp == NULL) - return (EINVAL); + if (ifp == NULL) { + return EINVAL; + } if_inet6data_lock_shared(ifp); @@ -8202,8 +9836,9 @@ ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes) } for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) { - if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) + if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) { found_one = 1; + } } if (found_one == 0) { @@ -8211,14 +9846,15 @@ ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes) goto out; } - if (prefixes) + if (prefixes) { bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes, sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes)); + } out: if_inet6data_lock_done(ifp); - return (error); + return error; } #endif @@ -8230,16 +9866,19 @@ dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff, uint32_t did_sw; if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) || - (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6))) + (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) { return; + } switch (pf) { case PF_INET: did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags); - if (did_sw & CSUM_DELAY_IP) + if (did_sw & CSUM_DELAY_IP) { hwcksum_dbg_finalized_hdr++; - if (did_sw & CSUM_DELAY_DATA) + } + if (did_sw & CSUM_DELAY_DATA) { hwcksum_dbg_finalized_data++; + } break; #if INET6 case PF_INET6: @@ -8252,8 +9891,9 @@ dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff, */ did_sw = in6_finalize_cksum(m, hoff, -1, -1, 
m->m_pkthdr.csum_flags); - if (did_sw & CSUM_DELAY_IPV6_DATA) + if (did_sw & CSUM_DELAY_IPV6_DATA) { hwcksum_dbg_finalized_data++; + } break; #endif /* INET6 */ default: @@ -8271,7 +9911,7 @@ dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header, if (frame_header == NULL || frame_header < (char *)mbuf_datastart(m) || frame_header > (char *)m->m_data) { - printf("%s: frame header pointer 0x%llx out of range " + DLIL_PRINTF("%s: frame header pointer 0x%llx out of range " "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp), (uint64_t)VM_KERNEL_ADDRPERM(frame_header), (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)), @@ -8299,8 +9939,9 @@ dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header, if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) { uint32_t foff = hwcksum_dbg_partial_rxoff_forced; - if (foff > (uint32_t)m->m_pkthdr.len) + if (foff > (uint32_t)m->m_pkthdr.len) { return; + } m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS; @@ -8330,7 +9971,7 @@ dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header, if (hlen > rxoff) { hwcksum_dbg_bad_rxoff++; if (dlil_verbose) { - printf("%s: partial cksum start offset %d " + DLIL_PRINTF("%s: partial cksum start offset %d " "is less than frame header length %d for " "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen, (uint64_t)VM_KERNEL_ADDRPERM(m)); @@ -8351,7 +9992,7 @@ dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header, if (sum != m->m_pkthdr.csum_rx_val) { hwcksum_dbg_bad_cksum++; if (dlil_verbose) { - printf("%s: bad partial cksum value " + DLIL_PRINTF("%s: bad partial cksum value " "0x%x (expected 0x%x) for mbuf " "0x%llx [rx_start %d]\n", if_name(ifp), @@ -8372,8 +10013,9 @@ dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header, if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) { uint32_t aoff = hwcksum_dbg_partial_rxoff_adj; - if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) + if (aoff == rxoff || aoff > 
(uint32_t)m->m_pkthdr.len) { return; + } sum = m_adj_sum16(m, rxoff, aoff, m_pktlen(m) - aoff, sum); @@ -8396,18 +10038,21 @@ sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS i = hwcksum_dbg_mode; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (hwcksum_dbg == 0) - return (ENODEV); + if (hwcksum_dbg == 0) { + return ENODEV; + } - if ((i & ~HWCKSUM_DBG_MASK) != 0) - return (EINVAL); + if ((i & ~HWCKSUM_DBG_MASK) != 0) { + return EINVAL; + } hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK); - return (err); + return err; } static int @@ -8420,15 +10065,17 @@ sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS i = hwcksum_dbg_partial_rxoff_forced; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) - return (ENODEV); + if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) { + return ENODEV; + } hwcksum_dbg_partial_rxoff_forced = i; - return (err); + return err; } static int @@ -8441,15 +10088,17 @@ sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS i = hwcksum_dbg_partial_rxoff_adj; err = sysctl_handle_int(oidp, &i, 0, req); - if (err != 0 || req->newptr == USER_ADDR_NULL) - return (err); + if (err != 0 || req->newptr == USER_ADDR_NULL) { + return err; + } - if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) - return (ENODEV); + if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) { + return ENODEV; + } hwcksum_dbg_partial_rxoff_adj = i; - return (err); + return err; } static int @@ -8459,15 +10108,14 @@ sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS int err; if (req->oldptr == USER_ADDR_NULL) { - } if (req->newptr != USER_ADDR_NULL) { - return (EPERM); + return EPERM; } err = SYSCTL_OUT(req, &tx_chain_len_stats, sizeof(struct chain_len_stats)); - 
return (err); + return err; } @@ -8511,28 +10159,28 @@ static uint8_t sumdata[] = { /* Precomputed 16-bit 1's complement sums for various spans of the above data */ static struct { - boolean_t init; - uint16_t len; - uint16_t sumr; /* reference */ - uint16_t sumrp; /* reference, precomputed */ + boolean_t init; + uint16_t len; + uint16_t sumr; /* reference */ + uint16_t sumrp; /* reference, precomputed */ } sumtbl[] = { - { FALSE, 0, 0, 0x0000 }, - { FALSE, 1, 0, 0x001f }, - { FALSE, 2, 0, 0x8b1f }, - { FALSE, 3, 0, 0x8b27 }, - { FALSE, 7, 0, 0x790e }, - { FALSE, 11, 0, 0xcb6d }, - { FALSE, 20, 0, 0x20dd }, - { FALSE, 27, 0, 0xbabd }, - { FALSE, 32, 0, 0xf3e8 }, - { FALSE, 37, 0, 0x197d }, - { FALSE, 43, 0, 0x9eae }, - { FALSE, 64, 0, 0x4678 }, + { FALSE, 0, 0, 0x0000 }, + { FALSE, 1, 0, 0x001f }, + { FALSE, 2, 0, 0x8b1f }, + { FALSE, 3, 0, 0x8b27 }, + { FALSE, 7, 0, 0x790e }, + { FALSE, 11, 0, 0xcb6d }, + { FALSE, 20, 0, 0x20dd }, + { FALSE, 27, 0, 0xbabd }, + { FALSE, 32, 0, 0xf3e8 }, + { FALSE, 37, 0, 0x197d }, + { FALSE, 43, 0, 0x9eae }, + { FALSE, 64, 0, 0x4678 }, { FALSE, 127, 0, 0x9399 }, { FALSE, 256, 0, 0xd147 }, { FALSE, 325, 0, 0x0358 }, }; -#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0])) +#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0])) static void dlil_verify_sum16(void) @@ -8542,25 +10190,26 @@ dlil_verify_sum16(void) int n; /* Make sure test data plus extra room for alignment fits in cluster */ - _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES); + _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES); kprintf("DLIL: running SUM16 self-tests ... 
"); m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR); - MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */ - buf = mtod(m, uint8_t *); /* base address */ + m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2)); + + buf = mtod(m, uint8_t *); /* base address */ for (n = 0; n < SUMTBL_MAX; n++) { uint16_t len = sumtbl[n].len; int i; /* Verify for all possible alignments */ - for (i = 0; i < (int)sizeof (uint64_t); i++) { + for (i = 0; i < (int)sizeof(uint64_t); i++) { uint16_t sum, sumr; uint8_t *c; /* Copy over test data to mbuf */ - VERIFY(len <= sizeof (sumdata)); + VERIFY(len <= sizeof(sumdata)); c = buf + i; bcopy(sumdata, c, len); @@ -8623,109 +10272,41 @@ dlil_verify_sum16(void) } #endif /* DEBUG || DEVELOPMENT */ -#define CASE_STRINGIFY(x) case x: return #x +#define CASE_STRINGIFY(x) case x: return #x __private_extern__ const char * dlil_kev_dl_code_str(u_int32_t event_code) { switch (event_code) { - CASE_STRINGIFY(KEV_DL_SIFFLAGS); - CASE_STRINGIFY(KEV_DL_SIFMETRICS); - CASE_STRINGIFY(KEV_DL_SIFMTU); - CASE_STRINGIFY(KEV_DL_SIFPHYS); - CASE_STRINGIFY(KEV_DL_SIFMEDIA); - CASE_STRINGIFY(KEV_DL_SIFGENERIC); - CASE_STRINGIFY(KEV_DL_ADDMULTI); - CASE_STRINGIFY(KEV_DL_DELMULTI); - CASE_STRINGIFY(KEV_DL_IF_ATTACHED); - CASE_STRINGIFY(KEV_DL_IF_DETACHING); - CASE_STRINGIFY(KEV_DL_IF_DETACHED); - CASE_STRINGIFY(KEV_DL_LINK_OFF); - CASE_STRINGIFY(KEV_DL_LINK_ON); - CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED); - CASE_STRINGIFY(KEV_DL_PROTO_DETACHED); - CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED); - CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED); - CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT); - CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED); - CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED); - CASE_STRINGIFY(KEV_DL_NODE_PRESENCE); - CASE_STRINGIFY(KEV_DL_NODE_ABSENCE); - CASE_STRINGIFY(KEV_DL_MASTER_ELECTED); - CASE_STRINGIFY(KEV_DL_ISSUES); - CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED); + CASE_STRINGIFY(KEV_DL_SIFFLAGS); + CASE_STRINGIFY(KEV_DL_SIFMETRICS); + 
CASE_STRINGIFY(KEV_DL_SIFMTU); + CASE_STRINGIFY(KEV_DL_SIFPHYS); + CASE_STRINGIFY(KEV_DL_SIFMEDIA); + CASE_STRINGIFY(KEV_DL_SIFGENERIC); + CASE_STRINGIFY(KEV_DL_ADDMULTI); + CASE_STRINGIFY(KEV_DL_DELMULTI); + CASE_STRINGIFY(KEV_DL_IF_ATTACHED); + CASE_STRINGIFY(KEV_DL_IF_DETACHING); + CASE_STRINGIFY(KEV_DL_IF_DETACHED); + CASE_STRINGIFY(KEV_DL_LINK_OFF); + CASE_STRINGIFY(KEV_DL_LINK_ON); + CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED); + CASE_STRINGIFY(KEV_DL_PROTO_DETACHED); + CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED); + CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED); + CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT); + CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED); + CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED); + CASE_STRINGIFY(KEV_DL_NODE_PRESENCE); + CASE_STRINGIFY(KEV_DL_NODE_ABSENCE); + CASE_STRINGIFY(KEV_DL_MASTER_ELECTED); + CASE_STRINGIFY(KEV_DL_ISSUES); + CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED); default: break; } - return (""); -} - -/* - * Mirror the arguments of ifnet_get_local_ports_extended() - * ifindex - * protocol - * flags - */ -static int -sysctl_get_ports_used SYSCTL_HANDLER_ARGS -{ -#pragma unused(oidp) - int *name = (int *)arg1; - int namelen = arg2; - int error = 0; - int idx; - protocol_family_t protocol; - u_int32_t flags; - ifnet_t ifp = NULL; - u_int8_t *bitfield = NULL; - - if (req->newptr != USER_ADDR_NULL) { - error = EPERM; - goto done; - } - if (namelen != 3) { - error = ENOENT; - goto done; - } - - if (req->oldptr == USER_ADDR_NULL) { - req->oldidx = bitstr_size(65536); - goto done; - } - if (req->oldlen < bitstr_size(65536)) { - error = ENOMEM; - goto done; - } - - idx = name[0]; - protocol = name[1]; - flags = name[2]; - - ifnet_head_lock_shared(); - if (!IF_INDEX_IN_RANGE(idx)) { - ifnet_head_done(); - error = ENOENT; - goto done; - } - ifp = ifindex2ifnet[idx]; - ifnet_head_done(); - - bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK | M_ZERO); - if (bitfield == NULL) { - error = ENOMEM; - goto done; - } - error = 
ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield); - if (error != 0) { - printf("%s: ifnet_get_local_ports_extended() error %d\n", - __func__, error); - goto done; - } - error = SYSCTL_OUT(req, bitfield, bitstr_size(65536)); -done: - if (bitfield != NULL) - _FREE(bitfield, M_TEMP); - return (error); + return ""; } static void @@ -8798,8 +10379,9 @@ sysctl_get_kao_frames SYSCTL_HANDLER_ARGS * Only root can get look at other people TCP frames */ error = proc_suser(current_proc()); - if (error != 0) + if (error != 0) { goto done; + } /* * Validate the input parameters */ @@ -8849,7 +10431,7 @@ sysctl_get_kao_frames SYSCTL_HANDLER_ARGS error = ifnet_get_keepalive_offload_frames(ifp, frames_array, frames_array_count, frame_data_offset, &used_frames_count); if (error != 0) { - printf("%s: ifnet_get_keepalive_offload_frames error %d\n", + DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n", __func__, error); goto done; } @@ -8862,9 +10444,10 @@ sysctl_get_kao_frames SYSCTL_HANDLER_ARGS } } done: - if (frames_array != NULL) + if (frames_array != NULL) { _FREE(frames_array, M_TEMP); - return (error); + } + return error; } #endif /* DEVELOPMENT || DEBUG */ @@ -8874,27 +10457,3 @@ ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs, { tcp_update_stats_per_flow(ifs, ifp); } - -static void -dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1) -{ -#pragma unused(arg1) - struct ifnet *ifp = (struct ifnet *)arg0; - struct dlil_threading_info *inp = ifp->if_inp; - - ifnet_lock_shared(ifp); - if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) { - ifnet_lock_done(ifp); - return; - } - - lck_mtx_lock_spin(&inp->input_lck); - inp->input_waiting |= DLIL_INPUT_WAITING; - if (!(inp->input_waiting & DLIL_INPUT_RUNNING) || - !qempty(&inp->rcvq_pkts)) { - inp->wtot++; - wakeup_one((caddr_t)&inp->input_waiting); - } - lck_mtx_unlock(&inp->input_lck); - ifnet_lock_done(ifp); -}