/*
- * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <net/kpi_protocol.h>
#include <net/if_types.h>
+#include <net/if_ipsec.h>
#include <net/if_llreach.h>
+#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
-
+#include <net/if_llatbl.h>
+#include <net/net_api_stats.h>
+#include <net/if_ports_used.h>
+#include <net/if_vlan_var.h>
+#include <netinet/in.h>
#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_tclass.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
#endif /* INET */
#if INET6
+#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
#endif /* INET6 */
-
+#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>
#if PF
#include <net/pfvar.h>
#endif /* PF */
-#if PF_ALTQ
-#include <net/altq/altq.h>
-#endif /* PF_ALTQ */
#include <net/pktsched/pktsched.h>
+#include <net/pktsched/pktsched_netem.h>
-#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
-#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
+#if NECP
+#include <net/necp.h>
+#endif /* NECP */
+
+
+#include <os/log.h>
+
+#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
+#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
-#define MAX_LINKADDR 4 /* LONGWORDS */
+#define MAX_LINKADDR 4 /* LONGWORDS */
#define M_NKE M_IFADDR
#if 1
-#define DLIL_PRINTF printf
+#define DLIL_PRINTF printf
#else
-#define DLIL_PRINTF kprintf
+#define DLIL_PRINTF kprintf
#endif
-#define IF_DATA_REQUIRE_ALIGNED_64(f) \
+#define IF_DATA_REQUIRE_ALIGNED_64(f) \
_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
-#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
+#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
enum {
- kProtoKPI_v1 = 1,
- kProtoKPI_v2 = 2
+ kProtoKPI_v1 = 1,
+ kProtoKPI_v2 = 2
};
/*
* a reference to it is valid, via if_proto_ref().
*/
struct if_proto {
- SLIST_ENTRY(if_proto) next_hash;
- u_int32_t refcount;
- u_int32_t detached;
- struct ifnet *ifp;
- protocol_family_t protocol_family;
- int proto_kpi;
- union {
+ SLIST_ENTRY(if_proto) next_hash;
+ u_int32_t refcount;
+ u_int32_t detached;
+ struct ifnet *ifp;
+ protocol_family_t protocol_family;
+ int proto_kpi;
+ union {
struct {
- proto_media_input input;
- proto_media_preout pre_output;
- proto_media_event event;
- proto_media_ioctl ioctl;
- proto_media_detached detached;
- proto_media_resolve_multi resolve_multi;
- proto_media_send_arp send_arp;
+ proto_media_input input;
+ proto_media_preout pre_output;
+ proto_media_event event;
+ proto_media_ioctl ioctl;
+ proto_media_detached detached;
+ proto_media_resolve_multi resolve_multi;
+ proto_media_send_arp send_arp;
} v1;
struct {
- proto_media_input_v2 input;
- proto_media_preout pre_output;
- proto_media_event event;
- proto_media_ioctl ioctl;
- proto_media_detached detached;
- proto_media_resolve_multi resolve_multi;
- proto_media_send_arp send_arp;
+ proto_media_input_v2 input;
+ proto_media_preout pre_output;
+ proto_media_event event;
+ proto_media_ioctl ioctl;
+ proto_media_detached detached;
+ proto_media_resolve_multi resolve_multi;
+ proto_media_send_arp send_arp;
} v2;
} kpi;
};
SLIST_HEAD(proto_hash_entry, if_proto);
-#define DLIL_SDLMAXLEN 64
-#define DLIL_SDLDATALEN \
+#define DLIL_SDLDATALEN \
(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
struct dlil_ifnet {
- struct ifnet dl_if; /* public ifnet */
+ struct ifnet dl_if; /* public ifnet */
/*
* DLIL private fields, protected by dl_if_lock
*/
decl_lck_mtx_data(, dl_if_lock);
- TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
- u_int32_t dl_if_flags; /* flags (below) */
- u_int32_t dl_if_refcnt; /* refcnt */
+ TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
+ u_int32_t dl_if_flags; /* flags (below) */
+ u_int32_t dl_if_refcnt; /* refcnt */
void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
- void *dl_if_uniqueid; /* unique interface id */
- size_t dl_if_uniqueid_len; /* length of the unique id */
- char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
- char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
+ void *dl_if_uniqueid; /* unique interface id */
+ size_t dl_if_uniqueid_len; /* length of the unique id */
+ char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
+ char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
struct {
- struct ifaddr ifa; /* lladdr ifa */
- u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
- u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
+ struct ifaddr ifa; /* lladdr ifa */
+ u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
+ u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
} dl_if_lladdr;
u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
- ctrace_t dl_if_attach; /* attach PC stacktrace */
- ctrace_t dl_if_detach; /* detach PC stacktrace */
+ ctrace_t dl_if_attach; /* attach PC stacktrace */
+ ctrace_t dl_if_detach; /* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
-#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
-#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
-#define DLIF_DEBUG 0x4 /* has debugging info */
+#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
+#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
+#define DLIF_DEBUG 0x4 /* has debugging info */
-#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
+#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
/* For gdb */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
struct dlil_ifnet_dbg {
- struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
- u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
- u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
+ struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
+ u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
+ u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
/*
* Circular lists of ifnet_{reference,release} callers.
*/
- ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
- ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
+ ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
+ ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};
-#define DLIL_TO_IFP(s) (&s->dl_if)
-#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
+#define DLIL_TO_IFP(s) (&s->dl_if)
+#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
struct ifnet_filter {
- TAILQ_ENTRY(ifnet_filter) filt_next;
- u_int32_t filt_skip;
- u_int32_t filt_flags;
- ifnet_t filt_ifp;
- const char *filt_name;
- void *filt_cookie;
- protocol_family_t filt_protocol;
- iff_input_func filt_input;
- iff_output_func filt_output;
- iff_event_func filt_event;
- iff_ioctl_func filt_ioctl;
- iff_detached_func filt_detached;
+ TAILQ_ENTRY(ifnet_filter) filt_next;
+ u_int32_t filt_skip;
+ u_int32_t filt_flags;
+ ifnet_t filt_ifp;
+ const char *filt_name;
+ void *filt_cookie;
+ protocol_family_t filt_protocol;
+ iff_input_func filt_input;
+ iff_output_func filt_output;
+ iff_event_func filt_event;
+ iff_ioctl_func filt_ioctl;
+ iff_detached_func filt_detached;
};
struct proto_input_entry;
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
-static unsigned int ifnet_debug = 1; /* debugging (enabled) */
+static unsigned int ifnet_debug = 1; /* debugging (enabled) */
#else
-static unsigned int ifnet_debug; /* debugging (disabled) */
+static unsigned int ifnet_debug; /* debugging (disabled) */
#endif /* !DEBUG */
-static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
-static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
-static struct zone *dlif_zone; /* zone for dlil_ifnet */
+static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
+static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
+static struct zone *dlif_zone; /* zone for dlil_ifnet */
-#define DLIF_ZONE_MAX 64 /* maximum elements in zone */
-#define DLIF_ZONE_NAME "ifnet" /* zone name */
+#define DLIF_ZONE_MAX IFNETS_MAX /* maximum elements in zone */
+#define DLIF_ZONE_NAME "ifnet" /* zone name */
-static unsigned int dlif_filt_size; /* size of ifnet_filter */
-static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
+static unsigned int dlif_filt_size; /* size of ifnet_filter */
+static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
-#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
-#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
+#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
+#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
-static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
-static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
+static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
+static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
-#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
-#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
+#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
+#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
-static unsigned int dlif_proto_size; /* size of if_proto */
-static struct zone *dlif_proto_zone; /* zone for if_proto */
+static unsigned int dlif_proto_size; /* size of if_proto */
+static struct zone *dlif_proto_zone; /* zone for if_proto */
-#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
-#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
+#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
+#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
-static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
-static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
-static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
+static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
+static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
+static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
-#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
-#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
+#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
+#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
-static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
-static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
-static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
+static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
+static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
+static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
-#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
-#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
+#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
+#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
-/*
- * Updating this variable should be done by first acquiring the global
- * radix node head (rnh_lock), in tandem with settting/clearing the
- * PR_AGGDRAIN for routedomain.
- */
-u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;
static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
(struct dlil_threading_info *)&dlil_main_input_thread_info;
-static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
+static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
-static int dlil_ifp_proto_count(struct ifnet *);
+static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
+ u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
const struct sockaddr_dl *, const struct sockaddr *,
const struct sockaddr_dl *, const struct sockaddr *);
-static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
-static void ifp_if_start(struct ifnet *);
+static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
+ struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
+ boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
+#if CONFIG_EMBEDDED
+static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
+ const struct sockaddr *, const char *, const char *,
+ u_int32_t *, u_int32_t *);
+#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
const struct sockaddr *, const char *, const char *);
+#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
const struct sockaddr *, const char *, const char *,
u_int32_t *, u_int32_t *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
static void dlil_main_input_thread_func(void *, wait_result_t);
+static void dlil_main_input_thread_cont(void *, wait_result_t);
+
static void dlil_input_thread_func(void *, wait_result_t);
+static void dlil_input_thread_cont(void *, wait_result_t);
+
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
+static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
+
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
- struct dlil_threading_info *, boolean_t);
-static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
+ struct dlil_threading_info *, struct ifnet *, boolean_t);
+static boolean_t dlil_input_stats_sync(struct ifnet *,
+ struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
-
-#if DEBUG
+static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
+static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
+static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
+#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
-#endif /* DEBUG */
+#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
protocol_family_t);
+static void dlil_incr_pending_thread_count(void);
+static void dlil_decr_pending_thread_count(void);
+
static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);
-static void ifnet_start_thread_fn(void *, wait_result_t);
-static void ifnet_poll_thread_fn(void *, wait_result_t);
-static void ifnet_poll(struct ifnet *);
+static void ifnet_start_thread_func(void *, wait_result_t);
+static void ifnet_start_thread_cont(void *, wait_result_t);
+
+static void ifnet_poll_thread_func(void *, wait_result_t);
+static void ifnet_poll_thread_cont(void *, wait_result_t);
+
+static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
+ boolean_t, boolean_t *);
static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
+struct chain_len_stats tx_chain_len_stats;
+static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
+
+#if TEST_INPUT_THREAD_TERMINATION
+static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
+#endif /* TEST_INPUT_THREAD_TERMINATION */
+
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
-static void *ifnet_delayed_run; /* wait channel for detaching thread */
+static void *ifnet_delayed_run; /* wait channel for detaching thread */
decl_lck_mtx_data(static, ifnet_fc_lock);
static uint32_t ifnet_flowhash_seed;
struct ifnet_flowhash_key {
- char ifk_name[IFNAMSIZ];
- uint32_t ifk_unit;
- uint32_t ifk_flags;
- uint32_t ifk_eflags;
- uint32_t ifk_capabilities;
- uint32_t ifk_capenable;
- uint32_t ifk_output_sched_model;
- uint32_t ifk_rand1;
- uint32_t ifk_rand2;
+ char ifk_name[IFNAMSIZ];
+ uint32_t ifk_unit;
+ uint32_t ifk_flags;
+ uint32_t ifk_eflags;
+ uint32_t ifk_capabilities;
+ uint32_t ifk_capenable;
+ uint32_t ifk_output_sched_model;
+ uint32_t ifk_rand1;
+ uint32_t ifk_rand2;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
RB_ENTRY(ifnet_fc_entry) ifce_entry;
- u_int32_t ifce_flowhash;
- struct ifnet *ifce_ifp;
+ u_int32_t ifce_flowhash;
+ struct ifnet *ifce_ifp;
};
static uint32_t ifnet_calc_flowhash(struct ifnet *);
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
-static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
-static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
+static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
+static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
-#define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
-#define IFNET_FC_ZONE_MAX 32
+#define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
+#define IFNET_FC_ZONE_MAX 32
-extern void bpfdetach(struct ifnet*);
+extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);
-extern uint32_t udp_count_opportunistic(unsigned int ifindex,
- u_int32_t flags);
-extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
- u_int32_t flags);
+extern uint32_t udp_count_opportunistic(unsigned int ifindex,
+ u_int32_t flags);
+extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
+ u_int32_t flags);
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
#if CONFIG_MACF
+#ifdef CONFIG_EMBEDDED
+int dlil_lladdr_ckreq = 1;
+#else
int dlil_lladdr_ckreq = 0;
#endif
+#endif
#if DEBUG
int dlil_verbose = 1;
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
-struct timespec dlil_dbgrate = { 1, 0 };
+struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
SYSCTL_DECL(_net_link_generic_system);
-#if CONFIG_MACF
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
- CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
- "Require MACF system info check to expose link-layer address");
-#endif
-
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
-#define IF_SNDQ_MINLEN 32
+#define IF_SNDQ_MINLEN 32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
sysctl_sndq_maxlen, "I", "Default transmit queue max length");
-#define IF_RCVQ_MINLEN 32
-#define IF_RCVQ_MAXLEN 256
+#define IF_RCVQ_MINLEN 32
+#define IF_RCVQ_MAXLEN 256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
sysctl_rcvq_maxlen, "I", "Default receive queue max length");
-#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
-static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
+#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
+u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
"ilog2 of EWMA decay rate of avg inbound packets");
-#define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
-#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
+#define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
+#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
"Q", "input poll mode freeze time");
-#define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
-#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
+#define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
+#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
"Q", "input poll sampling time");
-#define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
-#define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
"Q", "input poll interval (time)");
-#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
-static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
+#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
+u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
-#define IF_RXPOLL_WLOWAT 10
-static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
+#define IF_RXPOLL_WLOWAT 10
+static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
"I", "input poll wakeup low watermark");
-#define IF_RXPOLL_WHIWAT 100
-static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
+#define IF_RXPOLL_WHIWAT 100
+static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
"I", "input poll wakeup high watermark");
-static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
+static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
"max packets per poll call");
-static u_int32_t if_rxpoll = 1;
+u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
sysctl_rxpoll, "I", "enable opportunistic input polling");
-u_int32_t if_bw_smoothing_val = 3;
-SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
- CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
-
-u_int32_t if_bw_measure_size = 10;
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
- CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
+#if TEST_INPUT_THREAD_TERMINATION
+static u_int32_t if_input_thread_termination_spin = 0;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &if_input_thread_termination_spin, 0,
+ sysctl_input_thread_termination_spin,
+ "I", "input thread termination spin limit");
+#endif /* TEST_INPUT_THREAD_TERMINATION */
static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
- CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads , 0,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
"Current number of DLIL input threads");
#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
- CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check , 0,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
"Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
"enable hardware cksum debugging");
-#define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
-#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
-#define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
-#define HWCKSUM_DBG_MASK \
- (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
+u_int32_t ifnet_start_delayed = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
+ "number of times start was delayed");
+
+/*
+ * Counter published read-write through the start_delay_disabled sysctl
+ * declared right below (parent node: _net_link_generic_system).
+ * NOTE(review): the description text assumes this counts how often the
+ * delayed-start behaviour was turned off -- confirm against the code that
+ * increments ifnet_delay_start_disabled.
+ */
+u_int32_t ifnet_delay_start_disabled = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
+ "number of times start delay was disabled");
+
+#define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
+#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
+#define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
+#define HWCKSUM_DBG_MASK \
+ (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
HWCKSUM_DBG_FINALIZE_FORCED)
static uint32_t hwcksum_dbg_mode = 0;
CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
"enable receive hardware checksum offload");
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
+ CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
+ sysctl_tx_chain_len_stats, "S", "");
+
+uint32_t tx_chain_len_count = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
+
+static uint32_t threshold_notify = 1; /* enable/disable */
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
+
+static uint32_t threshold_interval = 2; /* in seconds */
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
+
+#if (DEVELOPMENT || DEBUG)
+static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
+SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
+ CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
+#endif /* DEVELOPMENT || DEBUG */
+
+struct net_api_stats net_api_stats;
+SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &net_api_stats, net_api_stats, "");
+
+
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
-extern u_int32_t inject_buckets;
+extern u_int32_t inject_buckets;
+
+static lck_grp_attr_t *dlil_grp_attributes = NULL;
+static lck_attr_t *dlil_lck_attributes = NULL;
-static lck_grp_attr_t *dlil_grp_attributes = NULL;
-static lck_attr_t *dlil_lck_attributes = NULL;
+/* DLIL data threshold thread call */
+static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
+
+void
+ifnet_filter_update_tso(boolean_t filter_enable)
+{
+ /*
+ * Atomically adjust dlil_filter_disable_tso_count: add 1 when
+ * filter_enable is true, subtract 1 otherwise. Then update the
+ * route_generation ID to let TCP know it should reevaluate
+ * doing TSO or not.
+ */
+ OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
+ routegenid_update();
+}
-#define DLIL_INPUT_CHECK(m, ifp) { \
- struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
- if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
- !(mbuf_flags(m) & MBUF_PKTHDR)) { \
- panic_plain("%s: invalid mbuf %p\n", __func__, m); \
- /* NOTREACHED */ \
- } \
+#define DLIL_INPUT_CHECK(m, ifp) { \
+ struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
+ if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
+ !(mbuf_flags(m) & MBUF_PKTHDR)) { \
+ panic_plain("%s: invalid mbuf %p\n", __func__, m); \
+ /* NOTREACHED */ \
+ } \
}
-#define DLIL_EWMA(old, new, decay) do { \
- u_int32_t _avg; \
- if ((_avg = (old)) > 0) \
- _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
- else \
- _avg = (new); \
- (old) = _avg; \
+#define DLIL_EWMA(old, new, decay) do { \
+ u_int32_t _avg; \
+ if ((_avg = (old)) > 0) \
+ _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
+ else \
+ _avg = (new); \
+ (old) = _avg; \
} while (0)
-#define MBPS (1ULL * 1000 * 1000)
-#define GBPS (MBPS * 1000)
+#define MBPS (1ULL * 1000 * 1000) /* 10^6, unsigned long long */
+#define GBPS (MBPS * 1000) /* 10^9 */
struct rxpoll_time_tbl {
- u_int64_t speed; /* downlink speed */
- u_int32_t plowat; /* packets low watermark */
- u_int32_t phiwat; /* packets high watermark */
- u_int32_t blowat; /* bytes low watermark */
- u_int32_t bhiwat; /* bytes high watermark */
+ u_int64_t speed; /* downlink speed (rxpoll_tbl gives it as MBPS/GBPS multiples) */
+ u_int32_t plowat; /* packets low watermark */
+ u_int32_t phiwat; /* packets high watermark */
+ u_int32_t blowat; /* bytes low watermark */
+ u_int32_t bhiwat; /* bytes high watermark */
};
static struct rxpoll_time_tbl rxpoll_tbl[] = {
- { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
- { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
- { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
- { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
- { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
- { 0, 0, 0, 0, 0 }
+ { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) }, /* rows are listed in ascending speed order */
+ { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
+ { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
+ { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
+ { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
+ { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 } /* all-zero last row; NOTE(review): presumably the end-of-table marker -- confirm against the code that walks rxpoll_tbl */
};
+decl_lck_mtx_data(static, dlil_thread_sync_lock);
+static uint32_t dlil_pending_thread_cnt = 0;
+static void
+dlil_incr_pending_thread_count(void)
+{ /*
+ * Add one to dlil_pending_thread_cnt while holding
+ * dlil_thread_sync_lock.  The initialization code in this file
+ * calls this before starting the main input thread and the
+ * detacher thread, then sleeps until the count is back to zero.
+ */
+ LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED); /* the lock is taken just below */
+ lck_mtx_lock(&dlil_thread_sync_lock);
+ dlil_pending_thread_cnt++;
+ lck_mtx_unlock(&dlil_thread_sync_lock);
+}
+
+static void
+dlil_decr_pending_thread_count(void)
+{ /*
+ * Subtract one from dlil_pending_thread_cnt while holding
+ * dlil_thread_sync_lock; the count must be non-zero on entry.
+ * When it reaches zero, wakeup() is issued on the counter's
+ * address, which is the channel the initialization code in this
+ * file msleep()s on.
+ */
+ LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED); /* the lock is taken just below */
+ lck_mtx_lock(&dlil_thread_sync_lock);
+ VERIFY(dlil_pending_thread_cnt > 0);
+ dlil_pending_thread_cnt--;
+ if (dlil_pending_thread_cnt == 0) {
+ wakeup(&dlil_pending_thread_cnt);
+ }
+ lck_mtx_unlock(&dlil_thread_sync_lock);
+}
+
int
proto_hash_value(u_int32_t protocol_family)
{
* the hash bucket index and the protocol family defined
* here; future changes must be applied there as well.
*/
- switch(protocol_family) {
- case PF_INET:
- return (0);
- case PF_INET6:
- return (1);
- case PF_VLAN:
- return (2);
- case PF_UNSPEC:
- default:
- return (3);
+ switch (protocol_family) {
+ case PF_INET:
+ return 0;
+ case PF_INET6:
+ return 1;
+ case PF_VLAN:
+ return 2;
+ case PF_802154:
+ return 3;
+ case PF_UNSPEC:
+ default:
+ return 4;
}
}
ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
- if (ifp->if_proto_hash != NULL)
+ if (ifp->if_proto_hash != NULL) {
proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
+ }
- while (proto != NULL && proto->protocol_family != protocol_family)
+ while (proto != NULL && proto->protocol_family != protocol_family) {
proto = SLIST_NEXT(proto, next_hash);
+ }
- if (proto != NULL)
+ if (proto != NULL) {
if_proto_ref(proto);
+ }
- return (proto);
+ return proto;
}
static void
struct kev_dl_proto_data ev_pr_data;
oldval = atomic_add_32_ov(&proto->refcount, -1);
- if (oldval > 1)
+ if (oldval > 1) {
return;
+ }
/* No more reference on this, protocol must have been detached */
VERIFY(proto->detached);
if (proto->proto_kpi == kProtoKPI_v1) {
- if (proto->kpi.v1.detached)
+ if (proto->kpi.v1.detached) {
proto->kpi.v1.detached(ifp, proto->protocol_family);
+ }
}
if (proto->proto_kpi == kProtoKPI_v2) {
- if (proto->kpi.v2.detached)
+ if (proto->kpi.v2.detached) {
proto->kpi.v2.detached(ifp, proto->protocol_family);
+ }
}
/*
*/
ifnet_lock_shared(ifp);
ev_pr_data.proto_family = proto_family;
- ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+ ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
ifnet_lock_done(ifp);
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
(struct net_event_data *)&ev_pr_data,
sizeof(struct kev_dl_proto_data));
+ if (ev_pr_data.proto_remaining_count == 0) {
+ /*
+ * The protocol count has gone to zero, mark the interface down.
+ * This used to be done by configd.KernelEventMonitor, but that
+ * is inherently prone to races (rdar://problem/30810208).
+ */
+ (void) ifnet_set_flags(ifp, 0, IFF_UP);
+ (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
+ dlil_post_sifflags_msg(ifp);
+ }
+
zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
+#if !MACH_ASSERT
+#pragma unused(ifp)
+#endif
unsigned int type = 0;
int ass = 1;
panic("bad ifnet assert type: %d", what);
/* NOTREACHED */
}
- if (ass)
- lck_rw_assert(&ifp->if_lock, type);
+ if (ass) {
+ LCK_RW_ASSERT(&ifp->if_lock, type);
+ }
}
__private_extern__ void
lck_rw_done(&ifp->if_lock);
}
+#if INET /* thin wrappers around the per-interface if_inetdata_lock read/write lock */
+__private_extern__ void
+if_inetdata_lock_shared(struct ifnet *ifp)
+{
+ lck_rw_lock_shared(&ifp->if_inetdata_lock); /* acquire in shared mode */
+}
+
+__private_extern__ void
+if_inetdata_lock_exclusive(struct ifnet *ifp)
+{
+ lck_rw_lock_exclusive(&ifp->if_inetdata_lock); /* acquire in exclusive mode */
+}
+
+__private_extern__ void
+if_inetdata_lock_done(struct ifnet *ifp)
+{
+ lck_rw_done(&ifp->if_inetdata_lock); /* release, whichever mode was taken */
+}
+#endif /* INET */
+
#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
lck_rw_done(&ifnet_head_lock);
}
+__private_extern__ void
+ifnet_head_assert_exclusive(void)
+{ /* Assert, through LCK_RW_ASSERT, that ifnet_head_lock is held in exclusive mode. */
+ LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
+}
+
/*
- * Caller must already be holding ifnet lock.
+ * dlil_ifp_protolist
+ * - get the list of protocols attached to the interface, or just the number
+ * of attached protocols
+ * - if the number returned is greater than 'list_count', truncation occurred
+ * - 'list' may be NULL; nothing is stored then and only the count is returned
+ * - entries are stored only while fewer than 'list_count' have been seen,
+ * but every attached protocol is still counted
+ * - returns 0 when the interface has no protocol hash table
+ *
+ * Note:
+ * - caller must already be holding ifnet lock.
*/
-static int
-dlil_ifp_proto_count(struct ifnet * ifp)
+static u_int32_t
+dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
+ u_int32_t list_count)
{
- int i, count = 0;
+ u_int32_t count = 0;
+ int i;
ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
- if (ifp->if_proto_hash == NULL)
+ if (ifp->if_proto_hash == NULL) {
goto done;
+ }
for (i = 0; i < PROTO_HASH_SLOTS; i++) {
struct if_proto *proto;
SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
+ if (list != NULL && count < list_count) { /* store only while there is room */
+ list[count] = proto->protocol_family;
+ }
count++;
}
}
done:
- return (count);
+ return count;
+}
+
+__private_extern__ u_int32_t
+if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
+{ /*
+ * Locked wrapper around dlil_ifp_protolist(): takes the ifnet lock
+ * shared, passes 'protolist' and its capacity 'count' through,
+ * drops the lock and returns whatever dlil_ifp_protolist()
+ * returned (which may exceed the capacity that was supplied).
+ */
+ ifnet_lock_shared(ifp);
+ count = dlil_ifp_protolist(ifp, protolist, count);
+ ifnet_lock_done(ifp);
+ return count;
}
__private_extern__ void
+if_free_protolist(u_int32_t *list)
+{ /*
+ * Release 'list' through _FREE() with type M_TEMP.
+ * NOTE(review): presumably 'list' is a protocol array allocated as
+ * M_TEMP by a caller of if_get_protolist() -- confirm.
+ */
+ _FREE(list, M_TEMP);
+}
+
+__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
u_int32_t event_code, struct net_event_data *event_data,
u_int32_t event_data_len)
struct net_event_data ev_data;
struct kev_msg ev_msg;
- bzero(&ev_msg, sizeof (ev_msg));
- bzero(&ev_data, sizeof (ev_data));
+ bzero(&ev_msg, sizeof(ev_msg));
+ bzero(&ev_data, sizeof(ev_data));
/*
* a net event always starts with a net_event_data structure
* but the caller can generate a simple net event or
* provide a longer event structure to post
*/
- ev_msg.vendor_code = KEV_VENDOR_APPLE;
- ev_msg.kev_class = KEV_NETWORK_CLASS;
- ev_msg.kev_subclass = event_subclass;
- ev_msg.event_code = event_code;
+ ev_msg.vendor_code = KEV_VENDOR_APPLE;
+ ev_msg.kev_class = KEV_NETWORK_CLASS;
+ ev_msg.kev_subclass = event_subclass;
+ ev_msg.event_code = event_code;
if (event_data == NULL) {
event_data = &ev_data;
strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
event_data->if_family = ifp->if_family;
- event_data->if_unit = (u_int32_t) ifp->if_unit;
+ event_data->if_unit = (u_int32_t)ifp->if_unit;
ev_msg.dv[0].data_length = event_data_len;
ev_msg.dv[0].data_ptr = event_data;
ev_msg.dv[1].data_length = 0;
- dlil_event_internal(ifp, &ev_msg);
+ bool update_generation = true;
+ if (event_subclass == KEV_DL_SUBCLASS) {
+ /* Don't update interface generation for frequent link quality and state changes */
+ switch (event_code) {
+ case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
+ case KEV_DL_RRC_STATE_CHANGED:
+ case KEV_DL_NODE_PRESENCE:
+ case KEV_DL_NODE_ABSENCE:
+ case KEV_DL_MASTER_ELECTED:
+ update_generation = false;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return dlil_event_internal(ifp, &ev_msg, update_generation);
}
__private_extern__ int
int ret = EINVAL;
void *buf, *base, **pbuf;
- if (ifp == NULL)
+ if (ifp == NULL) {
goto end;
+ }
if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
/* allocate tcpstat_local structure */
bzero(buf, dlif_tcpstat_bufsize);
/* Get the 64-bit aligned base address for this object */
- base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
- sizeof (u_int64_t));
+ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
+ sizeof(u_int64_t));
VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
((intptr_t)buf + dlif_tcpstat_bufsize));
* Wind back a pointer size from the aligned base and
* save the original address so we can free it later.
*/
- pbuf = (void **)((intptr_t)base - sizeof (void *));
+ pbuf = (void **)((intptr_t)base - sizeof(void *));
*pbuf = buf;
ifp->if_tcp_stat = base;
bzero(buf, dlif_udpstat_bufsize);
/* Get the 64-bit aligned base address for this object */
- base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
- sizeof (u_int64_t));
+ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
+ sizeof(u_int64_t));
VERIFY(((intptr_t)base + dlif_udpstat_size) <=
((intptr_t)buf + dlif_udpstat_bufsize));
* Wind back a pointer size from the aligned base and
* save the original address so we can free it later.
*/
- pbuf = (void **)((intptr_t)base - sizeof (void *));
+ pbuf = (void **)((intptr_t)base - sizeof(void *));
*pbuf = buf;
ifp->if_udp_stat = base;
- VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
- IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
+ VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
+ IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
ret = 0;
}
+ if (ifp->if_ipv4_stat == NULL) {
+ MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
+ sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
+ if (ifp->if_ipv4_stat == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ }
+
+ if (ifp->if_ipv6_stat == NULL) {
+ MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
+ sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
+ if (ifp->if_ipv6_stat == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ }
end:
- if (ret != 0) {
+ if (ifp != NULL && ret != 0) {
if (ifp->if_tcp_stat != NULL) {
pbuf = (void **)
- ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
+ ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
zfree(dlif_tcpstat_zone, *pbuf);
ifp->if_tcp_stat = NULL;
}
if (ifp->if_udp_stat != NULL) {
pbuf = (void **)
- ((intptr_t)ifp->if_udp_stat - sizeof (void *));
+ ((intptr_t)ifp->if_udp_stat - sizeof(void *));
zfree(dlif_udpstat_zone, *pbuf);
ifp->if_udp_stat = NULL;
}
+ if (ifp->if_ipv4_stat != NULL) {
+ FREE(ifp->if_ipv4_stat, M_TEMP);
+ ifp->if_ipv4_stat = NULL;
+ }
+ if (ifp->if_ipv6_stat != NULL) {
+ FREE(ifp->if_ipv6_stat, M_TEMP);
+ ifp->if_ipv6_stat = NULL;
+ }
}
- return (ret);
+ return ret;
+}
+
+static void
+dlil_reset_rxpoll_params(ifnet_t ifp)
+{ /*
+ * Reset the if_poll_* state of 'ifp' (which must not be NULL):
+ * ifnet_set_poll_cycle() is called with a NULL cycle, the update,
+ * flags and request words are zeroed, the mode is set to
+ * IFNET_MODEL_INPUT_POLL_OFF, the three statistics blocks are
+ * zero-filled and the five poll timers are cleared.
+ */
+ ASSERT(ifp != NULL);
+ ifnet_set_poll_cycle(ifp, NULL);
+ ifp->if_poll_update = 0;
+ ifp->if_poll_flags = 0;
+ ifp->if_poll_req = 0;
+ ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
+ bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
+ bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
+ bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
+ net_timerclear(&ifp->if_poll_mode_holdtime);
+ net_timerclear(&ifp->if_poll_mode_lasttime);
+ net_timerclear(&ifp->if_poll_sample_holdtime);
+ net_timerclear(&ifp->if_poll_sample_lasttime);
+ net_timerclear(&ifp->if_poll_dbg_lasttime);
}
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
+ boolean_t dlil_rxpoll_input;
thread_continue_t func;
u_int32_t limit;
int error;
+ dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
+ (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
+
/* NULL ifp indicates the main input thread, called at dlil_init time */
if (ifp == NULL) {
func = dlil_main_input_thread_func;
VERIFY(inp == dlil_main_input_thread);
(void) strlcat(inp->input_name,
"main_input", DLIL_THREADNAME_LEN);
- } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
+ } else if (dlil_rxpoll_input) {
func = dlil_rxpoll_input_thread_func;
VERIFY(inp != dlil_main_input_thread);
(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
- inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
- inp->ifp = ifp; /* NULL for main input thread */
-
- net_timerclear(&inp->mode_holdtime);
- net_timerclear(&inp->mode_lasttime);
- net_timerclear(&inp->sample_holdtime);
- net_timerclear(&inp->sample_lasttime);
- net_timerclear(&inp->dbg_lasttime);
-
+ inp->ifp = ifp; /* NULL for main input thread */
/*
* For interfaces that support opportunistic polling, set the
* low and high watermarks for outstanding inbound packets/bytes.
*/
if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
- (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
+ if (ifp->if_xflags & IFXF_LEGACY) {
+ (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
+ }
} else {
limit = (u_int32_t)-1;
}
- _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
+ _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
if (inp == dlil_main_input_thread) {
struct dlil_main_threading_info *inpm =
(struct dlil_main_threading_info *)inp;
- _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
+ _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
}
error = kernel_thread_start(func, inp, &inp->input_thr);
if (error == KERN_SUCCESS) {
ml_thread_policy(inp->input_thr, MACHINE_GROUP,
- (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
+ (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
/*
* We create an affinity set so that the matching workloop
* thread or the starter thread (for loopback) can be
* Randomize to reduce the probability
* of affinity tag namespace collision.
*/
- read_random(&tag, sizeof (tag));
+ read_frandom(&tag, sizeof(tag));
if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
thread_reference(tp);
inp->tag = tag;
}
OSAddAtomic(1, &cur_dlil_input_threads);
- return (error);
+ return error;
}
-static void
-dlil_terminate_input_thread(struct dlil_threading_info *inp)
+#if TEST_INPUT_THREAD_TERMINATION
+static int
+sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS /*
+ * sysctl handler for if_input_thread_termination_spin.
+ * The current value is copied into a local and handed to
+ * sysctl_handle_int().  The handler returns at that point when the
+ * call fails or when req->newptr is USER_ADDR_NULL (no new value
+ * supplied).  Otherwise the new value is stored, unless net_rxpoll
+ * is 0, in which case ENXIO is returned and nothing is changed.
+ */
{
- struct ifnet *ifp;
+#pragma unused(arg1, arg2)
+ uint32_t i;
+ int err;
- VERIFY(current_thread() == inp->input_thr);
- VERIFY(inp != dlil_main_input_thread);
+ i = if_input_thread_termination_spin;
- OSAddAtomic(-1, &cur_dlil_input_threads);
+ err = sysctl_handle_int(oidp, &i, 0, req);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
+
+ if (net_rxpoll == 0) {
+ return ENXIO;
+ }
+
+ if_input_thread_termination_spin = i;
+ return err;
+}
+#endif /* TEST_INPUT_THREAD_TERMINATION */
+static void
+dlil_clean_threading_info(struct dlil_threading_info *inp)
+{ /*
+ * Tear down the lock state of 'inp' and reset its bookkeeping:
+ * the input mutex is destroyed and its lock group freed, the
+ * name, stats, wait flags and ifp back-pointer are cleared, and
+ * the receive queue limit is set to 0.  The VERIFYs require the
+ * receive queue to be empty already, net_affinity to be off, the
+ * workloop and poll thread pointers to be THREAD_NULL and the
+ * tag to be 0.
+ */
lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
lck_grp_free(inp->lck_grp);
inp->input_waiting = 0;
inp->wtot = 0;
- bzero(inp->input_name, sizeof (inp->input_name));
- ifp = inp->ifp;
+ bzero(inp->input_name, sizeof(inp->input_name));
inp->ifp = NULL;
VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
qlimit(&inp->rcvq_pkts) = 0;
- bzero(&inp->stats, sizeof (inp->stats));
+ bzero(&inp->stats, sizeof(inp->stats));
VERIFY(!inp->net_affinity);
inp->input_thr = THREAD_NULL;
VERIFY(inp->wloop_thr == THREAD_NULL);
VERIFY(inp->poll_thr == THREAD_NULL);
VERIFY(inp->tag == 0);
-
- inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
- bzero(&inp->tstats, sizeof (inp->tstats));
- bzero(&inp->pstats, sizeof (inp->pstats));
- bzero(&inp->sstats, sizeof (inp->sstats));
-
- net_timerclear(&inp->mode_holdtime);
- net_timerclear(&inp->mode_lasttime);
- net_timerclear(&inp->sample_holdtime);
- net_timerclear(&inp->sample_lasttime);
- net_timerclear(&inp->dbg_lasttime);
-
#if IFNET_INPUT_SANITY_CHK
inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
+}
- if (dlil_verbose) {
- printf("%s: input thread terminated\n",
- if_name(ifp));
+static void
+dlil_terminate_input_thread(struct dlil_threading_info *inp)
+{
+ struct ifnet *ifp = inp->ifp;
+ classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
+
+ VERIFY(current_thread() == inp->input_thr);
+ VERIFY(inp != dlil_main_input_thread);
+
+ OSAddAtomic(-1, &cur_dlil_input_threads);
+
+#if TEST_INPUT_THREAD_TERMINATION
+ { /* do something useless that won't get optimized away */
+ uint32_t v = 1;
+ for (uint32_t i = 0;
+ i < if_input_thread_termination_spin;
+ i++) {
+ v = (i + 1) * v;
+ }
+ DLIL_PRINTF("the value is %d\n", v);
+ }
+#endif /* TEST_INPUT_THREAD_TERMINATION */
+
+ lck_mtx_lock_spin(&inp->input_lck);
+ _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
+ VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
+ inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
+ wakeup_one((caddr_t)&inp->input_waiting);
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* free up pending packets */
+ if (pkt.cp_mbuf != NULL) {
+ mbuf_freem_list(pkt.cp_mbuf);
}
/* for the extra refcnt from kernel_thread_start() */
thread_deallocate(current_thread());
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: input thread terminated\n",
+ if_name(ifp));
+ }
+
/* this is the end */
thread_terminate(current_thread());
/* NOTREACHED */
{
thread_affinity_policy_data_t policy;
- bzero(&policy, sizeof (policy));
+ bzero(&policy, sizeof(policy));
policy.affinity_tag = tag;
- return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
- (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
+ return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
+ (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
}
void
* The following fields must be 64-bit aligned for atomic operations.
*/
IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
- IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
- IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
+ _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
+ _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
/*
_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
+ _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
+ _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
+ _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
+ _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
+ _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
+ _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
PE_parse_boot_argn("net_affinity", &net_affinity,
- sizeof (net_affinity));
+ sizeof(net_affinity));
- PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
+ PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
- PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
+ PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
- PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
+ PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
- dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
- sizeof (struct dlil_ifnet_dbg);
+ VERIFY(dlil_pending_thread_cnt == 0);
+ dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
+ sizeof(struct dlil_ifnet_dbg);
/* Enforce 64-bit alignment for dlil_ifnet structure */
- dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
- dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
+ dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
+ dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
0, DLIF_ZONE_NAME);
if (dlif_zone == NULL) {
zone_change(dlif_zone, Z_EXPAND, TRUE);
zone_change(dlif_zone, Z_CALLERACCT, FALSE);
- dlif_filt_size = sizeof (struct ifnet_filter);
+ dlif_filt_size = sizeof(struct ifnet_filter);
dlif_filt_zone = zinit(dlif_filt_size,
DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
if (dlif_filt_zone == NULL) {
zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
- dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
+ dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
dlif_phash_zone = zinit(dlif_phash_size,
DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
if (dlif_phash_zone == NULL) {
zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
- dlif_proto_size = sizeof (struct if_proto);
+ dlif_proto_size = sizeof(struct if_proto);
dlif_proto_zone = zinit(dlif_proto_size,
DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
if (dlif_proto_zone == NULL) {
zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
- dlif_tcpstat_size = sizeof (struct tcpstat_local);
+ dlif_tcpstat_size = sizeof(struct tcpstat_local);
/* Enforce 64-bit alignment for tcpstat_local structure */
dlif_tcpstat_bufsize =
- dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
+ dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
dlif_tcpstat_bufsize =
- P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
+ P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
DLIF_TCPSTAT_ZONE_NAME);
zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
- dlif_udpstat_size = sizeof (struct udpstat_local);
+ dlif_udpstat_size = sizeof(struct udpstat_local);
/* Enforce 64-bit alignment for udpstat_local structure */
dlif_udpstat_bufsize =
- dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
+ dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
dlif_udpstat_bufsize =
- P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
+ P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
DLIF_UDPSTAT_ZONE_NAME);
zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
ifnet_llreach_init();
+ eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
TAILQ_INIT(&dlil_ifnet_head);
TAILQ_INIT(&ifnet_head);
TAILQ_INIT(&ifnet_detaching_head);
+ TAILQ_INIT(&ifnet_ordered_head);
/* Setup the lock groups we will use */
dlil_grp_attributes = lck_grp_attr_alloc_init();
lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
dlil_lck_attributes);
lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
+ lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
/* Setup interface flow control related items */
lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
- ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
+ ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
ifnet_fc_zone = zinit(ifnet_fc_zone_size,
IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
if (ifnet_fc_zone == NULL) {
/* Initialize the pktap virtual interface */
pktap_init();
-#if DEBUG
+ /* Initialize the service class to dscp map */
+ net_qos_map_init();
+
+ /* Initialize the interface port list */
+ if_ports_used_init();
+
+ /* Initialize the interface low power mode event handler */
+ if_low_power_evhdlr_init();
+
+#if DEBUG || DEVELOPMENT
/* Run self-tests */
dlil_verify_sum16();
-#endif /* DEBUG */
+#endif /* DEBUG || DEVELOPMENT */
+
+ /* Initialize link layer table */
+ lltable_glbl_init();
/*
* Create and start up the main DLIL input thread and the interface
* detacher threads once everything is initialized.
*/
+ dlil_incr_pending_thread_count();
dlil_create_input_thread(NULL, dlil_main_input_thread);
+ /*
+ * Create ifnet detacher thread.
+ * When an interface gets detached, part of the detach processing
+ * is delayed. The interface is added to delayed detach list
+ * and this thread is woken up to call ifnet_detach_final
+ * on these interfaces.
+ */
+ dlil_incr_pending_thread_count();
if (kernel_thread_start(ifnet_detacher_thread_func,
NULL, &thread) != KERN_SUCCESS) {
panic_plain("%s: couldn't create detacher thread", __func__);
/* NOTREACHED */
}
thread_deallocate(thread);
+
+ /*
+ * Wait for the created kernel threads for dlil to get
+ * scheduled and run at least once before we proceed
+ */
+ lck_mtx_lock(&dlil_thread_sync_lock);
+ while (dlil_pending_thread_cnt != 0) {
+ DLIL_PRINTF("%s: Waiting for all the create dlil kernel threads "
+ "to get scheduled at least once.\n", __func__);
+ (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock, (PZERO - 1),
+ __func__, NULL);
+ LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
+ }
+ lck_mtx_unlock(&dlil_thread_sync_lock);
+ DLIL_PRINTF("%s: All the created dlil kernel threads have been scheduled "
+ "at least once. Proceeding.\n", __func__);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
- lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
++ifp->if_flt_busy;
VERIFY(ifp->if_flt_busy != 0);
static void
if_flt_monitor_enter(struct ifnet *ifp)
{
- lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
while (ifp->if_flt_busy) {
++ifp->if_flt_waiters;
static void
if_flt_monitor_leave(struct ifnet *ifp)
{
- lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
VERIFY(ifp->if_flt_busy != 0);
--ifp->if_flt_busy;
}
__private_extern__ int
-dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
+dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
interface_filter_t *filter_ref, u_int32_t flags)
{
int retval = 0;
filter->filt_cookie = if_filter->iff_cookie;
filter->filt_name = if_filter->iff_name;
filter->filt_protocol = if_filter->iff_protocol;
- filter->filt_input = if_filter->iff_input;
- filter->filt_output = if_filter->iff_output;
- filter->filt_event = if_filter->iff_event;
- filter->filt_ioctl = if_filter->iff_ioctl;
+ /*
+ * Do not install filter callbacks for internal coproc interface
+ */
+ if (!IFNET_IS_INTCOPROC(ifp)) {
+ filter->filt_input = if_filter->iff_input;
+ filter->filt_output = if_filter->iff_output;
+ filter->filt_event = if_filter->iff_event;
+ filter->filt_ioctl = if_filter->iff_ioctl;
+ }
filter->filt_detached = if_filter->iff_detached;
lck_mtx_lock(&ifp->if_flt_lock);
if_flt_monitor_enter(ifp);
- lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
if_flt_monitor_leave(ifp);
* know it shouldn't do TSO on this connection
*/
if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
- OSAddAtomic(1, &dlil_filter_disable_tso_count);
- routegenid_update();
+ ifnet_filter_update_tso(TRUE);
+ }
+ OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
+ INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
+ if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
+ INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
}
if (dlil_verbose) {
- printf("%s: %s filter attached\n", if_name(ifp),
+ DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
if_filter->iff_name);
}
done:
DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
if_name(ifp), if_filter->iff_name, retval);
}
- if (retval != 0 && filter != NULL)
+ if (retval != 0 && filter != NULL) {
zfree(dlif_filt_zone, filter);
+ }
- return (retval);
+ return retval;
}
static int
-dlil_detach_filter_internal(interface_filter_t filter, int detached)
+dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
int retval = 0;
lck_mtx_lock(&ifp->if_flt_lock);
TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
- if (entry != filter || entry->filt_skip)
+ if (entry != filter || entry->filt_skip) {
continue;
+ }
/*
* We've found a match; since it's possible
* that the thread gets blocked in the monitor,
* not be detached since we still have a use
* count held during filter attach.
*/
- entry->filt_skip = 1; /* skip input/output */
+ entry->filt_skip = 1; /* skip input/output */
lck_mtx_unlock(&ifp->if_flt_lock);
ifnet_head_done();
lck_mtx_lock(&ifp->if_flt_lock);
if_flt_monitor_enter(ifp);
- lck_mtx_assert(&ifp->if_flt_lock,
+ LCK_MTX_ASSERT(&ifp->if_flt_lock,
LCK_MTX_ASSERT_OWNED);
/* Remove the filter from the list */
if_flt_monitor_leave(ifp);
lck_mtx_unlock(&ifp->if_flt_lock);
if (dlil_verbose) {
- printf("%s: %s filter detached\n",
+ DLIL_PRINTF("%s: %s filter detached\n",
if_name(ifp), filter->filt_name);
}
goto destroy;
goto done;
}
- if (dlil_verbose)
- printf("%s filter detached\n", filter->filt_name);
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s filter detached\n", filter->filt_name);
+ }
destroy:
/* Call the detached function if there is one */
- if (filter->filt_detached)
+ if (filter->filt_detached) {
filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
-
- /* Free the filter */
- zfree(dlif_filt_zone, filter);
+ }
/*
* Decrease filter count and route_generation ID to let TCP
* know it should reevaluate doing TSO or not
*/
if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
- OSAddAtomic(-1, &dlil_filter_disable_tso_count);
- routegenid_update();
+ ifnet_filter_update_tso(FALSE);
}
+
+ VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
+
+ /* Free the filter */
+ zfree(dlif_filt_zone, filter);
+ filter = NULL;
done:
- if (retval != 0) {
+ if (retval != 0 && filter != NULL) {
DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
filter->filt_name, retval);
}
- return (retval);
+
+ return retval;
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
- if (filter == NULL)
+ if (filter == NULL) { /*
+ * A NULL filter is silently ignored.  Any other filter is
+ * handed to dlil_detach_filter_internal() below with its
+ * 'detached' argument set to 0; that call's return value is
+ * discarded.
+ */
return;
+ }
dlil_detach_filter_internal(filter, 0);
}
+__attribute__((noreturn))
+static void
+dlil_main_input_thread_func(void *v, wait_result_t w)
+{ /*
+ * Start routine of the main input thread; 'v' must be
+ * dlil_main_input_thread, with no ifp attached, and this must be
+ * the thread recorded in inp->input_thr.  It reports in through
+ * dlil_decr_pending_thread_count(), queues an uninterruptible
+ * wait on inp->input_waiting and then blocks with
+ * dlil_main_input_thread_cont as the continuation, so control
+ * never comes back here.
+ */
+#pragma unused(w)
+ struct dlil_threading_info *inp = v;
+
+ VERIFY(inp == dlil_main_input_thread);
+ VERIFY(inp->ifp == NULL);
+ VERIFY(current_thread() == inp->input_thr);
+
+ dlil_decr_pending_thread_count(); /* this thread has now been scheduled once */
+ lck_mtx_lock(&inp->input_lck);
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
+ (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
+ lck_mtx_unlock(&inp->input_lck);
+ (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
+ /* NOTREACHED */
+ __builtin_unreachable();
+}
+
/*
* Main input thread:
*
* c) protocol registrations
* d) packet injections
*/
/*
* Continuation passed to thread_block_parameter() for the main input
* thread. On entry it takes input_lck and sets DLIL_INPUT_RUNNING, then
* loops: dequeue everything from rcvq_pkts and lo_rcvq_pkts, drop the
* lock, hand both chains to dlil_input_packet_list_extended(), and call
* proto_input_run() if a protocol request bit was set. The loop ends once
* no bit other than DLIL_INPUT_RUNNING remains in input_waiting; the
* function then clears RUNNING and blocks again with itself as the
* continuation.
*/
+__attribute__((noreturn))
static void
-dlil_main_input_thread_func(void *v, wait_result_t w)
+dlil_main_input_thread_cont(void *v, wait_result_t wres)
{
-#pragma unused(w)
struct dlil_main_threading_info *inpm = v;
struct dlil_threading_info *inp = v;
- VERIFY(inp == dlil_main_input_thread);
- VERIFY(inp->ifp == NULL);
- VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+ /* main input thread is uninterruptible */
+ VERIFY(wres != THREAD_INTERRUPTED);
+ lck_mtx_lock_spin(&inp->input_lck);
+ VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE |
+ DLIL_INPUT_RUNNING)));
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
while (1) {
struct mbuf *m = NULL, *m_loop = NULL;
u_int32_t m_cnt, m_cnt_loop;
+ classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
boolean_t proto_req;
- lck_mtx_lock_spin(&inp->input_lck);
-
- /* Wait until there is work to be done */
- while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
- inp->input_waiting &= ~DLIL_INPUT_RUNNING;
- (void) msleep(&inp->input_waiting, &inp->input_lck,
- (PZERO - 1) | PSPIN, inp->input_name, NULL);
- }
-
- inp->input_waiting |= DLIL_INPUT_RUNNING;
inp->input_waiting &= ~DLIL_INPUT_WAITING;
- /* Main input thread cannot be terminated */
- VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
-
/* Record the protocol request bits before the lock is dropped below */
proto_req = (inp->input_waiting &
(DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
/* Packets for non-dedicated interfaces other than lo0 */
m_cnt = qlen(&inp->rcvq_pkts);
- m = _getq_all(&inp->rcvq_pkts);
+ _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
+ m = pkt.cp_mbuf;
/* Packets exclusive to lo0 */
m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
- m_loop = _getq_all(&inpm->lo_rcvq_pkts);
+ _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
+ m_loop = pkt.cp_mbuf;
inp->wtot = 0;
lck_mtx_unlock(&inp->input_lck);
/*
- * NOTE warning %%% attention !!!!
- * We should think about putting some thread starvation
- * safeguards if we deal with long chains of packets.
- */
- if (m_loop != NULL)
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m_loop != NULL) {
dlil_input_packet_list_extended(lo_ifp, m_loop,
- m_cnt_loop, inp->mode);
+ m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
+ }
- if (m != NULL)
+ if (m != NULL) {
dlil_input_packet_list_extended(NULL, m,
- m_cnt, inp->mode);
+ m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
+ }
- if (proto_req)
+ if (proto_req) {
proto_input_run();
+ }
+
/* Retake the lock; leave the loop only if no request bit besides RUNNING is set */
+ lck_mtx_lock_spin(&inp->input_lck);
+ VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
+ /* main input thread cannot be terminated */
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
+ if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ break;
+ }
}
/* Idle path: clear RUNNING, arm the wait under the lock, then block again */
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+ (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
+ lck_mtx_unlock(&inp->input_lck);
+ (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
+
+ VERIFY(0); /* we should never get here */
/* NOTREACHED */
- VERIFY(0); /* we should never get here */
+ __builtin_unreachable();
}
/*
* Input thread for interfaces with legacy input model.
*
* This is only the start routine. It asserts that it is not the main
* input thread, that an ifp is attached, and that it is running on
* inp->input_thr; names the thread "dlil_input_<if_xname>"; calls
* ifnet_decr_pending_thread_count(); and then blocks on
* inp->input_waiting with dlil_input_thread_cont() as the continuation.
*/
+__attribute__((noreturn))
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
+ char thread_name[MAXTHREADNAMESIZE];
struct dlil_threading_info *inp = v;
struct ifnet *ifp = inp->ifp;
VERIFY(inp != dlil_main_input_thread);
VERIFY(ifp != NULL);
- VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
- VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+ VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
+ !(ifp->if_xflags & IFXF_LEGACY));
+ VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
+ !(ifp->if_xflags & IFXF_LEGACY));
+ VERIFY(current_thread() == inp->input_thr);
- while (1) {
- struct mbuf *m = NULL;
- u_int32_t m_cnt;
+ /* construct the name for this thread, and then apply it */
+ bzero(thread_name, sizeof(thread_name));
+ (void) snprintf(thread_name, sizeof(thread_name),
+ "dlil_input_%s", ifp->if_xname);
+ thread_set_thread_name(inp->input_thr, thread_name);
+ ifnet_decr_pending_thread_count(ifp);
- lck_mtx_lock_spin(&inp->input_lck);
/* Arm a wait on input_waiting under the lock, then block into the continuation */
+ lck_mtx_lock(&inp->input_lck);
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
+ (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
+ lck_mtx_unlock(&inp->input_lck);
+ (void) thread_block_parameter(dlil_input_thread_cont, inp);
+ /* NOTREACHED */
+ __builtin_unreachable();
+}
- /* Wait until there is work to be done */
- while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
- inp->input_waiting &= ~DLIL_INPUT_RUNNING;
- (void) msleep(&inp->input_waiting, &inp->input_lck,
- (PZERO - 1) | PSPIN, inp->input_name, NULL);
- }
/*
* Continuation installed by dlil_input_thread_func(). With input_lck held
* it first checks for an interrupted wait or DLIL_INPUT_TERMINATE and, in
* either case, unlocks and calls dlil_terminate_input_thread(). Otherwise
* it sets DLIL_INPUT_RUNNING and loops: dequeue all of rcvq_pkts, sync
* the input stats, drop the lock, call ifnet_notify_data_threshold() if
* the sync asked for it, and pass the chain to
* dlil_input_packet_list_extended() using ifp->if_poll_mode. The loop
* ends when no bits other than RUNNING and TERMINATE are set; the thread
* then either terminates or blocks again with itself as continuation.
*/
+__attribute__((noreturn))
+static void
+dlil_input_thread_cont(void *v, wait_result_t wres)
+{
+ struct dlil_threading_info *inp = v;
+ struct ifnet *ifp = inp->ifp;
+
+ lck_mtx_lock_spin(&inp->input_lck);
+ if (__improbable(wres == THREAD_INTERRUPTED ||
+ (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
+ goto terminate;
+ }
+
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
+
+ while (1) {
+ struct mbuf *m = NULL;
+ classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
+ boolean_t notify = FALSE;
+ u_int32_t m_cnt;
- inp->input_waiting |= DLIL_INPUT_RUNNING;
inp->input_waiting &= ~DLIL_INPUT_WAITING;
/*
* (and the benefits might not worth the trouble.)
*/
VERIFY(!(inp->input_waiting &
- (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
+ (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
/* Packets for this interface */
m_cnt = qlen(&inp->rcvq_pkts);
- m = _getq_all(&inp->rcvq_pkts);
-
- if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
- lck_mtx_unlock(&inp->input_lck);
-
- /* Free up pending packets */
- if (m != NULL)
- mbuf_freem_list(m);
-
- dlil_terminate_input_thread(inp);
- /* NOTREACHED */
- return;
- }
+ _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
+ m = pkt.cp_mbuf;
inp->wtot = 0;
- dlil_input_stats_sync(ifp, inp);
+ notify = dlil_input_stats_sync(ifp, inp);
lck_mtx_unlock(&inp->input_lck);
+ if (notify) {
+ ifnet_notify_data_threshold(ifp);
+ }
+
/*
- * NOTE warning %%% attention !!!!
- * We should think about putting some thread starvation
- * safeguards if we deal with long chains of packets.
- */
- if (m != NULL)
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m != NULL) {
dlil_input_packet_list_extended(NULL, m,
- m_cnt, inp->mode);
+ m_cnt, ifp->if_poll_mode);
+ }
+
/* Retake the lock; TERMINATE alone does not keep the loop going */
+ lck_mtx_lock_spin(&inp->input_lck);
+ VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
+ if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
+ DLIL_INPUT_TERMINATE))) {
+ break;
+ }
+ }
+
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+
/* The early-exit goto above lands inside this branch with the lock held */
+ if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
+terminate:
+ lck_mtx_unlock(&inp->input_lck);
+ dlil_terminate_input_thread(inp);
+ /* NOTREACHED */
+ } else {
+ (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
+ lck_mtx_unlock(&inp->input_lck);
+ (void) thread_block_parameter(dlil_input_thread_cont, inp);
+ /* NOTREACHED */
}
+ VERIFY(0); /* we should never get here */
/* NOTREACHED */
- VERIFY(0); /* we should never get here */
+ __builtin_unreachable();
}
/*
* Input thread for interfaces with opportunistic polling input model.
*
* This is only the start routine. It asserts that it is not the main
* input thread, that the ifp has both IFEF_RXPOLL and IFXF_LEGACY set,
* and that it is running on inp->input_thr; names the thread
* "dlil_input_poll_<if_xname>"; calls ifnet_decr_pending_thread_count();
* and then blocks on inp->input_waiting with
* dlil_rxpoll_input_thread_cont() as the continuation.
*/
+__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
+ char thread_name[MAXTHREADNAMESIZE];
struct dlil_threading_info *inp = v;
struct ifnet *ifp = inp->ifp;
- struct timespec ts;
VERIFY(inp != dlil_main_input_thread);
- VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
+ VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
+ (ifp->if_xflags & IFXF_LEGACY));
+ VERIFY(current_thread() == inp->input_thr);
+
+ /* construct the name for this thread, and then apply it */
+ bzero(thread_name, sizeof(thread_name));
+ (void) snprintf(thread_name, sizeof(thread_name),
+ "dlil_input_poll_%s", ifp->if_xname);
+ thread_set_thread_name(inp->input_thr, thread_name);
+ ifnet_decr_pending_thread_count(ifp);
+
/* Arm a wait on input_waiting under the lock, then block into the continuation */
+ lck_mtx_lock(&inp->input_lck);
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
+ (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
+ lck_mtx_unlock(&inp->input_lck);
+ (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
+ /* NOTREACHED */
+ __builtin_unreachable();
+}
+
+__attribute__((noreturn))
+static void
+dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
+{
+ struct dlil_threading_info *inp = v;
+ struct ifnet *ifp = inp->ifp;
+ struct timespec ts;
+
+ lck_mtx_lock_spin(&inp->input_lck);
+ if (__improbable(wres == THREAD_INTERRUPTED ||
+ (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
+ goto terminate;
+ }
+
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
while (1) {
struct mbuf *m = NULL;
u_int32_t m_cnt, m_size, poll_req = 0;
ifnet_model_t mode;
struct timespec now, delta;
+ classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
+ boolean_t notify;
u_int64_t ival;
- lck_mtx_lock_spin(&inp->input_lck);
+ inp->input_waiting &= ~DLIL_INPUT_WAITING;
- if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
+ if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
ival = IF_RXPOLL_INTERVALTIME_MIN;
+ }
/* Link parameters changed? */
if (ifp->if_poll_update != 0) {
}
/* Current operating mode */
- mode = inp->mode;
-
- /* Wait until there is work to be done */
- while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
- inp->input_waiting &= ~DLIL_INPUT_RUNNING;
- (void) msleep(&inp->input_waiting, &inp->input_lck,
- (PZERO - 1) | PSPIN, inp->input_name, NULL);
- }
-
- inp->input_waiting |= DLIL_INPUT_RUNNING;
- inp->input_waiting &= ~DLIL_INPUT_WAITING;
+ mode = ifp->if_poll_mode;
/*
* Protocol registration and injection must always use
* (and the benefits might not worth the trouble.)
*/
VERIFY(!(inp->input_waiting &
- (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
-
- if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
- /* Free up pending packets */
- _flushq(&inp->rcvq_pkts);
- lck_mtx_unlock(&inp->input_lck);
-
- dlil_terminate_input_thread(inp);
- /* NOTREACHED */
- return;
- }
+ (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
/* Total count of all packets */
m_cnt = qlen(&inp->rcvq_pkts);
m_size = qsize(&inp->rcvq_pkts);
/* Packets for this interface */
- m = _getq_all(&inp->rcvq_pkts);
+ _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
+ m = pkt.cp_mbuf;
VERIFY(m != NULL || m_cnt == 0);
nanouptime(&now);
- if (!net_timerisset(&inp->sample_lasttime))
- *(&inp->sample_lasttime) = *(&now);
+ if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
+ *(&ifp->if_poll_sample_lasttime) = *(&now);
+ }
- net_timersub(&now, &inp->sample_lasttime, &delta);
- if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
+ net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
+ if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
u_int32_t ptot, btot;
/* Accumulate statistics for current sampling */
- PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
+ PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
- if (net_timercmp(&delta, &inp->sample_holdtime, <))
+ if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
goto skip;
+ }
- *(&inp->sample_lasttime) = *(&now);
+ *(&ifp->if_poll_sample_lasttime) = *(&now);
/* Calculate min/max of inbound bytes */
- btot = (u_int32_t)inp->sstats.bytes;
- if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
- inp->rxpoll_bmin = btot;
- if (btot > inp->rxpoll_bmax)
- inp->rxpoll_bmax = btot;
+ btot = (u_int32_t)ifp->if_poll_sstats.bytes;
+ if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
+ ifp->if_rxpoll_bmin = btot;
+ }
+ if (btot > ifp->if_rxpoll_bmax) {
+ ifp->if_rxpoll_bmax = btot;
+ }
/* Calculate EWMA of inbound bytes */
- DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
+ DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
/* Calculate min/max of inbound packets */
- ptot = (u_int32_t)inp->sstats.packets;
- if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
- inp->rxpoll_pmin = ptot;
- if (ptot > inp->rxpoll_pmax)
- inp->rxpoll_pmax = ptot;
+ ptot = (u_int32_t)ifp->if_poll_sstats.packets;
+ if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
+ ifp->if_rxpoll_pmin = ptot;
+ }
+ if (ptot > ifp->if_rxpoll_pmax) {
+ ifp->if_rxpoll_pmax = ptot;
+ }
/* Calculate EWMA of inbound packets */
- DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
+ DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
/* Reset sampling statistics */
- PKTCNTR_CLEAR(&inp->sstats);
+ PKTCNTR_CLEAR(&ifp->if_poll_sstats);
/* Calculate EWMA of wakeup requests */
- DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
+ DLIL_EWMA(ifp->if_rxpoll_wavg, inp->wtot, if_rxpoll_decay);
inp->wtot = 0;
if (dlil_verbose) {
- if (!net_timerisset(&inp->dbg_lasttime))
- *(&inp->dbg_lasttime) = *(&now);
- net_timersub(&now, &inp->dbg_lasttime, &delta);
+ if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
+ *(&ifp->if_poll_dbg_lasttime) = *(&now);
+ }
+ net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
- *(&inp->dbg_lasttime) = *(&now);
- printf("%s: [%s] pkts avg %d max %d "
+ *(&ifp->if_poll_dbg_lasttime) = *(&now);
+ DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
"limits [%d/%d], wreq avg %d "
"limits [%d/%d], bytes avg %d "
"limits [%d/%d]\n", if_name(ifp),
- (inp->mode ==
+ (ifp->if_poll_mode ==
IFNET_MODEL_INPUT_POLL_ON) ?
- "ON" : "OFF", inp->rxpoll_pavg,
- inp->rxpoll_pmax,
- inp->rxpoll_plowat,
- inp->rxpoll_phiwat,
- inp->rxpoll_wavg,
- inp->rxpoll_wlowat,
- inp->rxpoll_whiwat,
- inp->rxpoll_bavg,
- inp->rxpoll_blowat,
- inp->rxpoll_bhiwat);
+ "ON" : "OFF", ifp->if_rxpoll_pavg,
+ ifp->if_rxpoll_pmax,
+ ifp->if_rxpoll_plowat,
+ ifp->if_rxpoll_phiwat,
+ ifp->if_rxpoll_wavg,
+ ifp->if_rxpoll_wlowat,
+ ifp->if_rxpoll_whiwat,
+ ifp->if_rxpoll_bavg,
+ ifp->if_rxpoll_blowat,
+ ifp->if_rxpoll_bhiwat);
}
}
/* Perform mode transition, if necessary */
- if (!net_timerisset(&inp->mode_lasttime))
- *(&inp->mode_lasttime) = *(&now);
+ if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
+ *(&ifp->if_poll_mode_lasttime) = *(&now);
+ }
- net_timersub(&now, &inp->mode_lasttime, &delta);
- if (net_timercmp(&delta, &inp->mode_holdtime, <))
+ net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
+ if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
goto skip;
+ }
- if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
- inp->rxpoll_bavg <= inp->rxpoll_blowat &&
- inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
+ if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
+ ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
+ ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
mode = IFNET_MODEL_INPUT_POLL_OFF;
- } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
- (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
- inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
- inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
+ } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
+ (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
+ ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
+ ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
mode = IFNET_MODEL_INPUT_POLL_ON;
}
- if (mode != inp->mode) {
- inp->mode = mode;
- *(&inp->mode_lasttime) = *(&now);
+ if (mode != ifp->if_poll_mode) {
+ ifp->if_poll_mode = mode;
+ *(&ifp->if_poll_mode_lasttime) = *(&now);
poll_req++;
}
}
skip:
- dlil_input_stats_sync(ifp, inp);
+ notify = dlil_input_stats_sync(ifp, inp);
lck_mtx_unlock(&inp->input_lck);
+ if (notify) {
+ ifnet_notify_data_threshold(ifp);
+ }
+
/*
* If there's a mode change and interface is still attached,
* perform a downcall to the driver for the new mode. Also
* being detached (will be release below.)
*/
if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
- struct ifnet_model_params p = { mode, { 0 } };
+ struct ifnet_model_params p = {
+ .model = mode, .reserved = { 0 }
+ };
errno_t err;
if (dlil_verbose) {
- printf("%s: polling is now %s, "
+ DLIL_PRINTF("%s: polling is now %s, "
"pkts avg %d max %d limits [%d/%d], "
"wreq avg %d limits [%d/%d], "
"bytes avg %d limits [%d/%d]\n",
if_name(ifp),
(mode == IFNET_MODEL_INPUT_POLL_ON) ?
- "ON" : "OFF", inp->rxpoll_pavg,
- inp->rxpoll_pmax, inp->rxpoll_plowat,
- inp->rxpoll_phiwat, inp->rxpoll_wavg,
- inp->rxpoll_wlowat, inp->rxpoll_whiwat,
- inp->rxpoll_bavg, inp->rxpoll_blowat,
- inp->rxpoll_bhiwat);
+ "ON" : "OFF", ifp->if_rxpoll_pavg,
+ ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
+ ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
+ ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
+ ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
+ ifp->if_rxpoll_bhiwat);
}
if ((err = ((*ifp->if_input_ctl)(ifp,
- IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
- printf("%s: error setting polling mode "
+ IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
+ DLIL_PRINTF("%s: error setting polling mode "
"to %s (%d)\n", if_name(ifp),
(mode == IFNET_MODEL_INPUT_POLL_ON) ?
"ON" : "OFF", err);
switch (mode) {
case IFNET_MODEL_INPUT_POLL_OFF:
ifnet_set_poll_cycle(ifp, NULL);
- inp->rxpoll_offreq++;
- if (err != 0)
- inp->rxpoll_offerr++;
+ ifp->if_rxpoll_offreq++;
+ if (err != 0) {
+ ifp->if_rxpoll_offerr++;
+ }
break;
case IFNET_MODEL_INPUT_POLL_ON:
net_nsectimer(&ival, &ts);
ifnet_set_poll_cycle(ifp, &ts);
ifnet_poll(ifp);
- inp->rxpoll_onreq++;
- if (err != 0)
- inp->rxpoll_onerr++;
+ ifp->if_rxpoll_onreq++;
+ if (err != 0) {
+ ifp->if_rxpoll_onerr++;
+ }
break;
default:
}
/*
- * NOTE warning %%% attention !!!!
- * We should think about putting some thread starvation
- * safeguards if we deal with long chains of packets.
- */
- if (m != NULL)
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m != NULL) {
dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
+ }
+
+ lck_mtx_lock_spin(&inp->input_lck);
+ VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
+ if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
+ DLIL_INPUT_TERMINATE))) {
+ break;
+ }
+ }
+
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+
+ if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
+terminate:
+ lck_mtx_unlock(&inp->input_lck);
+ dlil_terminate_input_thread(inp);
+ /* NOTREACHED */
+ } else {
+ (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
+ lck_mtx_unlock(&inp->input_lck);
+ (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
+ inp);
+ /* NOTREACHED */
}
+ VERIFY(0); /* we should never get here */
/* NOTREACHED */
- VERIFY(0); /* we should never get here */
+ __builtin_unreachable();
}
-/*
- * Must be called on an attached ifnet (caller is expected to check.)
- * Caller may pass NULL for poll parameters to indicate "auto-tuning."
- */
/*
* Validate caller-supplied polling parameters. A NULL p is accepted
* unchanged. For both the packet and the byte watermarks, low and high
* must be either both zero or both non-zero, and a non-zero low must be
* strictly below high; any violation returns EINVAL. A non-zero
* interval_time smaller than IF_RXPOLL_INTERVALTIME_MIN is not rejected:
* it is raised to that minimum directly in *p. Returns 0 on success.
*/
errno_t
-dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
- boolean_t locked)
+dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
{
- struct dlil_threading_info *inp;
- u_int64_t sample_holdtime, inbw;
-
- VERIFY(ifp != NULL);
- if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
- return (ENXIO);
-
if (p != NULL) {
if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
- (p->packets_lowat != 0 && p->packets_hiwat == 0))
- return (EINVAL);
- if (p->packets_lowat != 0 && /* hiwat must be non-zero */
- p->packets_lowat >= p->packets_hiwat)
- return (EINVAL);
+ (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
+ return EINVAL;
+ }
+ if (p->packets_lowat != 0 && /* hiwat must be non-zero */
+ p->packets_lowat >= p->packets_hiwat) {
+ return EINVAL;
+ }
if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
- (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
- return (EINVAL);
- if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
- p->bytes_lowat >= p->bytes_hiwat)
- return (EINVAL);
+ (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
+ return EINVAL;
+ }
+ if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
+ p->bytes_lowat >= p->bytes_hiwat) {
+ return EINVAL;
+ }
/* Too-small intervals are clamped in place, so the caller's struct is modified */
if (p->interval_time != 0 &&
- p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
+ p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
+ }
}
+ return 0;
+}
- if (!locked)
- lck_mtx_lock(&inp->input_lck);
-
- lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
-
- /*
- * Normally, we'd reset the parameters to the auto-tuned values
- * if the the input thread detects a change in link rate. If the
- * driver provides its own parameters right after a link rate
- * changes, but before the input thread gets to run, we want to
- * make sure to keep the driver's values. Clearing if_poll_update
- * will achieve that.
- */
- if (p != NULL && !locked && ifp->if_poll_update != 0)
- ifp->if_poll_update = 0;
+void
+dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
+{
+ u_int64_t sample_holdtime, inbw;
if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
- sample_holdtime = 0; /* polling is disabled */
- inp->rxpoll_wlowat = inp->rxpoll_plowat =
- inp->rxpoll_blowat = 0;
- inp->rxpoll_whiwat = inp->rxpoll_phiwat =
- inp->rxpoll_bhiwat = (u_int32_t)-1;
- inp->rxpoll_plim = 0;
- inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
+ sample_holdtime = 0; /* polling is disabled */
+ ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
+ ifp->if_rxpoll_blowat = 0;
+ ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
+ ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
+ ifp->if_rxpoll_plim = 0;
+ ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
} else {
u_int32_t plowat, phiwat, blowat, bhiwat, plim;
u_int64_t ival;
unsigned int n, i;
for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
- if (inbw < rxpoll_tbl[i].speed)
+ if (inbw < rxpoll_tbl[i].speed) {
break;
+ }
n = i;
}
/* auto-tune if caller didn't specify a value */
VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
sample_holdtime = if_rxpoll_sample_holdtime;
- inp->rxpoll_wlowat = if_rxpoll_wlowat;
- inp->rxpoll_whiwat = if_rxpoll_whiwat;
- inp->rxpoll_plowat = plowat;
- inp->rxpoll_phiwat = phiwat;
- inp->rxpoll_blowat = blowat;
- inp->rxpoll_bhiwat = bhiwat;
- inp->rxpoll_plim = plim;
- inp->rxpoll_ival = ival;
+ ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
+ ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
+ ifp->if_rxpoll_plowat = plowat;
+ ifp->if_rxpoll_phiwat = phiwat;
+ ifp->if_rxpoll_blowat = blowat;
+ ifp->if_rxpoll_bhiwat = bhiwat;
+ ifp->if_rxpoll_plim = plim;
+ ifp->if_rxpoll_ival = ival;
}
- net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
- net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
+ net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
+ net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
if (dlil_verbose) {
- printf("%s: speed %llu bps, sample per %llu nsec, "
+ DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
"poll interval %llu nsec, pkts per poll %u, "
"pkt limits [%u/%u], wreq limits [%u/%u], "
"bytes limits [%u/%u]\n", if_name(ifp),
- inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
- inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
- inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
+ inbw, sample_holdtime, ifp->if_rxpoll_ival,
+ ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
+ ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
+ ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
+ ifp->if_rxpoll_bhiwat);
}
+}
- if (!locked)
- lck_mtx_unlock(&inp->input_lck);
+/*
+ * Must be called on an attached ifnet (caller is expected to check.)
+ * Caller may pass NULL for poll parameters to indicate "auto-tuning."
+ *
+ * Returns ENXIO when the interface lacks IFEF_RXPOLL or its if_inp is
+ * NULL, the error from dlil_rxpoll_validate_params() when validation
+ * fails, and 0 otherwise. When "locked" is FALSE the input lock is
+ * taken and released here; when it is TRUE the lock must already be
+ * held, which LCK_MTX_ASSERT checks.
+ */
+errno_t
+dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
+ boolean_t locked)
+{
+ errno_t err;
+ struct dlil_threading_info *inp;
+
+ VERIFY(ifp != NULL);
+ if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
+ return ENXIO;
+ }
+ err = dlil_rxpoll_validate_params(p);
+ if (err != 0) {
+ return err;
+ }
- return (0);
+ if (!locked) {
+ lck_mtx_lock(&inp->input_lck);
+ }
+ LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+ /*
+ * Normally, we'd reset the parameters to the auto-tuned values
+ * if the input thread detects a change in link rate. If the
+ * driver provides its own parameters right after a link rate
+ * changes, but before the input thread gets to run, we want to
+ * make sure to keep the driver's values. Clearing if_poll_update
+ * will achieve that.
+ */
+ if (p != NULL && !locked && ifp->if_poll_update != 0) {
+ ifp->if_poll_update = 0;
+ }
+ dlil_rxpoll_update_params(ifp, p);
+ if (!locked) {
+ lck_mtx_unlock(&inp->input_lck);
+ }
+ return 0;
+}
/*
struct dlil_threading_info *inp;
VERIFY(ifp != NULL && p != NULL);
- if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
- return (ENXIO);
+ if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
+ return ENXIO;
+ }
- bzero(p, sizeof (*p));
+ bzero(p, sizeof(*p));
lck_mtx_lock(&inp->input_lck);
- p->packets_limit = inp->rxpoll_plim;
- p->packets_lowat = inp->rxpoll_plowat;
- p->packets_hiwat = inp->rxpoll_phiwat;
- p->bytes_lowat = inp->rxpoll_blowat;
- p->bytes_hiwat = inp->rxpoll_bhiwat;
- p->interval_time = inp->rxpoll_ival;
+ p->packets_limit = ifp->if_rxpoll_plim;
+ p->packets_lowat = ifp->if_rxpoll_plowat;
+ p->packets_hiwat = ifp->if_rxpoll_phiwat;
+ p->bytes_lowat = ifp->if_rxpoll_blowat;
+ p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
+ p->interval_time = ifp->if_rxpoll_ival;
lck_mtx_unlock(&inp->input_lck);
- return (0);
+ return 0;
}
/*
* Submit an inbound mbuf chain for ifp. Thin wrapper that calls
* ifnet_input_common() with a NULL tail pointer and with both the "ext"
* and "poll" flags FALSE, returning its result.
*/
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
const struct ifnet_stat_increment_param *s)
{
- return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
+ return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}
/*
* Extended input entry point: the caller also supplies the chain tail.
* Calls ifnet_input_common() with "ext" TRUE and "poll" FALSE; with "ext"
* set, ifnet_input_common() rejects a NULL stats pointer with EINVAL.
*/
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
- return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
+ return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
+}
+
/*
* Poll-mode input entry point. Calls ifnet_input_common() with "poll"
* TRUE; "ext" is TRUE only when a chain was actually supplied
* (m_head != NULL), so an empty poll result may be passed with no chain.
*/
+errno_t
+ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
+ struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
+{
+ return ifnet_input_common(ifp, m_head, m_tail, s,
+ (m_head != NULL), TRUE);
}
/*
* Common body of ifnet_input(), ifnet_input_extended() and
* ifnet_input_poll(). Rejects invalid argument combinations and detached
* interfaces with EINVAL (freeing any chain that was passed in), counts
* the packets and bytes of the chain, and forwards the chain to the
* interface's if_input_dlil handler together with a stats record whose
* packets_in/bytes_in reflect the counts determined here. Returns the
* handler's result.
*/
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
- struct thread *tp = current_thread();
- struct mbuf *last;
- struct dlil_threading_info *inp;
+ dlil_input_func input_func;
+ struct ifnet_stat_increment_param _s;
u_int32_t m_cnt = 0, m_size = 0;
+ struct mbuf *last;
+ errno_t err = 0;
if ((m_head == NULL && !poll) || (s == NULL && ext)) {
- if (m_head != NULL)
+ if (m_head != NULL) {
mbuf_freem_list(m_head);
- return (EINVAL);
+ }
+ return EINVAL;
}
VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
* interface is no longer attached; else hold an IO refcnt to
* prevent it from being detached (will be released below.)
*/
- if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
- if (m_head != NULL)
+ if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
+ if (m_head != NULL) {
mbuf_freem_list(m_head);
- return (EINVAL);
+ }
+ return EINVAL;
}
+ input_func = ifp->if_input_dlil;
+ VERIFY(input_func != NULL);
+
if (m_tail == NULL) {
last = m_head;
while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
- if (dlil_input_sanity_check != 0)
+ if (dlil_input_sanity_check != 0) {
DLIL_INPUT_CHECK(last, ifp);
+ }
#endif /* IFNET_INPUT_SANITY_CHK */
m_cnt++;
m_size += m_length(last);
- if (mbuf_nextpkt(last) == NULL)
+ if (mbuf_nextpkt(last) == NULL) {
break;
+ }
last = mbuf_nextpkt(last);
}
m_tail = last;
DLIL_INPUT_CHECK(last, ifp);
m_cnt++;
m_size += m_length(last);
- if (mbuf_nextpkt(last) == NULL)
+ if (mbuf_nextpkt(last) == NULL) {
break;
+ }
last = mbuf_nextpkt(last);
}
} else {
s->packets_in, m_cnt);
}
- if ((inp = ifp->if_inp) == NULL)
+ if (s == NULL) {
+ bzero(&_s, sizeof(_s));
+ s = &_s;
+ } else {
+ _s = *s;
+ }
+ _s.packets_in = m_cnt;
+ _s.bytes_in = m_size;
+
+ /*
+ * Always hand the handler the local copy: it carries the caller's
+ * other counters plus the packet/byte totals established above.
+ * The handler derives its queue accounting from packets_in and
+ * bytes_in, so passing the caller's record would let a non-extended
+ * caller's unverified counts desynchronize the queue length.
+ */
+ err = (*input_func)(ifp, m_head, m_tail, &_s, poll, current_thread());
+
+ if (ifp != lo_ifp) {
+ /* Release the IO refcnt */
+ ifnet_datamov_end(ifp);
+ }
+
+ return err;
+}
+
+
/*
* Output handler that simply forwards the mbuf to the interface's own
* if_output routine and returns whatever that routine returns.
*/
+errno_t
+dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
+{
+ return ifp->if_output(ifp, m);
+}
+
+errno_t
+dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
+ struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
+ boolean_t poll, struct thread *tp)
+{
+ struct dlil_threading_info *inp;
+ u_int32_t m_cnt = s->packets_in;
+ u_int32_t m_size = s->bytes_in;
+ boolean_t notify = FALSE;
+
+ if ((inp = ifp->if_inp) == NULL) {
inp = dlil_main_input_thread;
+ }
/*
* If there is a matching DLIL input thread associated with an
* will only do this once.
*/
lck_mtx_lock_spin(&inp->input_lck);
- if (inp != dlil_main_input_thread && inp->net_affinity &&
+ if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
((!poll && inp->wloop_thr == THREAD_NULL) ||
(poll && inp->poll_thr == THREAD_NULL))) {
u_int32_t tag = inp->tag;
/*
* Take a reference on the current thread; during detach,
- * we will need to refer to it in order ot tear down its
+ * we will need to refer to it in order to tear down its
* affinity.
*/
thread_reference(tp);
VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
- /*
+ /*
* Because of loopbacked multicast we cannot stuff the ifp in
* the rcvif of the packet header: loopback (lo0) packets use a
* dedicated list so that we can later associate them with lo_ifp
* dedicated input threads go to the regular list.
*/
if (m_head != NULL) {
+ classq_pkt_t head, tail;
+ CLASSQ_PKT_INIT_MBUF(&head, m_head);
+ CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
if (inp == dlil_main_input_thread && ifp == lo_ifp) {
struct dlil_main_threading_info *inpm =
(struct dlil_main_threading_info *)inp;
- _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
+ _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
m_cnt, m_size);
} else {
- _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
+ _addq_multi(&inp->rcvq_pkts, &head, &tail,
m_cnt, m_size);
}
}
u_int32_t count;
struct mbuf *m0;
- for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
+ for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
count++;
+ }
if (count != m_cnt) {
panic_plain("%s: invalid packet count %d "
}
#endif /* IFNET_INPUT_SANITY_CHK */
- if (s != NULL) {
- dlil_input_stats_add(s, inp, poll);
- /*
- * If we're using the main input thread, synchronize the
- * stats now since we have the interface context. All
- * other cases involving dedicated input threads will
- * have their stats synchronized there.
- */
- if (inp == dlil_main_input_thread)
- dlil_input_stats_sync(ifp, inp);
+ dlil_input_stats_add(s, inp, ifp, poll);
+ /*
+ * If we're using the main input thread, synchronize the
+ * stats now since we have the interface context. All
+ * other cases involving dedicated input threads will
+ * have their stats synchronized there.
+ */
+ if (inp == dlil_main_input_thread) {
+ notify = dlil_input_stats_sync(ifp, inp);
}
inp->input_waiting |= DLIL_INPUT_WAITING;
}
lck_mtx_unlock(&inp->input_lck);
- if (ifp != lo_ifp) {
- /* Release the IO refcnt */
- ifnet_decr_iorefcnt(ifp);
+ if (notify) {
+ ifnet_notify_data_threshold(ifp);
}
- return (0);
+ return 0;
}
+
static void
-ifnet_start_common(struct ifnet *ifp, int resetfc)
+ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
- if (!(ifp->if_eflags & IFEF_TXSTART))
+ if (!(ifp->if_eflags & IFEF_TXSTART)) {
return;
+ }
/*
* If the starter thread is inactive, signal it to do work,
* unless the interface is being flow controlled from below,
* e.g. a virtual interface being flow controlled by a real
- * network interface beneath it.
+ * network interface beneath it, or it's been disabled via
+ * a call to ifnet_disable_output().
*/
lck_mtx_lock_spin(&ifp->if_start_lock);
if (resetfc) {
return;
}
ifp->if_start_req++;
- if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
- wakeup_one((caddr_t)&ifp->if_start_thread);
+ if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
+ (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
+ IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
+ ifp->if_start_delayed == 0)) {
+ (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
+ ifp->if_start_thread);
}
lck_mtx_unlock(&ifp->if_start_lock);
}
/*
* Request a transmit start on ifp. Thin wrapper around
* ifnet_start_common() with the "resetfc" argument FALSE.
*/
void
ifnet_start(struct ifnet *ifp)
{
- ifnet_start_common(ifp, 0);
+ ifnet_start_common(ifp, FALSE);
}
+__attribute__((noreturn))
static void
-ifnet_start_thread_fn(void *v, wait_result_t w)
+ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
struct ifnet *ifp = v;
- char ifname[IFNAMSIZ + 1];
- struct timespec *ts = NULL;
- struct ifclassq *ifq = &ifp->if_snd;
+ char thread_name[MAXTHREADNAMESIZE];
+
+ /* Construct the name for this thread, and then apply it. */
+ bzero(thread_name, sizeof(thread_name));
+ (void) snprintf(thread_name, sizeof(thread_name),
+ "ifnet_start_%s", ifp->if_xname);
+ ASSERT(ifp->if_start_thread == current_thread());
+ thread_set_thread_name(current_thread(), thread_name);
/*
* Treat the dedicated starter thread for lo0 as equivalent to
lck_mtx_unlock(&inp->input_lck);
}
}
+ ifnet_decr_pending_thread_count(ifp);
- snprintf(ifname, sizeof (ifname), "%s_starter",
- if_name(ifp));
-
- lck_mtx_lock_spin(&ifp->if_start_lock);
+ lck_mtx_lock(&ifp->if_start_lock);
+ VERIFY(!ifp->if_start_active);
+ (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
+ lck_mtx_unlock(&ifp->if_start_lock);
+ (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
+ /* NOTREACHED */
+ __builtin_unreachable();
+}
- for (;;) {
- (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
- (PZERO - 1) | PSPIN, ifname, ts);
+__attribute__((noreturn))
+static void
+ifnet_start_thread_cont(void *v, wait_result_t wres)
+{ /*
+ * Continuation of the starter thread: it is handed to
+ * thread_block_parameter() together with the ifnet, so v is the
+ * struct ifnet and wres is the result of the wait that just ended.
+ * Each pass runs the driver's if_start routine until no new request
+ * arrived, then either re-arms the wait (optionally with a deadline)
+ * or, when if_start_thread is THREAD_NULL, terminates the thread.
+ */
+ struct ifnet *ifp = v;
+ struct ifclassq *ifq = &ifp->if_snd;
- /* interface is detached? */
- if (ifp->if_start_thread == THREAD_NULL) {
- ifnet_set_start_cycle(ifp, NULL);
- lck_mtx_unlock(&ifp->if_start_lock);
- ifnet_purge(ifp);
+ lck_mtx_lock(&ifp->if_start_lock);
+ if (__improbable(wres == THREAD_INTERRUPTED ||
+ ifp->if_start_thread == THREAD_NULL)) {
+ goto terminate; /* jumps into the else-branch at the bottom, lock held */
+ }
- if (dlil_verbose) {
- printf("%s: starter thread terminated\n",
- if_name(ifp));
- }
+ ifp->if_start_active = 1; /* cleared again right after the service loop */
- /* for the extra refcnt from kernel_thread_start() */
- thread_deallocate(current_thread());
- /* this is the end */
- thread_terminate(current_thread());
- /* NOTREACHED */
- return;
+ /*
+ * Keep servicing until there are no more requests.
+ */
+ for (;;) {
+ u_int32_t req = ifp->if_start_req; /* snapshot, compared after if_start returns */
+ if (!IFCQ_IS_EMPTY(ifq) &&
+ (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
+ ifp->if_start_delayed == 0 &&
+ IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
+ (ifp->if_eflags & IFEF_DELAY_START)) {
+ ifp->if_start_delayed = 1;
+ ifnet_start_delayed++;
+ break; /* skip the driver call this pass; if_start_lock is still held */
+ } else {
+ ifp->if_start_delayed = 0;
}
+ lck_mtx_unlock(&ifp->if_start_lock);
- ifp->if_start_active = 1;
- for (;;) {
- u_int32_t req = ifp->if_start_req;
-
- lck_mtx_unlock(&ifp->if_start_lock);
- /* invoke the driver's start routine */
- ((*ifp->if_start)(ifp));
+ /*
+ * If no longer attached, don't call start because ifp
+ * is being destroyed; else hold an IO refcnt to
+ * prevent the interface from being detached (will be
+ * released below.)
+ */
+ if (!ifnet_datamov_begin(ifp)) {
lck_mtx_lock_spin(&ifp->if_start_lock);
-
- /* if there's no pending request, we're done */
- if (req == ifp->if_start_req)
- break;
+ break; /* lock was re-taken just above so the code after the loop sees it held */
}
- ifp->if_start_req = 0;
- ifp->if_start_active = 0;
+
+ /* invoke the driver's start routine */
+ ((*ifp->if_start)(ifp));
+
/*
- * Wakeup N ns from now if rate-controlled by TBR, and if
- * there are still packets in the send queue which haven't
- * been dequeued so far; else sleep indefinitely (ts = NULL)
- * until ifnet_start() is called again.
+ * Release the io ref count taken above.
+ */
+ ifnet_datamov_end(ifp);
+
+ lck_mtx_lock_spin(&ifp->if_start_lock);
+
+ /*
+ * If there's no pending request or if the
+ * interface has been disabled, we're done.
+ */
+ if (req == ifp->if_start_req ||
+ (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
+ break;
+ }
+ }
+
+ ifp->if_start_req = 0;
+ ifp->if_start_active = 0;
+
+
+ if (__probable(ifp->if_start_thread != THREAD_NULL)) {
+ uint64_t deadline = TIMEOUT_WAIT_FOREVER; /* used unchanged when ts ends up NULL */
+ struct timespec delay_start_ts;
+ struct timespec *ts;
+
+ /*
+ * Wakeup N ns from now if rate-controlled by TBR, and if
+ * there are still packets in the send queue which haven't
+ * been dequeued so far; else sleep indefinitely (ts = NULL)
+ * until ifnet_start() is called again.
*/
ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
&ifp->if_start_cycle : NULL);
- if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
+ if (ts == NULL && ifp->if_start_delayed == 1) { /* delayed start: wake up after the delay timeout instead */
+ delay_start_ts.tv_sec = 0;
+ delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
+ ts = &delay_start_ts;
+ }
+
+ if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) { /* an all-zero interval means "no timeout" */
ts = NULL;
+ }
+
+ if (__improbable(ts != NULL)) {
+ clock_interval_to_deadline((ts->tv_nsec +
+ (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline); /* ts flattened into one nanosecond count */
+ }
+
+ (void) assert_wait_deadline(&ifp->if_start_thread,
+ THREAD_UNINT, deadline);
+ lck_mtx_unlock(&ifp->if_start_lock);
+ (void) thread_block_parameter(ifnet_start_thread_cont, ifp); /* this same function is the continuation */
+ /* NOTREACHED */
+ } else {
+terminate:
+ /* interface is detached? */
+ ifnet_set_start_cycle(ifp, NULL);
+ lck_mtx_unlock(&ifp->if_start_lock);
+ ifnet_purge(ifp);
+
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: starter thread terminated\n",
+ if_name(ifp));
+ }
+
+ /* for the extra refcnt from kernel_thread_start() */
+ thread_deallocate(current_thread());
+ /* this is the end */
+ thread_terminate(current_thread());
+ /* NOTREACHED */
}
+ /* must never get here */
+ VERIFY(0);
/* NOTREACHED */
+ __builtin_unreachable();
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
- if (ts == NULL)
- bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
- else
+ if (ts == NULL) { /* NULL clears the stored restart interval */
+ bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
+ } else { /* otherwise the caller's timespec is copied by value */
*(&ifp->if_start_cycle) = *ts;
+ }
- if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
- printf("%s: restart interval set to %lu nsec\n",
+ if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) { /* only tv_nsec is checked and logged; tv_sec is ignored here */
+ DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
if_name(ifp), ts->tv_nsec);
+ }
}
-static void
+void
ifnet_poll(struct ifnet *ifp)
{
/*
*/
lck_mtx_lock_spin(&ifp->if_poll_lock);
ifp->if_poll_req++;
- if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
+ if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) && /* the poller sets this flag around its service loop */
+ ifp->if_poll_thread != THREAD_NULL) { /* and a poller thread must exist to be woken */
wakeup_one((caddr_t)&ifp->if_poll_thread);
}
lck_mtx_unlock(&ifp->if_poll_lock);
}
+__attribute__((noreturn))
static void
-ifnet_poll_thread_fn(void *v, wait_result_t w)
+ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
+ char thread_name[MAXTHREADNAMESIZE];
+ struct ifnet *ifp = v; /* v carries the struct ifnet this poller serves */
+
+ VERIFY(ifp->if_eflags & IFEF_RXPOLL);
+ VERIFY(current_thread() == ifp->if_poll_thread); /* must run on the interface's own poller thread */
+
+ /* construct the name for this thread, and then apply it */
+ bzero(thread_name, sizeof(thread_name));
+ (void) snprintf(thread_name, sizeof(thread_name),
+ "ifnet_poller_%s", ifp->if_xname);
+ thread_set_thread_name(ifp->if_poll_thread, thread_name);
+ ifnet_decr_pending_thread_count(ifp);
+
+ lck_mtx_lock(&ifp->if_poll_lock);
+ (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT); /* same event address that ifnet_poll() passes to wakeup_one() */
+ lck_mtx_unlock(&ifp->if_poll_lock);
+ (void) thread_block_parameter(ifnet_poll_thread_cont, ifp); /* all later work happens in the continuation */
+ /* NOTREACHED */
+ __builtin_unreachable();
+}
+
+__attribute__((noreturn))
+static void
+ifnet_poll_thread_cont(void *v, wait_result_t wres)
+{ /*
+ * Continuation of the poller thread: v is the struct ifnet and wres
+ * the result of the wait that just ended. Each pass calls the
+ * driver's if_input_poll routine and hands whatever it returned to
+ * ifnet_input_common(), repeating while new poll requests arrive.
+ * Afterwards it re-arms the wait, with a deadline taken from
+ * if_poll_cycle when that is non-zero, or terminates the thread
+ * when if_poll_thread is THREAD_NULL.
+ */
struct dlil_threading_info *inp;
struct ifnet *ifp = v;
- char ifname[IFNAMSIZ + 1];
- struct timespec *ts = NULL;
struct ifnet_stat_increment_param s;
+ struct timespec start_time;
- snprintf(ifname, sizeof (ifname), "%s_poller",
- if_name(ifp));
- bzero(&s, sizeof (s));
+ VERIFY(ifp->if_eflags & IFEF_RXPOLL);
+
+ bzero(&s, sizeof(s));
+ net_timerclear(&start_time); /* NOTE(review): start_time is not referenced again in the visible code */
lck_mtx_lock_spin(&ifp->if_poll_lock);
+ if (__improbable(wres == THREAD_INTERRUPTED ||
+ ifp->if_poll_thread == THREAD_NULL)) {
+ goto terminate; /* jumps into the else-branch at the bottom, lock held */
+ }
inp = ifp->if_inp;
VERIFY(inp != NULL);
+ ifp->if_poll_flags |= IF_POLLF_RUNNING; /* ifnet_poll() skips its wakeup while this is set */
+
+ /*
+ * Keep servicing until there are no more requests.
+ */
for (;;) {
- if (ifp->if_poll_thread != THREAD_NULL) {
- (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
- (PZERO - 1) | PSPIN, ifname, ts);
- }
+ struct mbuf *m_head, *m_tail;
+ u_int32_t m_lim, m_cnt, m_totlen;
+ u_int16_t req = ifp->if_poll_req; /* snapshot, compared once the poll is done */
- /* interface is detached (maybe while asleep)? */
- if (ifp->if_poll_thread == THREAD_NULL) {
- ifnet_set_poll_cycle(ifp, NULL);
- lck_mtx_unlock(&ifp->if_poll_lock);
+ m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
+ MAX((qlimit(&inp->rcvq_pkts)), (ifp->if_rxpoll_phiwat << 2)); /* fallback: larger of the rcvq limit and 4 * if_rxpoll_phiwat */
+ lck_mtx_unlock(&ifp->if_poll_lock);
- if (dlil_verbose) {
- printf("%s: poller thread terminated\n",
- if_name(ifp));
- }
+ /*
+ * If no longer attached, there's nothing to do;
+ * else hold an IO refcnt to prevent the interface
+ * from being detached (will be released below.)
+ */
+ if (!ifnet_is_attached(ifp, 1)) {
+ lck_mtx_lock_spin(&ifp->if_poll_lock);
+ break; /* lock re-taken so the code after the loop sees it held */
+ }
- /* for the extra refcnt from kernel_thread_start() */
- thread_deallocate(current_thread());
- /* this is the end */
- thread_terminate(current_thread());
- /* NOTREACHED */
- return;
+ if (dlil_verbose > 1) {
+ DLIL_PRINTF("%s: polling up to %d pkts, "
+ "pkts avg %d max %d, wreq avg %d, "
+ "bytes avg %d\n",
+ if_name(ifp), m_lim,
+ ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
+ ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
}
- ifp->if_poll_active = 1;
- for (;;) {
- struct mbuf *m_head, *m_tail;
- u_int32_t m_lim, m_cnt, m_totlen;
- u_int16_t req = ifp->if_poll_req;
+ /* invoke the driver's input poll routine */
+ ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
+ &m_cnt, &m_totlen));
- lck_mtx_unlock(&ifp->if_poll_lock);
+ if (m_head != NULL) {
+ VERIFY(m_tail != NULL && m_cnt > 0);
- /*
- * If no longer attached, there's nothing to do;
- * else hold an IO refcnt to prevent the interface
- * from being detached (will be released below.)
- */
- if (!ifnet_is_attached(ifp, 1)) {
- lck_mtx_lock_spin(&ifp->if_poll_lock);
- break;
+ if (dlil_verbose > 1) {
+ DLIL_PRINTF("%s: polled %d pkts, "
+ "pkts avg %d max %d, wreq avg %d, "
+ "bytes avg %d\n",
+ if_name(ifp), m_cnt,
+ ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
+ ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
}
- m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
- MAX((qlimit(&inp->rcvq_pkts)),
- (inp->rxpoll_phiwat << 2));
+ /* stats are required for extended variant */
+ s.packets_in = m_cnt;
+ s.bytes_in = m_totlen;
+ (void) ifnet_input_common(ifp, m_head, m_tail,
+ &s, TRUE, TRUE);
+ } else {
if (dlil_verbose > 1) {
- printf("%s: polling up to %d pkts, "
+ DLIL_PRINTF("%s: no packets, "
"pkts avg %d max %d, wreq avg %d, "
"bytes avg %d\n",
- if_name(ifp), m_lim,
- inp->rxpoll_pavg, inp->rxpoll_pmax,
- inp->rxpoll_wavg, inp->rxpoll_bavg);
+ if_name(ifp), ifp->if_rxpoll_pavg,
+ ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
+ ifp->if_rxpoll_bavg);
}
- /* invoke the driver's input poll routine */
- ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
- &m_cnt, &m_totlen));
-
- if (m_head != NULL) {
- VERIFY(m_tail != NULL && m_cnt > 0);
-
- if (dlil_verbose > 1) {
- printf("%s: polled %d pkts, "
- "pkts avg %d max %d, wreq avg %d, "
- "bytes avg %d\n",
- if_name(ifp), m_cnt,
- inp->rxpoll_pavg, inp->rxpoll_pmax,
- inp->rxpoll_wavg, inp->rxpoll_bavg);
- }
-
- /* stats are required for extended variant */
- s.packets_in = m_cnt;
- s.bytes_in = m_totlen;
+ (void) ifnet_input_common(ifp, NULL, NULL,
+ NULL, FALSE, TRUE); /* still called with an empty chain and no stats */
+ }
- (void) ifnet_input_common(ifp, m_head, m_tail,
- &s, TRUE, TRUE);
- } else {
- if (dlil_verbose > 1) {
- printf("%s: no packets, "
- "pkts avg %d max %d, wreq avg %d, "
- "bytes avg %d\n",
- if_name(ifp), inp->rxpoll_pavg,
- inp->rxpoll_pmax, inp->rxpoll_wavg,
- inp->rxpoll_bavg);
- }
+ /* Release the io ref count */
+ ifnet_decr_iorefcnt(ifp);
- (void) ifnet_input_common(ifp, NULL, NULL,
- NULL, FALSE, TRUE);
- }
+ lck_mtx_lock_spin(&ifp->if_poll_lock);
- /* Release the io ref count */
- ifnet_decr_iorefcnt(ifp);
+ /* if there's no pending request, we're done */
+ if (req == ifp->if_poll_req ||
+ ifp->if_poll_thread == THREAD_NULL) { /* also stop once the thread pointer has been cleared */
+ break;
+ }
+ }
- lck_mtx_lock_spin(&ifp->if_poll_lock);
+ ifp->if_poll_req = 0;
+ ifp->if_poll_flags &= ~IF_POLLF_RUNNING; /* from here on ifnet_poll() issues wakeups again */
- /* if there's no pending request, we're done */
- if (req == ifp->if_poll_req)
- break;
- }
- ifp->if_poll_req = 0;
- ifp->if_poll_active = 0;
+ if (ifp->if_poll_thread != THREAD_NULL) {
+ uint64_t deadline = TIMEOUT_WAIT_FOREVER; /* used unchanged when ts ends up NULL */
+ struct timespec *ts;
/*
* Wakeup N ns from now, else sleep indefinitely (ts = NULL)
* until ifnet_poll() is called again.
*/
ts = &ifp->if_poll_cycle;
- if (ts->tv_sec == 0 && ts->tv_nsec == 0)
+ if (ts->tv_sec == 0 && ts->tv_nsec == 0) { /* an all-zero cycle means "no timeout" */
ts = NULL;
+ }
+
+ if (ts != NULL) {
+ clock_interval_to_deadline((ts->tv_nsec +
+ (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline); /* ts flattened into one nanosecond count */
+ }
+
+ (void) assert_wait_deadline(&ifp->if_poll_thread,
+ THREAD_UNINT, deadline);
+ lck_mtx_unlock(&ifp->if_poll_lock);
+ (void) thread_block_parameter(ifnet_poll_thread_cont, ifp); /* this same function is the continuation */
+ /* NOTREACHED */
+ } else {
+terminate:
+ /* interface is detached (maybe while asleep)? */
+ ifnet_set_poll_cycle(ifp, NULL);
+ lck_mtx_unlock(&ifp->if_poll_lock);
+
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: poller thread terminated\n",
+ if_name(ifp));
+ }
+
+ /* for the extra refcnt from kernel_thread_start() */
+ thread_deallocate(current_thread());
+ /* this is the end */
+ thread_terminate(current_thread());
+ /* NOTREACHED */
}
+ /* must never get here */
+ VERIFY(0);
/* NOTREACHED */
+ __builtin_unreachable();
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
- if (ts == NULL)
- bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
- else
+ if (ts == NULL) { /* NULL clears the stored poll interval */
+ bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
+ } else { /* otherwise the caller's timespec is copied by value */
*(&ifp->if_poll_cycle) = *ts;
+ }
- if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
- printf("%s: poll interval set to %lu nsec\n",
+ if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) { /* only tv_nsec is checked and logged; tv_sec is ignored here */
+ DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
if_name(ifp), ts->tv_nsec);
+ }
}
void
ifnet_purge(struct ifnet *ifp)
{
- if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
+ if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) { /* no-op for NULL or for interfaces without IFEF_TXSTART */
if_qflush(ifp, 0);
+ }
}
void
{
IFCQ_LOCK_ASSERT_HELD(ifq);
- if (!(IFCQ_IS_READY(ifq)))
+ if (!(IFCQ_IS_READY(ifq))) {
return;
+ }
if (IFCQ_TBR_IS_ENABLED(ifq)) {
- struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
- ifq->ifcq_tbr.tbr_percent, 0 };
+ struct tb_profile tb = {
+ .rate = ifq->ifcq_tbr.tbr_rate_raw,
+ .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
+ };
(void) ifclassq_tbr_set(ifq, &tb, FALSE);
}
{
switch (ev) {
case CLASSQ_EV_LINK_BANDWIDTH:
- if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
+ if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
ifp->if_poll_update++;
+ }
break;
default:
u_int32_t omodel;
errno_t err;
- if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
- model != IFNET_SCHED_MODEL_NORMAL))
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART))
- return (ENXIO);
+ if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
+ return ENXIO;
+ }
ifq = &ifp->if_snd;
IFCQ_LOCK(ifq);
omodel = ifp->if_output_sched_model;
ifp->if_output_sched_model = model;
- if ((err = ifclassq_pktsched_setup(ifq)) != 0)
+ if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
ifp->if_output_sched_model = omodel;
+ }
IFCQ_UNLOCK(ifq);
- return (err);
+ return err;
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
- if (ifp == NULL)
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART))
- return (ENXIO);
+ if (ifp == NULL) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART)) { /* only interfaces with IFEF_TXSTART are accepted */
+ return ENXIO;
+ }
ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
- return (0);
+ return 0; /* maxqlen itself is passed through unvalidated */
}
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
- if (ifp == NULL || maxqlen == NULL)
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART))
- return (ENXIO);
+ if (ifp == NULL || maxqlen == NULL) { /* the output pointer is mandatory */
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART)) { /* only interfaces with IFEF_TXSTART are accepted */
+ return ENXIO;
+ }
*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
- return (0);
+ return 0;
}
errno_t
{
errno_t err;
- if (ifp == NULL || pkts == NULL)
+ if (ifp == NULL || pkts == NULL) {
err = EINVAL;
- else if (!(ifp->if_eflags & IFEF_TXSTART))
+ } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
err = ENXIO;
- else
+ } else {
err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
pkts, NULL);
+ }
- return (err);
+ return err;
}
errno_t
errno_t err;
if (ifp == NULL || !MBUF_VALID_SC(sc) ||
- (pkts == NULL && bytes == NULL))
+ (pkts == NULL && bytes == NULL)) {
err = EINVAL;
- else if (!(ifp->if_eflags & IFEF_TXSTART))
+ } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
err = ENXIO;
- else
+ } else {
err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
+ }
- return (err);
+ return err;
}
errno_t
{
struct dlil_threading_info *inp;
- if (ifp == NULL)
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
- return (ENXIO);
+ if (ifp == NULL) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
+ return ENXIO;
+ }
- if (maxqlen == 0)
+ if (maxqlen == 0) {
maxqlen = if_rcvq_maxlen;
- else if (maxqlen < IF_RCVQ_MINLEN)
+ } else if (maxqlen < IF_RCVQ_MINLEN) {
maxqlen = IF_RCVQ_MINLEN;
+ }
inp = ifp->if_inp;
lck_mtx_lock(&inp->input_lck);
qlimit(&inp->rcvq_pkts) = maxqlen;
lck_mtx_unlock(&inp->input_lck);
- return (0);
+ return 0;
}
errno_t
{
struct dlil_threading_info *inp;
- if (ifp == NULL || maxqlen == NULL)
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
- return (ENXIO);
+ if (ifp == NULL || maxqlen == NULL) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
+ return ENXIO;
+ }
inp = ifp->if_inp;
lck_mtx_lock(&inp->input_lck);
*maxqlen = qlimit(&inp->rcvq_pkts);
lck_mtx_unlock(&inp->input_lck);
- return (0);
+ return 0;
+}
+
+void
+ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
+ uint16_t delay_timeout)
+{ /*
+ * Turns IFEF_ENQUEUE_MULTI on when both arguments are non-zero and
+ * records the clamped queue-length and timeout values; turns the
+ * flag off otherwise. ifp is dereferenced without a NULL check.
+ */
+ if (delay_qlen > 0 && delay_timeout > 0) {
+ ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
+ ifp->if_start_delay_qlen = min(100, delay_qlen); /* capped at 100 */
+ ifp->if_start_delay_timeout = min(20000, delay_timeout); /* capped at 20000 before the unit conversion */
+ /* convert timeout to nanoseconds */
+ ifp->if_start_delay_timeout *= 1000;
+ kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
+ ifp->if_xname, (uint32_t)delay_qlen,
+ (uint32_t)delay_timeout); /* logs the caller's raw values, not the clamped ones */
+ } else {
+ ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI; /* the stored qlen/timeout fields are left as they were */
+ }
+}
+
+/*
+ * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
+ * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
+ * buf holds the full header.
+ *
+ * ip_ver selects the layout: IPVERSION takes the IPv4 path, anything else
+ * is asserted to be IPV6_VERSION. The header is modified in place; when buf
+ * fails IP_HDR_ALIGNED_P() the header is copied into a local aligned buffer,
+ * edited there and copied back. For IPv4 the header checksum is adjusted to
+ * match the new TOS byte. Nothing is written when the DSCP bits are already
+ * zero.
+ */
+static __attribute__((noinline)) void
+ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
+{
+ struct ip *ip;
+ struct ip6_hdr *ip6;
+ uint8_t lbuf[64] __attribute__((aligned(8))); /* aligned scratch copy for misaligned headers */
+ uint8_t *p = buf;
+
+ if (ip_ver == IPVERSION) {
+ uint8_t old_tos;
+ uint32_t sum;
+
+ if (__improbable(!IP_HDR_ALIGNED_P(p))) {
+ DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
+ bcopy(buf, lbuf, sizeof(struct ip));
+ p = lbuf;
+ }
+ ip = (struct ip *)(void *)p;
+ if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
+ return; /* no bits outside the ECN mask are set: nothing to clear */
+ }
+
+ DTRACE_IP1(clear__v4, struct ip *, ip);
+ old_tos = ip->ip_tos;
+ ip->ip_tos &= IPTOS_ECN_MASK; /* keep only the ECN bits */
+ sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos); /* adjust the checksum by the old/new TOS difference */
+ sum = (sum >> 16) + (sum & 0xffff); /* fold the carry back into the low 16 bits */
+ ip->ip_sum = (uint16_t)(sum & 0xffff);
+
+ if (__improbable(p == lbuf)) {
+ bcopy(lbuf, buf, sizeof(struct ip)); /* write the edited copy back to the packet */
+ }
+ } else {
+ uint32_t flow;
+ ASSERT(ip_ver == IPV6_VERSION);
+
+ if (__improbable(!IP_HDR_ALIGNED_P(p))) {
+ DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
+ bcopy(buf, lbuf, sizeof(struct ip6_hdr));
+ p = lbuf;
+ }
+ ip6 = (struct ip6_hdr *)(void *)p;
+ flow = ntohl(ip6->ip6_flow);
+ if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
+ return; /* DSCP already zero: nothing to clear */
+ }
+
+ DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
+ ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK); /* no checksum fix-up on this path */
+
+ if (__improbable(p == lbuf)) {
+ bcopy(lbuf, buf, sizeof(struct ip6_hdr)); /* write the edited copy back to the packet */
+ }
+ }
+}
+
+static inline errno_t
+ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
+ boolean_t *pdrop)
+{ /*
+ * Enqueues one packet on the interface send queue (ifp->if_snd).
+ * Before the enqueue it: stamps the packet with a timestamp if it
+ * has none, updates the interface's foreground/realtime send
+ * timestamps, applies the multicast DSCP workaround on Wi-Fi
+ * infrastructure interfaces, and updates the delayed-start
+ * bookkeeping when IFEF_ENQUEUE_MULTI is set. Returns the result
+ * of ifclassq_enqueue(), or ENOMEM if an m_pullup() fails.
+ * Only QP_MBUF packets are handled; any other type hits VERIFY(0).
+ */
+ volatile uint64_t *fg_ts = NULL; /* never assigned below, so its NULL check never fires here */
+ volatile uint64_t *rt_ts = NULL; /* likewise never assigned below */
+ struct timespec now;
+ u_int64_t now_nsec = 0; /* 0 means "not sampled yet" */
+ int error = 0;
+ uint8_t *mcast_buf = NULL; /* set only when the DSCP workaround applies */
+ uint8_t ip_ver; /* assigned only on the multicast path; read only when mcast_buf != NULL */
+
+ ASSERT(ifp->if_eflags & IFEF_TXSTART);
+
+ /*
+ * If packet already carries a timestamp, either from dlil_output()
+ * or from flowswitch, use it here. Otherwise, record timestamp.
+ * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
+ * the timestamp value is used internally there.
+ */
+ switch (p->cp_ptype) {
+ case QP_MBUF:
+ ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
+ ASSERT(p->cp_mbuf->m_nextpkt == NULL);
+
+ if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
+ p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
+ nanouptime(&now);
+ net_timernsec(&now, &now_nsec);
+ p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
+ }
+ p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
+ /*
+ * If the packet service class is not background,
+ * update the timestamp to indicate recent activity
+ * on a foreground socket.
+ */
+ if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
+ p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
+ if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
+ PKTF_SO_BACKGROUND)) {
+ ifp->if_fg_sendts = _net_uptime;
+ if (fg_ts != NULL) {
+ *fg_ts = _net_uptime;
+ }
+ }
+ if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
+ ifp->if_rt_sendts = _net_uptime;
+ if (rt_ts != NULL) {
+ *rt_ts = _net_uptime;
+ }
+ }
+ }
+
+ /*
+ * Some Wi-Fi AP implementations do not correctly handle
+ * multicast IP packets with DSCP bits set (radr://9331522).
+ * As a workaround we clear the DSCP bits and set the service
+ * class to BE.
+ */
+ if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
+ IFNET_IS_WIFI_INFRA(ifp)) {
+ size_t len = mbuf_len(p->cp_mbuf), hlen; /* len is sampled once and not refreshed after a pullup */
+ struct ether_header *eh;
+ boolean_t pullup = FALSE;
+ uint16_t etype;
+
+ if (__improbable(len < sizeof(struct ether_header))) {
+ DTRACE_IP1(small__ether, size_t, len);
+ if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
+ sizeof(struct ether_header))) == NULL) {
+ return ENOMEM; /* NOTE(review): *pdrop is not written on this path -- confirm callers cope */
+ }
+ }
+ eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
+ etype = ntohs(eh->ether_type);
+ if (etype == ETHERTYPE_IP) {
+ hlen = sizeof(struct ether_header) +
+ sizeof(struct ip);
+ if (len < hlen) {
+ DTRACE_IP1(small__v4, size_t, len);
+ pullup = TRUE;
+ }
+ ip_ver = IPVERSION;
+ } else if (etype == ETHERTYPE_IPV6) {
+ hlen = sizeof(struct ether_header) +
+ sizeof(struct ip6_hdr);
+ if (len < hlen) {
+ DTRACE_IP1(small__v6, size_t, len);
+ pullup = TRUE;
+ }
+ ip_ver = IPV6_VERSION;
+ } else {
+ DTRACE_IP1(invalid__etype, uint16_t, etype);
+ break; /* neither IPv4 nor IPv6: leaves the switch with mcast_buf still NULL */
+ }
+ if (pullup) {
+ if ((p->cp_mbuf = m_pullup(p->cp_mbuf, hlen)) ==
+ NULL) {
+ return ENOMEM; /* NOTE(review): *pdrop is not written on this path either */
+ }
+
+ eh = (struct ether_header *)mbuf_data(
+ p->cp_mbuf); /* re-read: the pullup may have replaced the mbuf */
+ }
+ mbuf_set_service_class(p->cp_mbuf, MBUF_SC_BE);
+ mcast_buf = (uint8_t *)(eh + 1); /* first byte after the Ethernet header */
+ /*
+ * ifnet_mcast_clear_dscp() will finish the work below.
+ * Note that the pullups above ensure that mcast_buf
+ * points to a full IP header.
+ */
+ }
+ break;
+
+
+ default:
+ VERIFY(0);
+ /* NOTREACHED */
+ __builtin_unreachable();
+ }
+
+ if (mcast_buf != NULL) {
+ ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
+ }
+
+ if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
+ if (now_nsec == 0) { /* packet already carried a timestamp, so the clock was not read above */
+ nanouptime(&now);
+ net_timernsec(&now, &now_nsec);
+ }
+ /*
+ * If the driver chose to delay start callback for
+ * coalescing multiple packets, then use the following
+ * heuristics to make sure that start callback will
+ * be delayed only when bulk data transfer is detected.
+ * 1. number of packets enqueued in (delay_win * 2) is
+ * greater than or equal to the delay qlen.
+ * 2. If delay_start is enabled it will stay enabled for
+ * another 10 idle windows. This is to take into account
+ * variable RTT and burst traffic.
+ * 3. If the time elapsed since last enqueue is more
+ * than 200ms we disable delaying start callback. This is
+ * to take idle time into account.
+ */
+ u_int64_t dwin = (ifp->if_start_delay_timeout << 1); /* sampling window: twice the delay timeout */
+ if (ifp->if_start_delay_swin > 0) {
+ if ((ifp->if_start_delay_swin + dwin) > now_nsec) { /* still inside the current window: just count */
+ ifp->if_start_delay_cnt++;
+ } else if ((now_nsec - ifp->if_start_delay_swin)
+ >= (200 * 1000 * 1000)) { /* 200 ms expressed in nanoseconds */
+ ifp->if_start_delay_swin = now_nsec;
+ ifp->if_start_delay_cnt = 1;
+ ifp->if_start_delay_idle = 0;
+ if (ifp->if_eflags & IFEF_DELAY_START) {
+ ifp->if_eflags &=
+ ~(IFEF_DELAY_START);
+ ifnet_delay_start_disabled++;
+ }
+ } else { /* window elapsed: evaluate it, then open a new one */
+ if (ifp->if_start_delay_cnt >=
+ ifp->if_start_delay_qlen) {
+ ifp->if_eflags |= IFEF_DELAY_START;
+ ifp->if_start_delay_idle = 0;
+ } else {
+ if (ifp->if_start_delay_idle >= 10) {
+ ifp->if_eflags &=
+ ~(IFEF_DELAY_START);
+ ifnet_delay_start_disabled++;
+ } else {
+ ifp->if_start_delay_idle++;
+ }
+ }
+ ifp->if_start_delay_swin = now_nsec;
+ ifp->if_start_delay_cnt = 1;
+ }
+ } else { /* no window open yet: start the first one */
+ ifp->if_start_delay_swin = now_nsec;
+ ifp->if_start_delay_cnt = 1;
+ ifp->if_start_delay_idle = 0;
+ ifp->if_eflags &= ~(IFEF_DELAY_START);
+ }
+ } else {
+ ifp->if_eflags &= ~(IFEF_DELAY_START);
+ }
+
+ /* enqueue the packet (caller consumes object) */
+ error = ifclassq_enqueue(&ifp->if_snd, p, pdrop);
+
+ /*
+ * Tell the driver to start dequeueing; do this even when the queue
+ * for the packet is suspended (EQSUSPENDED), as the driver could still
+ * be dequeueing from other unsuspended queues.
+ */
+ if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) && /* with ENQUEUE_MULTI set, ifnet_start() is never called from here */
+ ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
+ ifnet_start(ifp);
+ }
+
+ return error;
+}
+
+int
+ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
+{ /*
+ * Enqueues an array of n_pkts packets on the interface passed as
+ * handle, requesting a flush only with the last one. The result of
+ * each individual enqueue is discarded and 0 is always returned.
+ */
+ struct ifnet *ifp = handle;
+ boolean_t pdrop; /* dummy */
+ uint32_t i;
+
+ ASSERT(n_pkts >= 1); /* n_pkts - 1 below would wrap for 0 */
+ for (i = 0; i < n_pkts - 1; i++) {
+ (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
+ FALSE, &pdrop);
+ }
+ /* flush with the last packet */
+ (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop); /* i == n_pkts - 1 here */
+
+ return 0;
+}
+
+static inline errno_t
+ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
+ boolean_t *pdrop)
+{
+ if (ifp->if_output_netem != NULL) { /* when set, the packet goes to netem instead of the send queue */
+ return netem_enqueue(ifp->if_output_netem, pkt, pdrop); /* flush is not forwarded on this path */
+ } else {
+ return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
+ }
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
- int error;
+ boolean_t pdrop; /* required by ifnet_enqueue_mbuf(); the value is discarded */
+ return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop); /* TRUE is the flush argument */
+}
+
+errno_t
+ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
+ boolean_t *pdrop)
+{ /*
+ * Validates ifp and m, then wraps the mbuf in a classq_pkt_t and
+ * passes it to ifnet_enqueue_common(). On each rejection path that
+ * frees the mbuf, *pdrop is set to TRUE. pdrop is dereferenced
+ * without a NULL check.
+ */
+ classq_pkt_t pkt;
if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
m->m_nextpkt != NULL) {
- if (m != NULL)
+ if (m != NULL) {
m_freem_list(m);
- return (EINVAL);
+ *pdrop = TRUE; /* not written when m == NULL */
+ }
+ return EINVAL;
} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- !(ifp->if_refflags & IFRF_ATTACHED)) {
+ !IF_FULLY_ATTACHED(ifp)) {
/* flag tested without lock for performance */
m_freem(m);
- return (ENXIO);
+ *pdrop = TRUE;
+ return ENXIO;
} else if (!(ifp->if_flags & IFF_UP)) {
m_freem(m);
- return (ENETDOWN);
+ *pdrop = TRUE;
+ return ENETDOWN;
}
- /* enqueue the packet */
- error = ifclassq_enqueue(&ifp->if_snd, m);
-
- /*
- * Tell the driver to start dequeueing; do this even when the queue
- * for the packet is suspended (EQSUSPENDED), as the driver could still
- * be dequeueing from other unsuspended queues.
- */
- if (error == 0 || error == EQFULL || error == EQSUSPENDED)
- ifnet_start(ifp);
-
- return (error);
+ CLASSQ_PKT_INIT_MBUF(&pkt, m);
+ return ifnet_enqueue_common(ifp, &pkt, flush, pdrop); /* from here on *pdrop is whatever the callee reports */
}
+
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
errno_t rc;
- if (ifp == NULL || mp == NULL)
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
- return (ENXIO);
- if (!ifnet_is_attached(ifp, 1))
- return (ENXIO);
- rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
- ifnet_decr_iorefcnt(ifp);
+ classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
- return (rc);
+ if (ifp == NULL || mp == NULL) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) { /* any model below IFNET_SCHED_MODEL_MAX is accepted */
+ return ENXIO;
+ }
+ if (!ifnet_is_attached(ifp, 1)) { /* paired with ifnet_decr_iorefcnt() below */
+ return ENXIO;
+ }
+
+ rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
+ &pkt, NULL, NULL, NULL); /* packet limit of 1; tail, count and length are not requested */
+ VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
+ ifnet_decr_iorefcnt(ifp);
+ *mp = pkt.cp_mbuf; /* written regardless of rc */
+ return rc;
}
errno_t
struct mbuf **mp)
{
errno_t rc;
- if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
- return (ENXIO);
- if (!ifnet_is_attached(ifp, 1))
- return (ENXIO);
-
- rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
+ classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
+
+ if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
+ return ENXIO;
+ }
+ if (!ifnet_is_attached(ifp, 1)) {
+ return ENXIO;
+ }
+
+ rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
+ CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
+ VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
+ ifnet_decr_iorefcnt(ifp);
+ *mp = pkt.cp_mbuf;
+ return rc;
+}
+
+errno_t
+ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
+ struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{ /*
+ * Dequeues up to pkt_limit packets from the interface send queue.
+ * head is mandatory; tail may be NULL; cnt and len are passed to
+ * ifclassq_dequeue() unchecked. Returns EINVAL for bad arguments,
+ * ENXIO when the interface is unsuitable or not attached,
+ * otherwise the result of ifclassq_dequeue().
+ */
+ errno_t rc;
+ classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
+ classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
+
+ if (ifp == NULL || head == NULL || pkt_limit < 1) {
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
+ return ENXIO;
+ }
+ if (!ifnet_is_attached(ifp, 1)) { /* paired with ifnet_decr_iorefcnt() below */
+ return ENXIO;
+ }
+
+ rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
+ CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
+ VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL)); /* only the head is type-checked */
ifnet_decr_iorefcnt(ifp);
- return (rc);
+ *head = pkt_head.cp_mbuf; /* written regardless of rc */
+ if (tail != NULL) {
+ *tail = pkt_tail.cp_mbuf;
+ }
+ return rc;
}
errno_t
-ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
- struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
+ struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
errno_t rc;
- if (ifp == NULL || head == NULL || limit < 1)
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
- return (ENXIO);
- if (!ifnet_is_attached(ifp, 1))
- return (ENXIO);
-
- rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
+ classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
+ classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
+
+ if (ifp == NULL || head == NULL || byte_limit < 1) { /* head is mandatory; tail may be NULL */
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
+ return ENXIO;
+ }
+ if (!ifnet_is_attached(ifp, 1)) { /* paired with ifnet_decr_iorefcnt() below */
+ return ENXIO;
+ }
+
+ rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
+ byte_limit, &pkt_head, &pkt_tail, cnt, len); /* bounded by bytes; the packet limit is the maximum constant */
+ VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL)); /* only the head is type-checked */
ifnet_decr_iorefcnt(ifp);
- return (rc);
+ *head = pkt_head.cp_mbuf; /* written regardless of rc */
+ if (tail != NULL) {
+ *tail = pkt_tail.cp_mbuf;
+ }
+ return rc;
}
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
- u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+ u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
u_int32_t *len)
{
errno_t rc;
- if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
- return (EINVAL);
- else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
- return (ENXIO);
- if (!ifnet_is_attached(ifp, 1))
- return (ENXIO);
- rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
- tail, cnt, len);
+ classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
+ classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
+
+ if (ifp == NULL || head == NULL || pkt_limit < 1 ||
+ !MBUF_VALID_SC(sc)) { /* head is mandatory; tail may be NULL; sc must be a valid service class */
+ return EINVAL;
+ } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
+ return ENXIO;
+ }
+ if (!ifnet_is_attached(ifp, 1)) { /* paired with ifnet_decr_iorefcnt() below */
+ return ENXIO;
+ }
+
+ rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
+ CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
+ cnt, len);
+ VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL)); /* only the head is type-checked */
ifnet_decr_iorefcnt(ifp);
- return (rc);
+ *head = pkt_head.cp_mbuf; /* written regardless of rc */
+ if (tail != NULL) {
+ *tail = pkt_tail.cp_mbuf;
+ }
+ return rc;
}
+#if !CONFIG_EMBEDDED
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
const struct sockaddr *dest, const char *dest_linkaddr,
const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
- if (pre != NULL)
+ if (pre != NULL) { /* pre and post are optional outputs; both are reported as 0 */
*pre = 0;
- if (post != NULL)
+ }
+ if (post != NULL) {
*post = 0;
+ }
- return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
+ return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type); /* the legacy framer takes no pre/post arguments */
+}
+#endif /* !CONFIG_EMBEDDED */
+
+/*
+ * Tell whether the packet header of m carries a non-zero VLAN ID.
+ *
+ * When CSUM_VLAN_TAG_VALID is set but the VLAN ID extracted from vlan_tag
+ * is zero, the packet is only priority-tagged: the flag is cleared on the
+ * mbuf and FALSE is returned.
+ */
+static boolean_t
+packet_has_vlan_tag(struct mbuf * m)
+{
+	u_int vlan_id;
+
+	/* no tag recorded at all */
+	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
+		return FALSE;
+	}
+
+	vlan_id = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
+	if (vlan_id != 0) {
+		return TRUE;
+	}
+
+	/* the packet is just priority-tagged, clear the bit */
+	m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
+	return FALSE;
}
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
char **frame_header_p, protocol_family_t protocol_family)
{
- struct ifnet_filter *filter;
+ boolean_t is_vlan_packet = FALSE;
+ struct ifnet_filter *filter;
+ struct mbuf *m = *m_p;
+
+ is_vlan_packet = packet_has_vlan_tag(m);
/*
* Pass the inbound packet to the interface filters
TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
int result;
+ /* exclude VLAN packets from external filters PR-3586856 */
+ if (is_vlan_packet &&
+ (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
+ continue;
+ }
+
if (!filter->filt_skip && filter->filt_input != NULL &&
(filter->filt_protocol == 0 ||
filter->filt_protocol == protocol_family)) {
/* we're done with the filter list */
if_flt_monitor_unbusy(ifp);
lck_mtx_unlock(&ifp->if_flt_lock);
- return (result);
+ return result;
}
}
}
* Strip away M_PROTO1 bit prior to sending packet up the stack as
* it is meant to be local to a subsystem -- if_bridge for M_PROTO1
*/
- if (*m_p != NULL)
+ if (*m_p != NULL) {
(*m_p)->m_flags &= ~M_PROTO1;
+ }
- return (0);
+ return 0;
}
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
protocol_family_t protocol_family)
{
- struct ifnet_filter *filter;
+ boolean_t is_vlan_packet;
+ struct ifnet_filter *filter;
+ struct mbuf *m = *m_p;
+
+ is_vlan_packet = packet_has_vlan_tag(m);
/*
* Pass the outbound packet to the interface filters
TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
int result;
+ /* exclude VLAN packets from external filters PR-3586856 */
+ if (is_vlan_packet &&
+ (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
+ continue;
+ }
+
if (!filter->filt_skip && filter->filt_output != NULL &&
(filter->filt_protocol == 0 ||
filter->filt_protocol == protocol_family)) {
/* we're done with the filter list */
if_flt_monitor_unbusy(ifp);
lck_mtx_unlock(&ifp->if_flt_lock);
- return (result);
+ return result;
}
}
}
if_flt_monitor_unbusy(ifp);
lck_mtx_unlock(&ifp->if_flt_lock);
- return (0);
+ return 0;
}
static void
if (ifproto->proto_kpi == kProtoKPI_v1) {
/* Version 1 protocols get one packet at a time */
while (m != NULL) {
- char * frame_header;
- mbuf_t next_packet;
+ char * frame_header;
+ mbuf_t next_packet;
next_packet = m->m_nextpkt;
m->m_nextpkt = NULL;
m->m_pkthdr.pkt_hdr = NULL;
error = (*ifproto->kpi.v1.input)(ifproto->ifp,
ifproto->protocol_family, m, frame_header);
- if (error != 0 && error != EJUSTRETURN)
+ if (error != 0 && error != EJUSTRETURN) {
m_freem(m);
+ }
m = next_packet;
}
} else if (ifproto->proto_kpi == kProtoKPI_v2) {
/* Version 2 protocols support packet lists */
error = (*ifproto->kpi.v2.input)(ifproto->ifp,
ifproto->protocol_family, m);
- if (error != 0 && error != EJUSTRETURN)
+ if (error != 0 && error != EJUSTRETURN) {
m_freem_list(m);
+ }
}
- return;
}
/*
 * Accumulate the counters in s into the pending statistics of the input
 * thread (inp->stats).  Each field is only touched when the increment is
 * non-zero.  When poll is TRUE the inbound packet and byte counts are also
 * added to the per-interface poll counters (ifp->if_poll_tstats); the
 * removed lines show these used to live in inp->tstats.
 */
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
- struct dlil_threading_info *inp, boolean_t poll)
+ struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
{
struct ifnet_stat_increment_param *d = &inp->stats;
- if (s->packets_in != 0)
+ if (s->packets_in != 0) {
d->packets_in += s->packets_in;
- if (s->bytes_in != 0)
+ }
+ if (s->bytes_in != 0) {
d->bytes_in += s->bytes_in;
- if (s->errors_in != 0)
+ }
+ if (s->errors_in != 0) {
d->errors_in += s->errors_in;
+ }
- if (s->packets_out != 0)
+ if (s->packets_out != 0) {
d->packets_out += s->packets_out;
- if (s->bytes_out != 0)
+ }
+ if (s->bytes_out != 0) {
d->bytes_out += s->bytes_out;
- if (s->errors_out != 0)
+ }
+ if (s->errors_out != 0) {
d->errors_out += s->errors_out;
+ }
- if (s->collisions != 0)
+ if (s->collisions != 0) {
d->collisions += s->collisions;
- if (s->dropped != 0)
+ }
+ if (s->dropped != 0) {
d->dropped += s->dropped;
+ }
- if (poll)
- PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
/* poll-mode input is additionally tracked on the interface itself */
+ if (poll) {
+ PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
+ }
}
-static void
+static boolean_t
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
struct ifnet_stat_increment_param *s = &inp->stats;
atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
s->dropped = 0;
}
- /*
- * If we went over the threshold, notify NetworkStatistics.
- */
- if (ifp->if_data_threshold &&
- (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
- ifp->if_data_threshold) {
- ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
- nstat_ifnet_threshold_reached(ifp->if_index);
- }
+
/*
* No need for atomic operations as they are modified here
* only from within the DLIL input thread context.
*/
- if (inp->tstats.packets != 0) {
- inp->pstats.ifi_poll_packets += inp->tstats.packets;
- inp->tstats.packets = 0;
+ if (ifp->if_poll_tstats.packets != 0) {
+ ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
+ ifp->if_poll_tstats.packets = 0;
}
- if (inp->tstats.bytes != 0) {
- inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
- inp->tstats.bytes = 0;
+ if (ifp->if_poll_tstats.bytes != 0) {
+ ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
+ ifp->if_poll_tstats.bytes = 0;
}
+
+ return ifp->if_data_threshold != 0;
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
- return (dlil_input_packet_list_common(ifp, m, 0,
- IFNET_MODEL_INPUT_POLL_OFF, FALSE));
+ return dlil_input_packet_list_common(ifp, m, 0,
+ IFNET_MODEL_INPUT_POLL_OFF, FALSE);
}
__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
u_int32_t cnt, ifnet_model_t mode)
{
- return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
+ return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
- int error = 0;
- protocol_family_t protocol_family;
- mbuf_t next_packet;
- ifnet_t ifp = ifp_param;
- char * frame_header;
- struct if_proto * last_ifproto = NULL;
- mbuf_t pkt_first = NULL;
- mbuf_t * pkt_next = NULL;
- u_int32_t poll_thresh = 0, poll_ival = 0;
+ int error = 0;
+ protocol_family_t protocol_family;
+ mbuf_t next_packet;
+ ifnet_t ifp = ifp_param;
+ char *frame_header = NULL;
+ struct if_proto *last_ifproto = NULL;
+ mbuf_t pkt_first = NULL;
+ mbuf_t *pkt_next = NULL;
+ u_int32_t poll_thresh = 0, poll_ival = 0;
- KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
+ KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
- (poll_ival = if_rxpoll_interval_pkts) > 0)
+ (poll_ival = if_rxpoll_interval_pkts) > 0) {
poll_thresh = cnt;
+ }
while (m != NULL) {
struct if_proto *ifproto = NULL;
int iorefcnt = 0;
- uint32_t pktf_mask; /* pkt flags to preserve */
+ uint32_t pktf_mask; /* pkt flags to preserve */
- if (ifp_param == NULL)
+ if (ifp_param == NULL) {
ifp = m->m_pkthdr.rcvif;
+ }
- if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
- poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
+ if ((ifp->if_eflags & IFEF_RXPOLL) &&
+ (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
+ poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
ifnet_poll(ifp);
+ }
/* Check if this mbuf looks valid */
MBUF_INPUT_CHECK(m, ifp);
* away, so optimize for that.
*/
if (ifp != lo_ifp) {
- if (!ifnet_is_attached(ifp, 1)) {
+ if (!ifnet_datamov_begin(ifp)) {
m_freem(m);
goto next;
}
iorefcnt = 1;
- pktf_mask = 0;
+ /*
+ * Preserve the time stamp if it was set.
+ */
+ pktf_mask = PKTF_TS_VALID;
} else {
/*
* If this arrived on lo0, preserve interface addr
* info to allow for connectivity between loopback
* and local interface addresses.
*/
- pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
+ pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
}
/* make sure packet comes in clean */
&protocol_family);
ifnet_lock_done(ifp);
if (error != 0) {
- if (error == EJUSTRETURN)
+ if (error == EJUSTRETURN) {
goto next;
+ }
protocol_family = 0;
}
+ pktap_input(ifp, protocol_family, m, frame_header);
+
+ /* Drop v4 packets received on CLAT46 enabled interface */
+ if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
+ m_freem(m);
+ ip6stat.ip6s_clat464_in_v4_drop++;
+ goto next;
+ }
+
+		/*
+		 * Translate the packet if it is received on CLAT interface.
+		 *
+		 * Only IPv6 input that dlil_is_clat_needed() selects is handed
+		 * to dlil_clat64().  When the result is an IPv4 packet
+		 * (protocol_family becomes PF_INET) frame_header is re-pointed
+		 * so that later stages see a header matching the translated
+		 * payload.  Every drop path releases the packet with m_freem()
+		 * and jumps to next.
+		 */
+		if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
+		    && dlil_is_clat_needed(protocol_family, m)) {
+			char *data = NULL;
+			struct ether_header eh;
+			struct ether_header *ehp = NULL;
+
+			if (ifp->if_type == IFT_ETHER) {
+				ehp = (struct ether_header *)(void *)frame_header;
+				/* Skip RX Ethernet packets if they are not IPV6 */
+				if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
+					goto skip_clat;
+				}
+
+				/* Keep a copy of frame_header for Ethernet packets */
+				bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
+			}
+			error = dlil_clat64(ifp, &protocol_family, &m);
+			if (error != 0) {
+				m_freem(m);
+				ip6stat.ip6s_clat464_in_drop++;
+				goto next;
+			}
+			/* Native v6 should be No-op */
+			if (protocol_family != PF_INET) {
+				goto skip_clat;
+			}
+
+			/*
+			 * The data pointer is only needed for translated v4
+			 * packets, so fetch it after the checks above.
+			 */
+			data = (char *) mbuf_data(m);
+
+			/* Do this only for translated v4 packets. */
+			switch (ifp->if_type) {
+			case IFT_CELLULAR:
+				frame_header = data;
+				break;
+			case IFT_ETHER:
+				/*
+				 * Drop if the mbuf doesn't have enough
+				 * space for Ethernet header
+				 */
+				if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
+					/*
+					 * Use m_freem(), like the other drop
+					 * paths here, so the whole mbuf chain
+					 * is released rather than only its
+					 * first mbuf.
+					 */
+					m_freem(m);
+					ip6stat.ip6s_clat464_in_drop++;
+					goto next;
+				}
+				/*
+				 * Set the frame_header ETHER_HDR_LEN bytes
+				 * preceding the data pointer. Change
+				 * the ether_type too.
+				 */
+				frame_header = data - ETHER_HDR_LEN;
+				eh.ether_type = htons(ETHERTYPE_IP);
+				bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
+				break;
+			default:
+				/* other interface types keep frame_header as is */
+				break;
+			}
+		}
+skip_clat:
if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
- !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
+ !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
dlil_input_cksum_dbg(ifp, m, frame_header,
protocol_family);
-
+ }
/*
* For partial checksum offload, we expect the driver to
* set the start offset indicating the start of the span
* adjust this start offset accordingly because the data
* pointer has been advanced beyond the link-layer header.
*
- * Don't adjust if the interface is a bridge member, as
- * the adjustment will occur from the context of the
- * bridge interface during input.
+ * Virtual lan types (bridge, vlan, bond) can call
+ * dlil_input_packet_list() with the same packet with the
+ * checksum flags set. Set a flag indicating that the
+ * adjustment has already been done.
*/
- if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
+ if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
+ /* adjustment has already been done */
+ } else if ((m->m_pkthdr.csum_flags &
(CSUM_DATA_VALID | CSUM_PARTIAL)) ==
(CSUM_DATA_VALID | CSUM_PARTIAL)) {
int adj;
-
if (frame_header == NULL ||
frame_header < (char *)mbuf_datastart(m) ||
frame_header > (char *)m->m_data ||
} else {
m->m_pkthdr.csum_rx_start -= adj;
}
+ /* make sure we don't adjust more than once */
+ m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
+ }
+ if (clat_debug) {
+ pktap_input(ifp, protocol_family, m, frame_header);
}
- pktap_input(ifp, protocol_family, m, frame_header);
-
- if (m->m_flags & (M_BCAST|M_MCAST))
+ if (m->m_flags & (M_BCAST | M_MCAST)) {
atomic_add_64(&ifp->if_imcasts, 1);
+ }
- /* run interface filters, exclude VLAN packets PR-3586856 */
- if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
- error = dlil_interface_filters_input(ifp, &m,
- &frame_header, protocol_family);
- if (error != 0) {
- if (error != EJUSTRETURN)
- m_freem(m);
- goto next;
+ /* run interface filters */
+ error = dlil_interface_filters_input(ifp, &m,
+ &frame_header, protocol_family);
+ if (error != 0) {
+ if (error != EJUSTRETURN) {
+ m_freem(m);
}
+ goto next;
}
- if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
+ if ((m->m_flags & M_PROMISC) != 0) {
m_freem(m);
goto next;
}
VERIFY(ifproto == NULL);
ifnet_lock_shared(ifp);
/* callee holds a proto refcnt upon success */
- ifproto = find_attached_proto(ifp, protocol_family);
+ ifproto = find_attached_proto(ifp, protocol_family);
ifnet_lock_done(ifp);
}
if (ifproto == NULL) {
m = next_packet;
/* update the driver's multicast filter, if needed */
- if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+ if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
ifp->if_updatemcasts = 0;
- if (iorefcnt == 1)
- ifnet_decr_iorefcnt(ifp);
+ }
+ if (iorefcnt == 1) {
+ ifnet_datamov_end(ifp);
+ }
}
- KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
+ KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
errno_t
errno_t err;
err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
- if (err == EAFNOSUPPORT)
+ if (err == EAFNOSUPPORT) {
err = 0;
- printf("%s: %s %d suspended link-layer multicast membership(s) "
+ }
+ DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
"(err=%d)\n", if_name(ifp),
(err == 0 ? "successfully restored" : "failed to restore"),
ifp->if_updatemcasts, err);
/* just return success */
- return (0);
+ return 0;
}
-static int
-dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
+/*
+ * Post a kernel event message.
+ *
+ * If ifp is set, we will increment the generation for the interface
+ * before posting.  When NECP is compiled in, necp_update_all_clients()
+ * is called as well.  Returns the result of kev_post_msg(event).
+ */
+int
+dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
- struct ifnet_filter *filter;
+ if (ifp != NULL) {
+ ifnet_increment_generation(ifp);
+ }
- /* Get an io ref count if the interface is attached */
- if (!ifnet_is_attached(ifp, 1))
- goto done;
+#if NECP
+ necp_update_all_clients();
+#endif /* NECP */
+
+ return kev_post_msg(event);
+}
+
+/*
+ * Post a KEV_DL_SIFFLAGS data-link event for ifp.
+ *
+ * The event payload is a net_event_data carrying the interface name,
+ * family and unit.  The message goes out through
+ * dlil_post_complete_msg() with ifp supplied, and the result of that call
+ * is not examined.
+ */
+__private_extern__ void
+dlil_post_sifflags_msg(struct ifnet * ifp)
+{
+	struct net_event_data payload;
+	struct kev_msg msg;
+
+	/* payload: which interface the event is about */
+	bzero(&payload, sizeof(payload));
+	strlcpy(payload.if_name, ifp->if_name, IFNAMSIZ);
+	payload.if_family = ifp->if_family;
+	payload.if_unit = (u_int32_t) ifp->if_unit;
+
+	/* envelope: Apple / network class / data-link subclass */
+	bzero(&msg, sizeof(msg));
+	msg.vendor_code = KEV_VENDOR_APPLE;
+	msg.kev_class = KEV_NETWORK_CLASS;
+	msg.kev_subclass = KEV_DL_SUBCLASS;
+	msg.event_code = KEV_DL_SIFFLAGS;
+
+	/* single data vector; a zero length in dv[1] ends the list */
+	msg.dv[0].data_ptr = &payload;
+	msg.dv[0].data_length = sizeof(payload);
+	msg.dv[1].data_length = 0;
+
+	dlil_post_complete_msg(ifp, &msg);
+}
+
+#define TMP_IF_PROTO_ARR_SIZE 10
+static int
+dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
+{
+ struct ifnet_filter *filter = NULL;
+ struct if_proto *proto = NULL;
+ int if_proto_count = 0;
+ struct if_proto **tmp_ifproto_arr = NULL;
+ struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
+ int tmp_ifproto_arr_idx = 0;
+ bool tmp_malloc = false;
/*
* Pass the event to the interface filters
if_flt_monitor_unbusy(ifp);
lck_mtx_unlock(&ifp->if_flt_lock);
+ /* Get an io ref count if the interface is attached */
+ if (!ifnet_is_attached(ifp, 1)) {
+ goto done;
+ }
+
+ /*
+ * An embedded tmp_list_entry in if_proto may still get
+ * over-written by another thread after giving up ifnet lock,
+ * therefore we are avoiding embedded pointers here.
+ */
ifnet_lock_shared(ifp);
- if (ifp->if_proto_hash != NULL) {
+ if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
+ if (if_proto_count) {
int i;
+ VERIFY(ifp->if_proto_hash != NULL);
+ if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
+ tmp_ifproto_arr = tmp_ifproto_stack_arr;
+ } else {
+ MALLOC(tmp_ifproto_arr, struct if_proto **,
+ sizeof(*tmp_ifproto_arr) * if_proto_count,
+ M_TEMP, M_ZERO);
+ if (tmp_ifproto_arr == NULL) {
+ ifnet_lock_done(ifp);
+ goto cleanup;
+ }
+ tmp_malloc = true;
+ }
for (i = 0; i < PROTO_HASH_SLOTS; i++) {
- struct if_proto *proto;
-
SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
next_hash) {
- proto_media_event eventp =
- (proto->proto_kpi == kProtoKPI_v1 ?
- proto->kpi.v1.event :
- proto->kpi.v2.event);
-
- if (eventp != NULL) {
- if_proto_ref(proto);
- ifnet_lock_done(ifp);
-
- eventp(ifp, proto->protocol_family,
- event);
-
- ifnet_lock_shared(ifp);
- if_proto_free(proto);
- }
+ if_proto_ref(proto);
+ tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
+ tmp_ifproto_arr_idx++;
}
}
+ VERIFY(if_proto_count == tmp_ifproto_arr_idx);
}
ifnet_lock_done(ifp);
+ for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
+ tmp_ifproto_arr_idx++) {
+ proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
+ VERIFY(proto != NULL);
+ proto_media_event eventp =
+ (proto->proto_kpi == kProtoKPI_v1 ?
+ proto->kpi.v1.event :
+ proto->kpi.v2.event);
+
+ if (eventp != NULL) {
+ eventp(ifp, proto->protocol_family,
+ event);
+ }
+ if_proto_free(proto);
+ }
+
+cleanup:
+ if (tmp_malloc) {
+ FREE(tmp_ifproto_arr, M_TEMP);
+ }
+
/* Pass the event to the interface */
- if (ifp->if_event != NULL)
+ if (ifp->if_event != NULL) {
ifp->if_event(ifp, event);
+ }
/* Release the io ref count */
ifnet_decr_iorefcnt(ifp);
-
done:
- return (kev_post_msg(event));
+ return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
/*
 * Deliver a caller-built kernel event for ifp.
 *
 * The vendor, class, subclass and event codes are copied from event into
 * a local kev_msg whose single data vector points at event->event_data.
 * The message is then handed to dlil_event_internal() with
 * update_generation set to TRUE.
 *
 * Returns EINVAL when ifp or event is NULL, otherwise the result of
 * dlil_event_internal().
 */
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
- struct kev_msg kev_msg;
+ struct kev_msg kev_msg;
int result = 0;
- if (ifp == NULL || event == NULL)
- return (EINVAL);
+ if (ifp == NULL || event == NULL) {
+ return EINVAL;
+ }
- bzero(&kev_msg, sizeof (kev_msg));
- kev_msg.vendor_code = event->vendor_code;
- kev_msg.kev_class = event->kev_class;
- kev_msg.kev_subclass = event->kev_subclass;
- kev_msg.event_code = event->event_code;
+ bzero(&kev_msg, sizeof(kev_msg));
+ kev_msg.vendor_code = event->vendor_code;
+ kev_msg.kev_class = event->kev_class;
+ kev_msg.kev_subclass = event->kev_subclass;
+ kev_msg.event_code = event->event_code;
kev_msg.dv[0].data_ptr = &event->event_data[0];
/*
 * NOTE(review): no lower bound on total_size is checked here; confirm
 * that callers always pass total_size >= KEV_MSG_HEADER_SIZE.
 */
kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
kev_msg.dv[1].data_length = 0;
- result = dlil_event_internal(ifp, &kev_msg);
+ result = dlil_event_internal(ifp, &kev_msg, TRUE);
- return (result);
+ return result;
}
#if CONFIG_MACF_NET
switch (family) {
case PF_INET:
m = m_pullup(*mp, sizeof(struct ip));
- if (m == NULL)
+ if (m == NULL) {
break;
+ }
*mp = m;
ip = mtod(m, struct ip *);
- if (ip->ip_p == IPPROTO_TCP)
+ if (ip->ip_p == IPPROTO_TCP) {
type = SOCK_STREAM;
- else if (ip->ip_p == IPPROTO_UDP)
+ } else if (ip->ip_p == IPPROTO_UDP) {
type = SOCK_DGRAM;
+ }
break;
case PF_INET6:
m = m_pullup(*mp, sizeof(struct ip6_hdr));
- if (m == NULL)
+ if (m == NULL) {
break;
+ }
*mp = m;
ip6 = mtod(m, struct ip6_hdr *);
- if (ip6->ip6_nxt == IPPROTO_TCP)
+ if (ip6->ip6_nxt == IPPROTO_TCP) {
type = SOCK_STREAM;
- else if (ip6->ip6_nxt == IPPROTO_UDP)
+ } else if (ip6->ip6_nxt == IPPROTO_UDP) {
type = SOCK_DGRAM;
+ }
break;
}
}
- return (type);
+ return type;
}
#endif
-/*
- * This is mostly called from the context of the DLIL input thread;
- * because of that there is no need for atomic operations.
- */
-static __inline void
-ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
/*
 * Count the mbufs linked through m_next starting at m and bump the
 * matching histogram bucket in cls with an atomic add: one, two, three,
 * four, or five-or-more.  A NULL m (chain length 0) updates nothing.
 */
+static void
+dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
- if (!(m->m_flags & M_PKTHDR))
- return;
-
- switch (m_get_traffic_class(m)) {
- case MBUF_TC_BE:
- ifp->if_tc.ifi_ibepackets++;
- ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
- break;
- case MBUF_TC_BK:
- ifp->if_tc.ifi_ibkpackets++;
- ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
- break;
- case MBUF_TC_VI:
- ifp->if_tc.ifi_ivipackets++;
- ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
- break;
- case MBUF_TC_VO:
- ifp->if_tc.ifi_ivopackets++;
- ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
- break;
- default:
- break;
- }
+ mbuf_t n = m;
+ int chainlen = 0;
- if (mbuf_is_traffic_class_privileged(m)) {
- ifp->if_tc.ifi_ipvpackets++;
- ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
/* walk the m_next links of this one packet */
+ while (n != NULL) {
+ chainlen++;
+ n = n->m_next;
}
-}
-
-/*
- * This is called from DLIL output, hence multiple threads could end
- * up modifying the statistics. We trade off acccuracy for performance
- * by not using atomic operations here.
- */
-static __inline void
-ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
-{
- if (!(m->m_flags & M_PKTHDR))
- return;
-
- switch (m_get_traffic_class(m)) {
- case MBUF_TC_BE:
- ifp->if_tc.ifi_obepackets++;
- ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
+ switch (chainlen) {
+ case 0:
+ break;
+ case 1:
+ atomic_add_64(&cls->cls_one, 1);
break;
- case MBUF_TC_BK:
- ifp->if_tc.ifi_obkpackets++;
- ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
+ case 2:
+ atomic_add_64(&cls->cls_two, 1);
break;
- case MBUF_TC_VI:
- ifp->if_tc.ifi_ovipackets++;
- ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
+ case 3:
+ atomic_add_64(&cls->cls_three, 1);
break;
- case MBUF_TC_VO:
- ifp->if_tc.ifi_ovopackets++;
- ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
+ case 4:
+ atomic_add_64(&cls->cls_four, 1);
break;
/* chains of five or more mbufs all share the last bucket */
+ case 5:
default:
+ atomic_add_64(&cls->cls_five_or_more, 1);
break;
}
-
- if (mbuf_is_traffic_class_privileged(m)) {
- ifp->if_tc.ifi_opvpackets++;
- ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
- }
}
/*
int retval = 0;
char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
char dst_linkaddr_buffer[MAX_LINKADDR * 4];
- struct if_proto *proto = NULL;
- mbuf_t m;
- mbuf_t send_head = NULL;
- mbuf_t *send_tail = &send_head;
+ struct if_proto *proto = NULL;
+ mbuf_t m = NULL;
+ mbuf_t send_head = NULL;
+ mbuf_t *send_tail = &send_head;
int iorefcnt = 0;
u_int32_t pre = 0, post = 0;
u_int32_t fpkts = 0, fbytes = 0;
int32_t flen = 0;
+ struct timespec now;
+ u_int64_t now_nsec;
+ boolean_t did_clat46 = FALSE;
+ protocol_family_t old_proto_family = proto_family;
+ struct sockaddr_in6 dest6;
+ struct rtentry *rt = NULL;
+ u_int32_t m_loop_set = 0;
KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
- /* Get an io refcnt if the interface is attached to prevent ifnet_detach
- * from happening while this operation is in progress */
- if (!ifnet_is_attached(ifp, 1)) {
+ /*
+ * Get an io refcnt if the interface is attached to prevent ifnet_detach
+ * from happening while this operation is in progress
+ */
+ if (!ifnet_datamov_begin(ifp)) {
retval = ENXIO;
goto cleanup;
}
iorefcnt = 1;
+ VERIFY(ifp->if_output_dlil != NULL);
+
/* update the driver's multicast filter, if needed */
- if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+ if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
ifp->if_updatemcasts = 0;
+ }
frame_type = frame_type_buffer;
dst_linkaddr = dst_linkaddr_buffer;
}
preout_again:
- if (packetlist == NULL)
+ if (packetlist == NULL) {
goto cleanup;
+ }
m = packetlist;
packetlist = packetlist->m_nextpkt;
m->m_nextpkt = NULL;
+ /*
+ * Perform address family translation for the first
+ * packet outside the loop in order to perform address
+ * lookup for the translated proto family.
+ */
+ if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
+ (ifp->if_type == IFT_CELLULAR ||
+ dlil_is_clat_needed(proto_family, m))) {
+ retval = dlil_clat46(ifp, &proto_family, &m);
+ /*
+ * Go to the next packet if translation fails
+ */
+ if (retval != 0) {
+ m_freem(m);
+ m = NULL;
+ ip6stat.ip6s_clat464_out_drop++;
+ /* Make sure that the proto family is PF_INET */
+ ASSERT(proto_family == PF_INET);
+ goto preout_again;
+ }
+ /*
+ * Free the old one and make it point to the IPv6 proto structure.
+ *
+ * Change proto for the first time we have successfully
+ * performed address family translation.
+ */
+ if (!did_clat46 && proto_family == PF_INET6) {
+ did_clat46 = TRUE;
+
+ if (proto != NULL) {
+ if_proto_free(proto);
+ }
+ ifnet_lock_shared(ifp);
+ /* callee holds a proto refcnt upon success */
+ proto = find_attached_proto(ifp, proto_family);
+ if (proto == NULL) {
+ ifnet_lock_done(ifp);
+ retval = ENXIO;
+ m_freem(m);
+ m = NULL;
+ goto cleanup;
+ }
+ ifnet_lock_done(ifp);
+ if (ifp->if_type == IFT_ETHER) {
+ /* Update the dest to translated v6 address */
+ dest6.sin6_len = sizeof(struct sockaddr_in6);
+ dest6.sin6_family = AF_INET6;
+ dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
+ dest = (const struct sockaddr *)&dest6;
+
+ /*
+ * Lookup route to the translated destination
+ * Free this route ref during cleanup
+ */
+ rt = rtalloc1_scoped((struct sockaddr *)&dest6,
+ 0, 0, ifp->if_index);
+
+ route = rt;
+ }
+ }
+ }
+
+ /*
+ * This path gets packet chain going to the same destination.
+ * The pre output routine is used to either trigger resolution of
+ * the next hop or retrieve the next hop's link layer addressing.
+ * For ex: ether_inet(6)_pre_output routine.
+ *
+ * If the routine returns EJUSTRETURN, it implies that packet has
+ * been queued, and therefore we have to call preout_again for the
+ * following packet in the chain.
+ *
+ * For errors other than EJUSTRETURN, the current packet is freed
+ * and the rest of the chain (pointed by packetlist) is freed as
+ * part of clean up.
+ *
+ * Else if there is no error the retrieved information is used for
+ * all the packets in the chain.
+ */
if (raw == 0) {
proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
frame_type, dst_linkaddr);
if (retval != 0) {
- if (retval == EJUSTRETURN)
+ if (retval == EJUSTRETURN) {
goto preout_again;
+ }
m_freem(m);
+ m = NULL;
goto cleanup;
}
}
#endif
do {
+ /*
+ * Perform address family translation if needed.
+ * For now we only support stateless 4 to 6 translation
+ * on the out path.
+ *
+ * The routine below translates IP header, updates protocol
+ * checksum and also translates ICMP.
+ *
+ * We skip the first packet as it is already translated and
+ * the proto family is set to PF_INET6.
+ */
+ if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
+ (ifp->if_type == IFT_CELLULAR ||
+ dlil_is_clat_needed(proto_family, m))) {
+ retval = dlil_clat46(ifp, &proto_family, &m);
+ /* Goto the next packet if the translation fails */
+ if (retval != 0) {
+ m_freem(m);
+ m = NULL;
+ ip6stat.ip6s_clat464_out_drop++;
+ goto next;
+ }
+ }
+
#if CONFIG_DTRACE
if (!raw && proto_family == PF_INET) {
- struct ip *ip = mtod(m, struct ip*);
- DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
- struct ip *, ip, struct ifnet *, ifp,
- struct ip *, ip, struct ip6_hdr *, NULL);
-
+ struct ip *ip = mtod(m, struct ip *);
+ DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+ struct ip *, ip, struct ifnet *, ifp,
+ struct ip *, ip, struct ip6_hdr *, NULL);
} else if (!raw && proto_family == PF_INET6) {
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
- DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
- struct ip6_hdr *, ip6, struct ifnet*, ifp,
- struct ip*, NULL, struct ip6_hdr *, ip6);
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+ struct ip6_hdr *, ip6, struct ifnet *, ifp,
+ struct ip *, NULL, struct ip6_hdr *, ip6);
}
#endif /* CONFIG_DTRACE */
m->m_pkthdr.rcvif = ifp;
rcvif_set = 1;
}
-
+ m_loop_set = m->m_flags & M_LOOP;
retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
frame_type, &pre, &post);
if (retval != 0) {
- if (retval != EJUSTRETURN)
+ if (retval != EJUSTRETURN) {
m_freem(m);
+ }
goto next;
}
m->m_pkthdr.csum_tx_start += pre;
}
- if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
+ if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
dlil_output_cksum_dbg(ifp, m, pre,
proto_family);
+ }
/*
* Clear the ifp if it was set above, and to be
* are clearing the one that will go down to the
* layer below.
*/
- if (rcvif_set && m->m_pkthdr.rcvif == ifp)
+ if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
m->m_pkthdr.rcvif = NULL;
+ }
}
/*
* Let interface filters (if any) do their thing ...
*/
- /* Do not pass VLAN tagged packets to filters PR-3586856 */
- if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
- retval = dlil_interface_filters_output(ifp,
- &m, proto_family);
- if (retval != 0) {
- if (retval != EJUSTRETURN)
- m_freem(m);
- goto next;
+ retval = dlil_interface_filters_output(ifp, &m, proto_family);
+ if (retval != 0) {
+ if (retval != EJUSTRETURN) {
+ m_freem(m);
}
+ goto next;
}
/*
* Strip away M_PROTO1 bit prior to sending packet
* not cross page(s), the following is a no-op.
*/
if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
- if ((m = m_normalize(m)) == NULL)
+ if ((m = m_normalize(m)) == NULL) {
goto next;
+ }
}
/*
goto cleanup;
}
+ ifp_inc_traffic_class_out(ifp, m);
+ pktap_output(ifp, proto_family, m, pre, post);
+
/*
- * If the packet service class is not background,
- * update the timestamp to indicate recent activity
- * on a foreground socket.
+ * Count the number of elements in the mbuf chain
*/
- if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) &&
- (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
- m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB)
- ifp->if_fg_sendts = net_uptime();
+ if (tx_chain_len_count) {
+ dlil_count_chain_len(m, &tx_chain_len_stats);
+ }
- ifp_inc_traffic_class_out(ifp, m);
- pktap_output(ifp, proto_family, m, pre, post);
+ /*
+ * Record timestamp; ifnet_enqueue() will use this info
+ * rather than redoing the work. An optimization could
+ * involve doing this just once at the top, if there are
+ * no interface filters attached, but that's probably
+ * not a big deal.
+ */
+ nanouptime(&now);
+ net_timernsec(&now, &now_nsec);
+ (void) mbuf_set_timestamp(m, now_nsec, TRUE);
+
+ /*
+ * Discard partial sum information if this packet originated
+ * from another interface; the packet would already have the
+ * final checksum and we shouldn't recompute it.
+ */
+ if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
+ (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
+ (CSUM_DATA_VALID | CSUM_PARTIAL)) {
+ m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
+ m->m_pkthdr.csum_data = 0;
+ }
/*
* Finally, call the driver.
*/
- if (ifp->if_eflags & IFEF_SENDLIST) {
+ if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
flen += (m_pktlen(m) - (pre + post));
m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
}
KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
0, 0, 0, 0, 0);
- retval = (*ifp->if_output)(ifp, m);
+ retval = (*ifp->if_output_dlil)(ifp, m);
if (retval == EQFULL || retval == EQSUSPENDED) {
if (adv != NULL && adv->code == FADV_SUCCESS) {
adv->code = (retval == EQFULL ?
fpkts++;
}
if (retval != 0 && dlil_verbose) {
- printf("%s: output error on %s retval = %d\n",
+ DLIL_PRINTF("%s: output error on %s retval = %d\n",
__func__, if_name(ifp),
retval);
}
KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
0, 0, 0, 0, 0);
}
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+next:
+ m = packetlist;
+ if (m != NULL) {
+ m->m_flags |= m_loop_set;
+ packetlist = packetlist->m_nextpkt;
+ m->m_nextpkt = NULL;
+ }
+ /* Reset the proto family to old proto family for CLAT */
+ if (did_clat46) {
+ proto_family = old_proto_family;
+ }
+ } while (m != NULL);
+
+ if (send_head != NULL) {
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
+ 0, 0, 0, 0, 0);
+ if (ifp->if_eflags & IFEF_SENDLIST) {
+ retval = (*ifp->if_output_dlil)(ifp, send_head);
+ if (retval == EQFULL || retval == EQSUSPENDED) {
+ if (adv != NULL) {
+ adv->code = (retval == EQFULL ?
+ FADV_FLOW_CONTROLLED :
+ FADV_SUSPENDED);
+ }
+ retval = 0;
+ }
+ if (retval == 0 && flen > 0) {
+ fbytes += flen;
+ fpkts++;
+ }
+ if (retval != 0 && dlil_verbose) {
+ DLIL_PRINTF("%s: output error on %s retval = %d\n",
+ __func__, if_name(ifp), retval);
+ }
+ } else {
+ struct mbuf *send_m;
+ int enq_cnt = 0;
+ VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
+ while (send_head != NULL) {
+ send_m = send_head;
+ send_head = send_m->m_nextpkt;
+ send_m->m_nextpkt = NULL;
+ retval = (*ifp->if_output_dlil)(ifp, send_m);
+ if (retval == EQFULL || retval == EQSUSPENDED) {
+ if (adv != NULL) {
+ adv->code = (retval == EQFULL ?
+ FADV_FLOW_CONTROLLED :
+ FADV_SUSPENDED);
+ }
+ retval = 0;
+ }
+ if (retval == 0) {
+ enq_cnt++;
+ if (flen > 0) {
+ fpkts++;
+ }
+ }
+ if (retval != 0 && dlil_verbose) {
+ DLIL_PRINTF("%s: output error on %s "
+ "retval = %d\n",
+ __func__, if_name(ifp), retval);
+ }
+ }
+ if (enq_cnt > 0) {
+ fbytes += flen;
+ ifnet_start(ifp);
+ }
+ }
+ KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+ }
+
+ KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+cleanup:
+ if (fbytes > 0) {
+ ifp->if_fbytes += fbytes;
+ }
+ if (fpkts > 0) {
+ ifp->if_fpackets += fpkts;
+ }
+ if (proto != NULL) {
+ if_proto_free(proto);
+ }
+ if (packetlist) { /* if any packets are left, clean up */
+ mbuf_freem_list(packetlist);
+ }
+ if (retval == EJUSTRETURN) {
+ retval = 0;
+ }
+ if (iorefcnt == 1) {
+ ifnet_datamov_end(ifp);
+ }
+ if (rt != NULL) {
+ rtfree(rt);
+ rt = NULL;
+ }
+
+ return retval;
+}
+
+/*
+ * This routine checks if the destination address is not a loopback, link-local,
+ * multicast or broadcast address.
+ *
+ * The destination is taken from the IPv4 or IPv6 header at the front of the
+ * first mbuf, selected by proto_family, and tested with CLAT46_NEEDED() or
+ * CLAT64_NEEDED() respectively (both macros are defined elsewhere).
+ *
+ * Returns 1 when the test for that family is true and 0 otherwise; a
+ * proto_family other than PF_INET or PF_INET6 always yields 0.
+ *
+ * NOTE(review): only the PF_INET6 case checks the packet length before the
+ * header is examined. The PF_INET case presumably relies on the caller
+ * having a complete struct ip in the first mbuf -- confirm.
+ */
+static int
+dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
+{
+ int ret = 0; /* default: no translation needed */
+ switch (proto_family) {
+ case PF_INET: {
+ struct ip *iph = mtod(m, struct ip *);
+ if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) { /* macro is handed the address in host byte order */
+ ret = 1;
+ }
+ break;
+ }
+ case PF_INET6: {
+ struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
+ if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) && /* header is only read when the packet is long enough */
+ CLAT64_NEEDED(&ip6h->ip6_dst)) {
+ ret = 1;
+ }
+ break;
+ }
+ }
+
+ return ret;
+}
+/*
+ * @brief This routine translates an IPv4 packet to an IPv6 packet,
+ * updates the protocol checksum and also translates ICMP, including
+ * the inner header.
+ *
+ * The mbuf is wrapped in a pbuf for the duration of the translation. The
+ * steps are: look up the interface's IN6_IFF_CLAT46 address (new source),
+ * synthesize the IPv6 destination from the IPv4 one, translate the IP
+ * header, translate the protocol header, and finally insert an IPv6
+ * fragment header when the original packet was a fragment. Each failure
+ * point bumps its own ip6stat.ip6s_clat464_out_* drop counter.
+ *
+ * @param ifp Pointer to the interface
+ * @param proto_family pointer to protocol family. It is updated if function
+ * performs the translation successfully.
+ * @param m Pointer to the pointer pointing to the packet. Needed because this
+ * routine can end up changing the mbuf to a different one. Whenever the
+ * pbuf is still valid at exit, *m is set to the pbuf's mbuf, on failure as
+ * well as on success; the packet is not freed here (dlil_output() frees *m
+ * when this routine returns non-zero).
+ *
+ * @return 0 on success or else a negative value. Most failures return -1;
+ * the results of nat464_synthesize_ipv6() and nat464_insert_frag46() are
+ * passed through unchanged (presumably also negative -- confirm).
+ */
+static errno_t
+dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
+{
+ VERIFY(*proto_family == PF_INET);
+ VERIFY(IS_INTF_CLAT46(ifp));
+
+ pbuf_t pbuf_store, *pbuf = NULL;
+ struct ip *iph = NULL;
+ struct in_addr osrc, odst;
+ uint8_t proto = 0;
+ struct in6_ifaddr *ia6_clat_src = NULL;
+ struct in6_addr *src = NULL;
+ struct in6_addr dst;
+ int error = 0;
+ uint32_t off = 0;
+ uint64_t tot_len = 0;
+ uint16_t ip_id_val = 0;
+ uint16_t ip_frag_off = 0;
+
+ boolean_t is_frag = FALSE;
+ boolean_t is_first_frag = TRUE;
+ boolean_t is_last_frag = TRUE;
+
+ pbuf_init_mbuf(&pbuf_store, *m, ifp);
+ pbuf = &pbuf_store;
+ iph = pbuf->pb_data;
+
+ /* Snapshot every IPv4 header field needed later; iph is invalidated below */
+ osrc = iph->ip_src;
+ odst = iph->ip_dst;
+ proto = iph->ip_p;
+ off = iph->ip_hl << 2; /* ip_hl counts 32-bit words; << 2 converts to bytes */
+ ip_id_val = iph->ip_id; /* kept exactly as stored in the header (no ntohs) */
+ ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
+
+ tot_len = ntohs(iph->ip_len);
+
+ /*
+ * For packets that are not first frags
+ * we only need to adjust CSUM.
+ * For 4 to 6, Fragmentation header gets appended
+ * after proto translation.
+ *
+ * The test below is true when any ip_off bit other than IP_DF/IP_RF
+ * is set, i.e. IP_MF or a non-zero fragment offset.
+ */
+ if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
+ is_frag = TRUE;
+
+ /* If the offset is not zero, it is not first frag */
+ if (ip_frag_off != 0) {
+ is_first_frag = FALSE;
+ }
+
+ /* If IP_MF is set, then it is not last frag */
+ if (ntohs(iph->ip_off) & IP_MF) {
+ is_last_frag = FALSE;
+ }
+ }
+
+ /*
+ * Retrieve the local IPv6 CLAT46 address reserved for stateless
+ * translation. The returned address carries a reference that is
+ * dropped with IFA_REMREF() at cleanup.
+ */
+ ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
+ if (ia6_clat_src == NULL) {
+ ip6stat.ip6s_clat464_out_nov6addr_drop++;
+ error = -1;
+ goto cleanup;
+ }
+
+ src = &ia6_clat_src->ia_addr.sin6_addr;
+
+ /*
+ * Translate IPv4 destination to IPv6 destination by using the
+ * prefixes learned through prior PLAT discovery.
+ */
+ if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
+ ip6stat.ip6s_clat464_out_v6synthfail_drop++;
+ goto cleanup;
+ }
+
+ /* Translate the IP header part first; anything but NT_NAT64 is a failure */
+ error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
+ iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
+
+ iph = NULL; /* Invalidate iph as pbuf has been modified */
+
+ if (error != 0) {
+ ip6stat.ip6s_clat464_out_46transfail_drop++;
+ goto cleanup;
+ }
+
+ /*
+ * Translate protocol header, update checksum, checksum flags
+ * and related fields. The original IPv4 addresses saved above are
+ * passed in, together with a flag that is set for non-first fragments.
+ */
+ error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
+ proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
+
+ if (error != 0) {
+ ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
+ goto cleanup;
+ }
+
+ /* Now insert the IPv6 fragment header */
+ if (is_frag) {
+ error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
+
+ if (error != 0) {
+ ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ if (ia6_clat_src != NULL) {
+ IFA_REMREF(&ia6_clat_src->ia_ifa);
+ }
+
+ if (pbuf_is_valid(pbuf)) {
+ *m = pbuf->pb_mbuf; /* hand the (possibly replaced) mbuf back to the caller */
+ pbuf->pb_mbuf = NULL; /* detach it before the pbuf is destroyed */
+ pbuf_destroy(pbuf);
+ } else {
+ error = -1; /* an invalid pbuf is a failure even if every step above succeeded */
+ ip6stat.ip6s_clat464_out_invalpbuf_drop++;
+ }
+
+ if (error == 0) {
+ *proto_family = PF_INET6;
+ ip6stat.ip6s_clat464_out_success++;
+ }
+
+ return error;
+}
+
+/*
+ * @brief This routine translates incoming IPv6 to IPv4 packet,
+ * updates protocol checksum and also translates ICMPv6 outer
+ * and inner headers
+ *
+ * Translation only happens when the packet's IPv6 destination equals the
+ * interface's reserved IN6_IFF_CLAT46 address. If the interface has no
+ * such address, or the destination is a different address, the packet is
+ * left untouched and 0 is returned with *proto_family unchanged.
+ *
+ * @param ifp Pointer to the interface
+ * @param proto_family pointer to protocol family. It is set to PF_INET if
+ * the packet was translated successfully.
+ * @param m Pointer to the pointer pointing to the packet. It may be
+ * replaced: m_pullup() can substitute the mbuf (and leaves *m NULL when it
+ * fails), and after translation *m is set to the pbuf's mbuf whenever the
+ * pbuf is still valid. The packet is not freed here.
+ *
+ * @return 0 on success or else a negative value.
+ */
+static errno_t
+dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
+{
+ VERIFY(*proto_family == PF_INET6);
+ VERIFY(IS_INTF_CLAT46(ifp));
+
+ struct ip6_hdr *ip6h = NULL;
+ struct in6_addr osrc, odst;
+ uint8_t proto = 0;
+ struct in6_ifaddr *ia6_clat_dst = NULL;
+ struct in_ifaddr *ia4_clat_dst = NULL;
+ struct in_addr *dst = NULL;
+ struct in_addr src;
+ int error = 0;
+ uint32_t off = 0;
+ u_int64_t tot_len = 0;
+ uint8_t tos = 0;
+ boolean_t is_first_frag = TRUE;
+
+ /* Incoming mbuf does not contain valid IP6 header */
+ if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
+ ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
+ (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
+ ip6stat.ip6s_clat464_in_tooshort_drop++;
+ return -1;
+ }
+
+ ip6h = mtod(*m, struct ip6_hdr *);
+ /* Validate that mbuf contains IP payload equal to ip6_plen */
+ if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
+ ip6stat.ip6s_clat464_in_tooshort_drop++;
+ return -1;
+ }
+
+ osrc = ip6h->ip6_src;
+ odst = ip6h->ip6_dst;
+
+ /*
+ * Retrieve the local CLAT46 reserved IPv6 address.
+ * Let the packet pass if we don't find one, as the flag
+ * may get set before IPv6 configuration has taken place.
+ *
+ * A non-NULL result carries a reference; it is released exactly
+ * once, at the done label, on every path out of this function.
+ */
+ ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
+ if (ia6_clat_dst == NULL) {
+ goto done;
+ }
+
+ /*
+ * Check if the original dest in the packet is same as the reserved
+ * CLAT46 IPv6 address
+ */
+ if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
+ pbuf_t pbuf_store, *pbuf = NULL;
+ pbuf_init_mbuf(&pbuf_store, *m, ifp);
+ pbuf = &pbuf_store;
+
+ /*
+ * Retrieve the local CLAT46 IPv4 address reserved for stateless
+ * translation.
+ */
+ ia4_clat_dst = inifa_ifpclatv4(ifp);
+ if (ia4_clat_dst == NULL) {
+ ip6stat.ip6s_clat464_in_nov4addr_drop++;
+ error = -1;
+ goto cleanup;
+ }
-next:
- m = packetlist;
- if (m != NULL) {
- packetlist = packetlist->m_nextpkt;
- m->m_nextpkt = NULL;
+ /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
+ dst = &ia4_clat_dst->ia_addr.sin_addr;
+ if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
+ ip6stat.ip6s_clat464_in_v4synthfail_drop++;
+ error = -1;
+ goto cleanup;
}
- } while (m != NULL);
- if (send_head != NULL) {
- VERIFY(ifp->if_eflags & IFEF_SENDLIST);
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
- 0, 0, 0, 0, 0);
- retval = (*ifp->if_output)(ifp, send_head);
- if (retval == EQFULL || retval == EQSUSPENDED) {
- if (adv != NULL) {
- adv->code = (retval == EQFULL ?
- FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
- }
- retval = 0;
+ ip6h = pbuf->pb_data;
+ off = sizeof(struct ip6_hdr);
+ proto = ip6h->ip6_nxt;
+ tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff; /* 8 bits above the low 20 bits of ip6_flow */
+ tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
+
+ /*
+ * Translate the IP header and update the fragmentation
+ * header if needed
+ */
+ error = (nat464_translate_64(pbuf, off, tos, &proto,
+ ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
+ 0 : -1;
+
+ ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
+
+ if (error != 0) {
+ ip6stat.ip6s_clat464_in_64transfail_drop++;
+ goto cleanup;
}
- if (retval == 0 && flen > 0) {
- fbytes += flen;
- fpkts++;
+
+ /*
+ * Translate protocol header, update checksum, checksum flags
+ * and related fields.
+ */
+ error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
+ (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
+ NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
+
+ if (error != 0) {
+ ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
+ goto cleanup;
}
- if (retval != 0 && dlil_verbose) {
- printf("%s: output error on %s retval = %d\n",
- __func__, if_name(ifp), retval);
+
+cleanup:
+ if (ia4_clat_dst != NULL) {
+ IFA_REMREF(&ia4_clat_dst->ia_ifa);
}
- KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
- }
- KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
+ if (pbuf_is_valid(pbuf)) {
+ *m = pbuf->pb_mbuf; /* hand the (possibly replaced) mbuf back to the caller */
+ pbuf->pb_mbuf = NULL; /* detach it before the pbuf is destroyed */
+ pbuf_destroy(pbuf);
+ } else {
+ error = -1;
+ ip6stat.ip6s_clat464_in_invalpbuf_drop++;
+ }
-cleanup:
- if (fbytes > 0)
- ifp->if_fbytes += fbytes;
- if (fpkts > 0)
- ifp->if_fpackets += fpkts;
- if (proto != NULL)
- if_proto_free(proto);
- if (packetlist) /* if any packets are left, clean up */
- mbuf_freem_list(packetlist);
- if (retval == EJUSTRETURN)
- retval = 0;
- if (iorefcnt == 1)
- ifnet_decr_iorefcnt(ifp);
+ if (error == 0) {
+ *proto_family = PF_INET;
+ ip6stat.ip6s_clat464_in_success++;
+ }
+ } /* CLAT traffic */
- return (retval);
+done:
+ /*
+ * Release the CLAT46 IPv6 address reference here rather than inside
+ * the translation branch, so that it is also dropped when the packet
+ * was not addressed to the CLAT46 address and no translation ran.
+ */
+ if (ia6_clat_dst != NULL) {
+ IFA_REMREF(&ia6_clat_dst->ia_ifa);
+ }
+ return error;
}
errno_t
int retval = EOPNOTSUPP;
int result = 0;
- if (ifp == NULL || ioctl_code == 0)
- return (EINVAL);
+ if (ifp == NULL || ioctl_code == 0) {
+ return EINVAL;
+ }
/* Get an io ref count if the interface is attached */
- if (!ifnet_is_attached(ifp, 1))
- return (EOPNOTSUPP);
+ if (!ifnet_is_attached(ifp, 1)) {
+ return EOPNOTSUPP;
+ }
- /* Run the interface filters first.
+ /*
+ * Run the interface filters first.
* We want to run all filters before calling the protocol,
* interface family, or interface.
*/
/* Only update retval if no one has handled the ioctl */
if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
- if (result == ENOTSUP)
+ if (result == ENOTSUP) {
result = EOPNOTSUPP;
+ }
retval = result;
if (retval != 0 && retval != EOPNOTSUPP) {
/* we're done with the filter list */
/* Allow the protocol to handle the ioctl */
if (proto_fam != 0) {
- struct if_proto *proto;
+ struct if_proto *proto;
/* callee holds a proto refcnt upon success */
ifnet_lock_shared(ifp);
(proto->proto_kpi == kProtoKPI_v1 ?
proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
result = EOPNOTSUPP;
- if (ioctlp != NULL)
+ if (ioctlp != NULL) {
result = ioctlp(ifp, proto_fam, ioctl_code,
ioctl_arg);
+ }
if_proto_free(proto);
/* Only update retval if no one has handled the ioctl */
if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
- if (result == ENOTSUP)
+ if (result == ENOTSUP) {
result = EOPNOTSUPP;
+ }
retval = result;
- if (retval && retval != EOPNOTSUPP)
+ if (retval && retval != EOPNOTSUPP) {
goto cleanup;
+ }
}
}
}
* If it returns EOPNOTSUPP, ignore that, we may have
* already handled this in the protocol or family.
*/
- if (ifp->if_ioctl)
+ if (ifp->if_ioctl) {
result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
+ }
/* Only update retval if no one has handled the ioctl */
if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
- if (result == ENOTSUP)
+ if (result == ENOTSUP) {
result = EOPNOTSUPP;
+ }
retval = result;
if (retval && retval != EOPNOTSUPP) {
goto cleanup;
}
cleanup:
- if (retval == EJUSTRETURN)
+ if (retval == EJUSTRETURN) {
retval = 0;
+ }
ifnet_decr_iorefcnt(ifp);
- return (retval);
+ return retval;
}
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
- errno_t error = 0;
+ errno_t error = 0; /* returned as-is when the interface has no if_set_bpf_tap hook */
if (ifp->if_set_bpf_tap) {
/* Get an io reference on the interface if it is attached */
- if (!ifnet_is_attached(ifp, 1))
+ if (!ifnet_is_attached(ifp, 1)) { /* not attached: fail with ENXIO without calling the hook */
return ENXIO;
+ }
error = ifp->if_set_bpf_tap(ifp, mode, callback);
ifnet_decr_iorefcnt(ifp);
}
- return (error);
+ return error; /* 0, or whatever the interface's hook returned */
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
struct sockaddr *ll_addr, size_t ll_len)
{
- errno_t result = EOPNOTSUPP;
+ errno_t result = EOPNOTSUPP;
struct if_proto *proto;
const struct sockaddr *verify;
proto_media_resolve_multi resolvep;
- if (!ifnet_is_attached(ifp, 1))
+ if (!ifnet_is_attached(ifp, 1)) {
return result;
+ }
bzero(ll_addr, ll_len);
if (proto != NULL) {
resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
- if (resolvep != NULL)
+ if (resolvep != NULL) {
result = resolvep(ifp, proto_addr,
- (struct sockaddr_dl*)(void *)ll_addr, ll_len);
+ (struct sockaddr_dl *)(void *)ll_addr, ll_len);
+ }
if_proto_free(proto);
}
/* Let the interface verify the multicast address */
if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
- if (result == 0)
+ if (result == 0) {
verify = ll_addr;
- else
+ } else {
verify = proto_addr;
+ }
result = ifp->if_check_multi(ifp, verify);
}
ifnet_decr_iorefcnt(ifp);
- return (result);
+ return result;
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
- const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
- const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
+ const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
+ const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
struct if_proto *proto;
- errno_t result = 0;
+ errno_t result = 0;
/* callee holds a proto refcnt upon success */
ifnet_lock_shared(ifp);
if (proto == NULL) {
result = ENOTSUP;
} else {
- proto_media_send_arp arpp;
+ proto_media_send_arp arpp;
arpp = (proto->proto_kpi == kProtoKPI_v1 ?
proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
if (arpp == NULL) {
switch (arpop) {
case ARPOP_REQUEST:
arpstat.txrequests++;
- if (target_hw != NULL)
+ if (target_hw != NULL) {
arpstat.txurequests++;
+ }
break;
case ARPOP_REPLY:
arpstat.txreplies++;
if_proto_free(proto);
}
- return (result);
+ return result;
}
struct net_thread_marks { };
struct uthread *uth = get_bsdthread_info(current_thread());
pop = push & ~uth->uu_network_marks;
- if (pop != 0)
+ if (pop != 0) {
uth->uu_network_marks |= pop;
+ }
}
- return ((net_thread_marks_t)&base[pop]);
+ return (net_thread_marks_t)&base[pop];
}
__private_extern__ net_thread_marks_t
struct uthread *uth = get_bsdthread_info(current_thread());
unpop = unpush & uth->uu_network_marks;
- if (unpop != 0)
+ if (unpop != 0) {
uth->uu_network_marks &= ~unpop;
+ }
}
- return ((net_thread_marks_t)&base[unpop]);
+ return (net_thread_marks_t)&base[unpop];
}
__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
static const char *const base = (const void*)&net_thread_marks_base;
- ptrdiff_t pop = (caddr_t)popx - (caddr_t)base;
+ const ptrdiff_t pop = (const char *)popx - (const char *)base;
if (pop != 0) {
static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
static const char *const base = (const void*)&net_thread_marks_base;
- ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base;
+ ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
if (unpop != 0) {
static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
{
if (check != 0) {
struct uthread *uth = get_bsdthread_info(current_thread());
- return (uth->uu_network_marks & check);
+ return uth->uu_network_marks & check;
+ } else {
+ return 0;
}
- else
- return (0);
}
__private_extern__ u_int32_t
{
if (check != 0) {
struct uthread *uth = get_bsdthread_info(current_thread());
- return (~uth->uu_network_marks & check);
+ return ~uth->uu_network_marks & check;
+ } else {
+ return 0;
}
- else
- return (0);
}
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
const struct sockaddr_in * target_sin)
{
- if (sender_sin == NULL) {
- return (FALSE);
+ if (target_sin == NULL || sender_sin == NULL) { /* either address missing: cannot be an announcement */
+ return FALSE;
}
- return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
+
+ return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr; /* true when sender and target IPv4 addresses are identical */
}
__private_extern__ errno_t
-dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
- const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
- const struct sockaddr* target_proto0, u_int32_t rtflags)
+dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
+ const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
+ const struct sockaddr *target_proto0, u_int32_t rtflags)
{
- errno_t result = 0;
+ errno_t result = 0;
const struct sockaddr_in * sender_sin;
const struct sockaddr_in * target_sin;
struct sockaddr_inarp target_proto_sinarp;
struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
- if (target_proto == NULL || (sender_proto != NULL &&
- sender_proto->sa_family != target_proto->sa_family))
- return (EINVAL);
+ if (target_proto == NULL || sender_proto == NULL) {
+ return EINVAL;
+ }
+
+ if (sender_proto->sa_family != target_proto->sa_family) {
+ return EINVAL;
+ }
/*
* If the target is a (default) router, provide that
*/
if (rtflags & RTF_ROUTER) {
bcopy(target_proto, &target_proto_sinarp,
- sizeof (struct sockaddr_in));
+ sizeof(struct sockaddr_in));
target_proto_sinarp.sin_other |= SIN_ROUTER;
target_proto = (struct sockaddr *)&target_proto_sinarp;
}
if (target_proto->sa_family == AF_INET &&
IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
- !_is_announcement(target_sin, sender_sin)) {
- ifnet_t *ifp_list;
- u_int32_t count;
- u_int32_t ifp_on;
+ !_is_announcement(sender_sin, target_sin)) {
+ ifnet_t *ifp_list;
+ u_int32_t count;
+ u_int32_t ifp_on;
result = ENOTSUP;
* ARPing. This may mean that we don't ARP on
* the interface the subnet route points to.
*/
- if (!(cur_ifp->if_eflags & IFEF_ARPLL))
+ if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
continue;
+ }
/* Find the source IP address */
ifnet_lock_shared(cur_ifp);
sender_proto, target_hw, target_proto);
}
- return (result);
+ return result;
}
/*
{
struct ifnet *_ifp;
- lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
+ LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
- if (_ifp == ifp)
+ if (_ifp == ifp) {
break;
+ }
}
- return (_ifp != NULL);
+ return _ifp != NULL;
}
+
/*
* Caller has to pass a non-zero refio argument to get a
* IO reference count. This will prevent ifnet_detach from
- * being called when there are outstanding io reference counts.
+ * being called when there are outstanding io reference counts.
*/
int
ifnet_is_attached(struct ifnet *ifp, int refio)
int ret;
lck_mtx_lock_spin(&ifp->if_ref_lock);
- if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
- IFRF_ATTACHED))) {
- if (refio > 0)
+ if ((ret = IF_FULLY_ATTACHED(ifp))) {
+ if (refio > 0) {
ifp->if_refio++;
+ }
}
lck_mtx_unlock(&ifp->if_ref_lock);
- return (ret);
+ return ret;
}
void
-ifnet_decr_iorefcnt(struct ifnet *ifp)
+ifnet_incr_pending_thread_count(struct ifnet *ifp)
+{
+ lck_mtx_lock_spin(&ifp->if_ref_lock); /* the counter is updated with if_ref_lock held */
+ ifp->if_threads_pending++; /* undone by ifnet_decr_pending_thread_count() */
+ lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
+void
+ifnet_decr_pending_thread_count(struct ifnet *ifp)
+{
+ lck_mtx_lock_spin(&ifp->if_ref_lock); /* the counter is updated with if_ref_lock held */
+ VERIFY(ifp->if_threads_pending > 0); /* guard against underflow */
+ ifp->if_threads_pending--;
+ if (ifp->if_threads_pending == 0) {
+ wakeup(&ifp->if_threads_pending); /* count reached zero: wake anyone sleeping on the counter's address */
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
+/*
+ * Caller must ensure the interface is attached; the assumption is that
+ * there is at least an outstanding IO reference count held already.
+ * Most callers would call ifnet_is_{attached,data_ready}() instead.
+ *
+ * Both assumptions are enforced with VERIFY() under if_ref_lock before
+ * if_refio is incremented. The reference is dropped again with
+ * ifnet_decr_iorefcnt().
+ */
+void
+ifnet_incr_iorefcnt(struct ifnet *ifp)
{
lck_mtx_lock_spin(&ifp->if_ref_lock);
+ VERIFY(IF_FULLY_ATTACHED(ifp));
+ VERIFY(ifp->if_refio > 0); /* an IO reference must already be outstanding */
+ ifp->if_refio++;
+ lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
+__attribute__((always_inline))
+static void
+ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
+{
+ LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED); /* caller must already hold if_ref_lock */
+
VERIFY(ifp->if_refio > 0);
- VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
+ VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)); /* interface must be attached or in the middle of detaching */
+
ifp->if_refio--;
+ VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0); /* the last IO reference may not go away while if_datamov is non-zero */
- /* if there are no more outstanding io references, wakeup the
+ /*
+ * if there are no more outstanding io references, wakeup the
* ifnet_detach thread if detaching flag is set.
*/
- if (ifp->if_refio == 0 &&
- (ifp->if_refflags & IFRF_DETACHING) != 0) {
+ if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
wakeup(&(ifp->if_refio));
}
+}
+
+void
+ifnet_decr_iorefcnt(struct ifnet *ifp)
+{
+ lck_mtx_lock_spin(&ifp->if_ref_lock); /* locking wrapper: take if_ref_lock for the helper below */
+ ifnet_decr_iorefcnt_locked(ifp); /* drops one IO reference; see that helper for the checks and the wakeup */
+ lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
+boolean_t
+ifnet_datamov_begin(struct ifnet *ifp)
+{
+ boolean_t ret; /* result of the attached-and-ready test, returned to the caller */
+
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) { /* test and both increments happen under if_ref_lock */
+ ifp->if_refio++; /* IO reference, dropped again by ifnet_datamov_end() */
+ ifp->if_datamov++; /* one more thread moving data; ifnet_datamov_drain() waits for this to reach 0 */
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
+
+ return ret; /* when false, no counter was touched and ifnet_datamov_end() must not be called */
+}
+
+void
+ifnet_datamov_end(struct ifnet *ifp)
+{
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ VERIFY(ifp->if_datamov > 0); /* must follow a successful ifnet_datamov_begin() */
+ /*
+ * if there is no more thread moving data, wake up any
+ * drainers that are blocked waiting for this.
+ */
+ if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
+ wakeup(&(ifp->if_datamov)); /* same address ifnet_datamov_drain() sleeps on */
+ }
+ ifnet_decr_iorefcnt_locked(ifp); /* drop the IO reference taken in ifnet_datamov_begin() */
+ lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
+void
+ifnet_datamov_suspend(struct ifnet *ifp)
+{
+ lck_mtx_lock_spin(&ifp->if_ref_lock);
+ VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
+ ifp->if_refio++; /* IO reference held while suspended; ifnet_datamov_resume() drops it */
+ if (ifp->if_suspend++ == 0) { /* suspensions nest: only the first one clears IFRF_READY */
+ VERIFY(ifp->if_refflags & IFRF_READY);
+ ifp->if_refflags &= ~IFRF_READY; /* ifnet_datamov_begin() tests IF_FULLY_ATTACHED_AND_READY(), presumably this flag -- confirm */
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
+}
+
+void
+ifnet_datamov_drain(struct ifnet *ifp)
+{
+ lck_mtx_lock(&ifp->if_ref_lock);
+ VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
+ /* data movement must already be suspended */
+ VERIFY(ifp->if_suspend > 0);
+ VERIFY(!(ifp->if_refflags & IFRF_READY));
+ ifp->if_drainers++; /* tells ifnet_datamov_end() that someone is waiting for a wakeup */
+ while (ifp->if_datamov != 0) { /* re-check after every wakeup until no thread is moving data */
+ (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
+ (PZERO - 1), __func__, NULL);
+ }
+ VERIFY(!(ifp->if_refflags & IFRF_READY));
+ VERIFY(ifp->if_drainers > 0);
+ ifp->if_drainers--;
+ lck_mtx_unlock(&ifp->if_ref_lock);
+
+ /* purge the interface queues (done after if_ref_lock is released, and only when IFEF_TXSTART is set) */
+ if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
+ if_qflush(ifp, 0);
+ }
+}
+
+void
+ifnet_datamov_resume(struct ifnet *ifp)
+{
+ lck_mtx_lock(&ifp->if_ref_lock);
+ /* data movement must already be suspended */
+ VERIFY(ifp->if_suspend > 0);
+ if (--ifp->if_suspend == 0) { /* suspensions nest: only the last resume sets IFRF_READY again */
+ VERIFY(!(ifp->if_refflags & IFRF_READY));
+ ifp->if_refflags |= IFRF_READY;
+ }
+ ifnet_decr_iorefcnt_locked(ifp); /* drop the IO reference taken by ifnet_datamov_suspend() */
lck_mtx_unlock(&ifp->if_ref_lock);
}
{
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
- if (dl_if == NULL)
- return (EINVAL);
+ if (dl_if == NULL) {
+ return EINVAL;
+ }
lck_mtx_lock_spin(&dl_if->dl_if_lock);
++dl_if->dl_if_refcnt;
panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
/* NOTREACHED */
}
- if (dl_if->dl_if_trace != NULL)
+ if (dl_if->dl_if_trace != NULL) {
(*dl_if->dl_if_trace)(dl_if, TRUE);
+ }
lck_mtx_unlock(&dl_if->dl_if_lock);
- return (0);
+ return 0;
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+ bool need_release = FALSE; /* set under the lock, acted on only after the lock is dropped */
- if (dl_if == NULL)
- return (EINVAL);
+ if (dl_if == NULL) {
+ return EINVAL;
+ }
lck_mtx_lock_spin(&dl_if->dl_if_lock);
- if (dl_if->dl_if_refcnt == 0) {
+ switch (dl_if->dl_if_refcnt) { /* inspect the count before it is decremented below */
+ case 0:
panic("%s: negative refcnt for ifp=%p", __func__, ifp);
/* NOTREACHED */
+ break;
+ case 1: /* this call drops the last reference */
+ if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
+ need_release = TRUE; /* interface still flagged IFRF_EMBRYONIC: release it once unlocked */
+ }
+ break;
+ default:
+ break;
}
--dl_if->dl_if_refcnt;
- if (dl_if->dl_if_trace != NULL)
+ if (dl_if->dl_if_trace != NULL) {
(*dl_if->dl_if_trace)(dl_if, FALSE);
+ }
lck_mtx_unlock(&dl_if->dl_if_lock);
-
- return (0);
+ if (need_release) {
+ dlil_if_release(ifp); /* called with dl_if_lock no longer held */
+ }
+ return 0;
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
- const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
+ const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
+ uint32_t * proto_count)
{
struct kev_dl_proto_data ev_pr_data;
struct ifnet *ifp = proto->ifp;
if (_proto != NULL) {
ifnet_lock_done(ifp);
if_proto_free(_proto);
- return (EEXIST);
+ return EEXIST;
}
/*
demux_count);
if (retval) {
ifnet_lock_done(ifp);
- return (retval);
+ return retval;
}
/*
* Insert the protocol in the hash
*/
prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
- while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
+ while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
prev_proto = SLIST_NEXT(prev_proto, next_hash);
- if (prev_proto)
+ }
+ if (prev_proto) {
SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
- else
+ } else {
SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
proto, next_hash);
+ }
/* hold a proto refcnt for attach */
if_proto_ref(proto);
* (subject to change)
*/
ev_pr_data.proto_family = proto->protocol_family;
- ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+ ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
+
ifnet_lock_done(ifp);
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
(struct net_event_data *)&ev_pr_data,
- sizeof (struct kev_dl_proto_data));
- return (retval);
+ sizeof(struct kev_dl_proto_data));
+ if (proto_count != NULL) {
+ *proto_count = ev_pr_data.proto_remaining_count;
+ }
+ return retval;
}
errno_t
{
int retval = 0;
struct if_proto *ifproto = NULL;
+ uint32_t proto_count = 0;
ifnet_head_lock_shared();
if (ifp == NULL || protocol == 0 || proto_details == NULL) {
ifproto->kpi.v1.send_arp = proto_details->send_arp;
retval = dlil_attach_protocol_internal(ifproto,
- proto_details->demux_list, proto_details->demux_count);
-
- if (dlil_verbose) {
- printf("%s: attached v1 protocol %d\n", if_name(ifp),
- protocol);
- }
+ proto_details->demux_list, proto_details->demux_count,
+ &proto_count);
end:
- if (retval != 0 && retval != EEXIST && ifp != NULL) {
+ if (retval != 0 && retval != EEXIST) {
DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
- if_name(ifp), protocol, retval);
+ ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
+ } else {
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
+ ifp != NULL ? if_name(ifp) : "N/A",
+ protocol, proto_count);
+ }
}
ifnet_head_done();
- if (retval != 0 && ifproto != NULL)
+ if (retval == 0) {
+ /*
+ * A protocol has been attached, mark the interface up.
+ * This used to be done by configd.KernelEventMonitor, but that
+ * is inherently prone to races (rdar://problem/30810208).
+ */
+ (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
+ (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
+ dlil_post_sifflags_msg(ifp);
+ } else if (ifproto != NULL) {
zfree(dlif_proto_zone, ifproto);
- return (retval);
+ }
+ return retval;
}
errno_t
{
int retval = 0;
struct if_proto *ifproto = NULL;
+ uint32_t proto_count = 0;
ifnet_head_lock_shared();
if (ifp == NULL || protocol == 0 || proto_details == NULL) {
ifproto->kpi.v2.send_arp = proto_details->send_arp;
retval = dlil_attach_protocol_internal(ifproto,
- proto_details->demux_list, proto_details->demux_count);
-
- if (dlil_verbose) {
- printf("%s: attached v2 protocol %d\n", if_name(ifp),
- protocol);
- }
+ proto_details->demux_list, proto_details->demux_count,
+ &proto_count);
end:
- if (retval != 0 && retval != EEXIST && ifp != NULL) {
+ if (retval != 0 && retval != EEXIST) {
DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
- if_name(ifp), protocol, retval);
+ ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
+ } else {
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
+ ifp != NULL ? if_name(ifp) : "N/A",
+ protocol, proto_count);
+ }
}
ifnet_head_done();
- if (retval != 0 && ifproto != NULL)
+ if (retval == 0) {
+ /*
+ * A protocol has been attached, mark the interface up.
+ * This used to be done by configd.KernelEventMonitor, but that
+ * is inherently prone to races (rdar://problem/30810208).
+ */
+ (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
+ (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
+ dlil_post_sifflags_msg(ifp);
+ } else if (ifproto != NULL) {
zfree(dlif_proto_zone, ifproto);
- return (retval);
+ }
+ return retval;
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
struct if_proto *proto = NULL;
- int retval = 0;
+ int retval = 0;
if (ifp == NULL || proto_family == 0) {
retval = EINVAL;
}
/* call family module del_proto */
- if (ifp->if_del_proto)
+ if (ifp->if_del_proto) {
ifp->if_del_proto(ifp, proto->protocol_family);
+ }
SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
proto, if_proto, next_hash);
if (proto->proto_kpi == kProtoKPI_v1) {
proto->kpi.v1.input = ifproto_media_input_v1;
- proto->kpi.v1.pre_output= ifproto_media_preout;
+ proto->kpi.v1.pre_output = ifproto_media_preout;
proto->kpi.v1.event = ifproto_media_event;
proto->kpi.v1.ioctl = ifproto_media_ioctl;
proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
ifnet_lock_done(ifp);
if (dlil_verbose) {
- printf("%s: detached %s protocol %d\n", if_name(ifp),
+ DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
(proto->proto_kpi == kProtoKPI_v1) ?
"v1" : "v2", proto_family);
}
if_proto_free(proto);
end:
- return (retval);
+ return retval;
}
struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
- return (ENXIO);
+ return ENXIO;
}
static errno_t
struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
- return (ENXIO);
-
+ return ENXIO;
}
static errno_t
char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
- return (ENXIO);
-
+ return ENXIO;
}
static void
unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
- return (ENXIO);
+ return ENXIO;
}
static errno_t
struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
- return (ENXIO);
+ return ENXIO;
}
static errno_t
const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
- return (ENXIO);
+ return ENXIO;
}
extern int if_next_index(void);
+extern int tcp_ecn_outbound;
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
u_int32_t sflags = 0;
int err;
- if (ifp == NULL)
- return (EINVAL);
+ if (ifp == NULL) {
+ return EINVAL;
+ }
/*
* Serialize ifnet attach using dlil_ifnet_lock, in order to
if (tmp_if == ifp) {
ifnet_head_done();
dlil_if_unlock();
- return (EEXIST);
+ return EEXIST;
}
}
lck_mtx_lock_spin(&ifp->if_ref_lock);
- if (ifp->if_refflags & IFRF_ATTACHED) {
- panic_plain("%s: flags mismatch (attached set) ifp=%p",
+ if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
+ panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
__func__, ifp);
/* NOTREACHED */
}
/* Sanity check */
VERIFY(ifp->if_detaching_link.tqe_next == NULL);
VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
+ VERIFY(ifp->if_threads_pending == 0);
if (ll_addr != NULL) {
if (ifp->if_addrlen == 0) {
ifnet_lock_done(ifp);
ifnet_head_done();
dlil_if_unlock();
- return (EINVAL);
+ return EINVAL;
}
}
ifnet_lock_done(ifp);
ifnet_head_done();
dlil_if_unlock();
- return (ENODEV);
+ return ENODEV;
}
/* Allocate protocol hash table */
ifnet_lock_done(ifp);
ifnet_head_done();
dlil_if_unlock();
- return (ENOBUFS);
+ return ENOBUFS;
}
bzero(ifp->if_proto_hash, dlif_phash_size);
VERIFY(ifp->if_flt_waiters == 0);
lck_mtx_unlock(&ifp->if_flt_lock);
- VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
- TAILQ_INIT(&ifp->if_prefixhead);
-
if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
LIST_INIT(&ifp->if_multiaddrs);
ifnet_lock_done(ifp);
ifnet_head_done();
dlil_if_unlock();
- return (ENOBUFS);
+ return ENOBUFS;
}
ifp->if_index = idx;
}
VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
/* allocate (if needed) and initialize a link address */
- VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
ifa = dlil_alloc_lladdr(ifp, ll_addr);
if (ifa == NULL) {
ifnet_lock_done(ifp);
ifnet_head_done();
dlil_if_unlock();
- return (ENOBUFS);
+ return ENOBUFS;
}
VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
/* Clear stats (save and restore other fields that we care) */
if_data_saved = ifp->if_data;
- bzero(&ifp->if_data, sizeof (ifp->if_data));
+ bzero(&ifp->if_data, sizeof(ifp->if_data));
ifp->if_data.ifi_type = if_data_saved.ifi_type;
ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
ifnet_touch_lastchange(ifp);
VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
- ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
+ ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
+ ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
/* By default, use SFB and enable flow advisory */
sflags = PKTSCHEDF_QALG_SFB;
- if (if_flowadv)
+ if (if_flowadv) {
sflags |= PKTSCHEDF_QALG_FLOWCTL;
+ }
- if (if_delaybased_queue)
+ if (if_delaybased_queue) {
sflags |= PKTSCHEDF_QALG_DELAYBASED;
+ }
+
+ if (ifp->if_output_sched_model ==
+ IFNET_SCHED_MODEL_DRIVER_MANAGED) {
+ sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
+ }
/* Initialize transmit queue(s) */
err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
/* Sanity checks on the input thread storage */
dl_inp = &dl_if->dl_if_inpstorage;
- bzero(&dl_inp->stats, sizeof (dl_inp->stats));
+ bzero(&dl_inp->stats, sizeof(dl_inp->stats));
VERIFY(dl_inp->input_waiting == 0);
VERIFY(dl_inp->wtot == 0);
VERIFY(dl_inp->ifp == NULL);
VERIFY(dl_inp->wloop_thr == THREAD_NULL);
VERIFY(dl_inp->poll_thr == THREAD_NULL);
VERIFY(dl_inp->tag == 0);
- VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
- bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
- bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
- bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
+
#if IFNET_INPUT_SANITY_CHK
VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */
+ VERIFY(ifp->if_poll_thread == THREAD_NULL);
+ dlil_reset_rxpoll_params(ifp);
/*
- * A specific DLIL input thread is created per Ethernet/cellular
- * interface or for an interface which supports opportunistic
- * input polling. Pseudo interfaces or other types of interfaces
- * use the main input thread instead.
+ * A specific DLIL input thread is created per non-loopback interface.
*/
- if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
- ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
+ if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
ifp->if_inp = dl_inp;
+ ifnet_incr_pending_thread_count(ifp);
err = dlil_create_input_thread(ifp, ifp->if_inp);
if (err != 0) {
panic_plain("%s: ifp=%p couldn't get an input thread; "
/* NOTREACHED */
}
}
-
/*
* If the driver supports the new transmit model, calculate flow hash
* and create a workloop starter thread to invoke the if_start callback
if (ifp->if_eflags & IFEF_TXSTART) {
ifp->if_flowhash = ifnet_calc_flowhash(ifp);
VERIFY(ifp->if_flowhash != 0);
-
- VERIFY(ifp->if_start != NULL);
VERIFY(ifp->if_start_thread == THREAD_NULL);
ifnet_set_start_cycle(ifp, NULL);
ifp->if_start_active = 0;
ifp->if_start_req = 0;
ifp->if_start_flags = 0;
- if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
- &ifp->if_start_thread)) != KERN_SUCCESS) {
- panic_plain("%s: ifp=%p couldn't get a start thread; "
+ VERIFY(ifp->if_start != NULL);
+ ifnet_incr_pending_thread_count(ifp);
+ if ((err = kernel_thread_start(ifnet_start_thread_func,
+ ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
+ panic_plain("%s: "
+ "ifp=%p couldn't get a start thread; "
"err=%d", __func__, ifp, err);
/* NOTREACHED */
}
ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
- (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
+ (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
} else {
ifp->if_flowhash = 0;
}
+ /* Reset polling parameters */
+ ifnet_set_poll_cycle(ifp, NULL);
+ ifp->if_poll_update = 0;
+ ifp->if_poll_flags = 0;
+ ifp->if_poll_req = 0;
+ VERIFY(ifp->if_poll_thread == THREAD_NULL);
+
/*
* If the driver supports the new receive model, create a poller
* thread to invoke if_input_poll callback where the packets may
* be dequeued from the driver and processed for reception.
+ * If the interface is netif compat, then the poller thread is managed by netif.
*/
- if (ifp->if_eflags & IFEF_RXPOLL) {
+ if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) &&
+ (ifp->if_xflags & IFXF_LEGACY)) {
VERIFY(ifp->if_input_poll != NULL);
VERIFY(ifp->if_input_ctl != NULL);
- VERIFY(ifp->if_poll_thread == THREAD_NULL);
-
- ifnet_set_poll_cycle(ifp, NULL);
- ifp->if_poll_update = 0;
- ifp->if_poll_active = 0;
- ifp->if_poll_req = 0;
- if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
+ ifnet_incr_pending_thread_count(ifp);
+ if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
&ifp->if_poll_thread)) != KERN_SUCCESS) {
panic_plain("%s: ifp=%p couldn't get a poll thread; "
"err=%d", __func__, ifp, err);
/* NOTREACHED */
}
ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
- (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
+ (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
}
VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
IFMA_LOCK(ifma);
if (ifma->ifma_addr->sa_family == AF_LINK ||
- ifma->ifma_addr->sa_family == AF_UNSPEC)
+ ifma->ifma_addr->sa_family == AF_UNSPEC) {
ifp->if_updatemcasts++;
+ }
IFMA_UNLOCK(ifma);
}
- printf("%s: attached with %d suspended link-layer multicast "
+ DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
"membership(s)\n", if_name(ifp),
ifp->if_updatemcasts);
}
/* Clear logging parameters */
- bzero(&ifp->if_log, sizeof (ifp->if_log));
+ bzero(&ifp->if_log, sizeof(ifp->if_log));
+
+ /* Clear foreground/realtime activity timestamps */
ifp->if_fg_sendts = 0;
+ ifp->if_rt_sendts = 0;
VERIFY(ifp->if_delegated.ifp == NULL);
VERIFY(ifp->if_delegated.type == 0);
VERIFY(ifp->if_delegated.family == 0);
VERIFY(ifp->if_delegated.subfamily == 0);
VERIFY(ifp->if_delegated.expensive == 0);
+ VERIFY(ifp->if_delegated.constrained == 0);
+
+ VERIFY(ifp->if_agentids == NULL);
+ VERIFY(ifp->if_agentcount == 0);
+
+ /* Reset interface state */
+ bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
+ ifp->if_interface_state.valid_bitmask |=
+ IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+ ifp->if_interface_state.interface_availability =
+ IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
+
+ /* Initialize Link Quality Metric (loopback [lo0] is always good) */
+ if (ifp == lo_ifp) {
+ ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
+ ifp->if_interface_state.valid_bitmask |=
+ IF_INTERFACE_STATE_LQM_STATE_VALID;
+ } else {
+ ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
+ }
+
+ /*
+ * Enable ECN capability on this interface depending on the
+ * value of ECN global setting
+ */
+ if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
+ ifp->if_eflags |= IFEF_ECN_ENABLE;
+ ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+ }
+
+ /*
+ * Built-in Cyclops always on policy for WiFi infra
+ */
+ if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
+ errno_t error;
+
+ error = if_set_qosmarking_mode(ifp,
+ IFRTYPE_QOSMARKING_FASTLANE);
+ if (error != 0) {
+ DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
+ __func__, ifp->if_xname, error);
+ } else {
+ ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
+#if (DEVELOPMENT || DEBUG)
+ DLIL_PRINTF("%s fastlane enabled on %s\n",
+ __func__, ifp->if_xname);
+#endif /* (DEVELOPMENT || DEBUG) */
+ }
+ }
ifnet_lock_done(ifp);
ifnet_head_done();
+
lck_mtx_lock(&ifp->if_cached_route_lock);
/* Enable forwarding cached route */
ifp->if_fwd_cacheok = 1;
/* Clean up any existing cached routes */
ROUTE_RELEASE(&ifp->if_fwd_route);
- bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+ bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
ROUTE_RELEASE(&ifp->if_src_route);
- bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+ bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
ROUTE_RELEASE(&ifp->if_src_route6);
- bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+ bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
lck_mtx_unlock(&ifp->if_cached_route_lock);
ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
#endif /* INET6 */
VERIFY(ifp->if_data_threshold == 0);
+ VERIFY(ifp->if_dt_tcall != NULL);
/*
- * Finally, mark this ifnet as attached.
+ * Wait for the created kernel threads for I/O to get
+ * scheduled and run at least once before we proceed
+ * to mark interface as attached.
*/
+ lck_mtx_lock(&ifp->if_ref_lock);
+ while (ifp->if_threads_pending != 0) {
+ DLIL_PRINTF("%s: Waiting for all kernel threads created for "
+ "interface %s to get scheduled at least once.\n",
+ __func__, ifp->if_xname);
+ (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
+ __func__, NULL);
+ LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
+ }
+ lck_mtx_unlock(&ifp->if_ref_lock);
+ DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
+ "at least once. Proceeding.\n", __func__, ifp->if_xname);
+
+ /* Finally, mark this ifnet as attached. */
lck_mtx_lock(rnh_lock);
ifnet_lock_exclusive(ifp);
- /* Initialize Link Quality Metric (loopback [lo0] is always good) */
- ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
- IFNET_LQM_THRESH_UNKNOWN;
lck_mtx_lock_spin(&ifp->if_ref_lock);
- ifp->if_refflags = IFRF_ATTACHED;
+ ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
lck_mtx_unlock(&ifp->if_ref_lock);
if (net_rtref) {
/* boot-args override; enable idle notification */
/* apply previous request(s) to set the idle flags, if any */
(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
ifp->if_idle_new_flags_mask);
-
}
ifnet_lock_done(ifp);
lck_mtx_unlock(rnh_lock);
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
if (dlil_verbose) {
- printf("%s: attached%s\n", if_name(ifp),
+ DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
(dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
}
- return (0);
+ return 0;
}
/*
{
struct ifaddr *ifa, *oifa;
struct sockaddr_dl *asdl, *msdl;
- char workbuf[IFNAMSIZ*2];
+ char workbuf[IFNAMSIZ * 2];
int namelen, masklen, socksize;
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
- namelen = snprintf(workbuf, sizeof (workbuf), "%s",
+ namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
if_name(ifp));
- masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
+ masklen = offsetof(struct sockaddr_dl, sdl_data[0])
+ + ((namelen > 0) ? namelen : 0);
socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
- if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
+ if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
socksize = sizeof(struct sockaddr_dl);
+ }
socksize = ROUNDUP(socksize);
#undef ROUNDUP
* This same space will be used when if_addrlen shrinks.
*/
if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
- int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
+ int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
- if (ifa == NULL)
- return (NULL);
+ if (ifa == NULL) {
+ return NULL;
+ }
ifa_lock_init(ifa);
/* Don't set IFD_ALLOC, as this is permanent */
ifa->ifa_debug = IFD_LINK;
IFA_LOCK(ifa);
/* address and mask sockaddr_dl locations */
asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
- bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
+ bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
- bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
+ bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
}
/* hold a permanent reference for the ifnet itself */
ifa->ifa_addr = (struct sockaddr *)asdl;
asdl->sdl_len = socksize;
asdl->sdl_family = AF_LINK;
- bcopy(workbuf, asdl->sdl_data, namelen);
- asdl->sdl_nlen = namelen;
+ if (namelen > 0) {
+ bcopy(workbuf, asdl->sdl_data, min(namelen,
+ sizeof(asdl->sdl_data)));
+ asdl->sdl_nlen = namelen;
+ } else {
+ asdl->sdl_nlen = 0;
+ }
asdl->sdl_index = ifp->if_index;
asdl->sdl_type = ifp->if_type;
if (ll_addr != NULL) {
} else {
asdl->sdl_alen = 0;
}
- ifa->ifa_netmask = (struct sockaddr*)msdl;
+ ifa->ifa_netmask = (struct sockaddr *)msdl;
msdl->sdl_len = masklen;
- while (namelen != 0)
+ while (namelen > 0) {
msdl->sdl_data[--namelen] = 0xff;
+ }
IFA_UNLOCK(ifa);
- if (oifa != NULL)
+ if (oifa != NULL) {
IFA_REMREF(oifa);
+ }
- return (ifa);
+ return ifa;
}
static void
ifnet_detach(ifnet_t ifp)
{
struct ifnet *delegated_ifp;
+ struct nd_ifinfo *ndi = NULL;
+
+ if (ifp == NULL) {
+ return EINVAL;
+ }
- if (ifp == NULL)
- return (EINVAL);
+ ndi = ND_IFINFO(ifp);
+ if (NULL != ndi) {
+ ndi->cga_initialized = FALSE;
+ }
lck_mtx_lock(rnh_lock);
ifnet_head_lock_exclusive();
ifnet_lock_exclusive(ifp);
+ if (ifp->if_output_netem != NULL) {
+ netem_destroy(ifp->if_output_netem);
+ ifp->if_output_netem = NULL;
+ }
+
/*
* Check to see if this interface has previously triggered
* aggressive protocol draining; if so, decrement the global
(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
lck_mtx_lock_spin(&ifp->if_ref_lock);
- if (!(ifp->if_refflags & IFRF_ATTACHED)) {
+ if (!(ifp->if_refflags & IFRF_ATTACHED)) {
lck_mtx_unlock(&ifp->if_ref_lock);
ifnet_lock_done(ifp);
ifnet_head_done();
lck_mtx_unlock(rnh_lock);
- return (EINVAL);
+ return EINVAL;
} else if (ifp->if_refflags & IFRF_DETACHING) {
/* Interface has already been detached */
lck_mtx_unlock(&ifp->if_ref_lock);
ifnet_lock_done(ifp);
ifnet_head_done();
lck_mtx_unlock(rnh_lock);
- return (ENXIO);
+ return ENXIO;
}
+ VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
/* Indicate this interface is being detached */
ifp->if_refflags &= ~IFRF_ATTACHED;
ifp->if_refflags |= IFRF_DETACHING;
lck_mtx_unlock(&ifp->if_ref_lock);
- if (dlil_verbose)
- printf("%s: detaching\n", if_name(ifp));
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: detaching\n", if_name(ifp));
+ }
+
+ /* clean up flow control entry object if there's any */
+ if (ifp->if_eflags & IFEF_TXSTART) {
+ ifnet_flowadv(ifp->if_flowhash);
+ }
+
+ /* Reset ECN enable/disable flags */
+ ifp->if_eflags &= ~IFEF_ECN_DISABLE;
+ ifp->if_eflags &= ~IFEF_ECN_ENABLE;
+
+ /* Reset CLAT46 flag */
+ ifp->if_eflags &= ~IFEF_CLAT46;
+
+ /*
+ * We do not reset the TCP keep alive counters in case
+ * a TCP connection stays connected after the interface
+ * went down
+ */
+ if (ifp->if_tcp_kao_cnt > 0) {
+ os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
+ __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
+ }
+ ifp->if_tcp_kao_max = 0;
/*
* Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
TAILQ_REMOVE(&ifnet_head, ifp, if_link);
ifp->if_link.tqe_next = NULL;
ifp->if_link.tqe_prev = NULL;
+ if (ifp->if_ordered_link.tqe_next != NULL ||
+ ifp->if_ordered_link.tqe_prev != NULL) {
+ ifnet_remove_from_ordered_list(ifp);
+ }
ifindex2ifnet[ifp->if_index] = NULL;
+ /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
+ ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
+
/* Record detach PC stacktrace */
ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
/* Clear logging parameters */
- bzero(&ifp->if_log, sizeof (ifp->if_log));
+ bzero(&ifp->if_log, sizeof(ifp->if_log));
/* Clear delegated interface info (reference released below) */
delegated_ifp = ifp->if_delegated.ifp;
- bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
+ bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
+
+ /* Reset interface state */
+ bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
ifnet_lock_done(ifp);
ifnet_head_done();
lck_mtx_unlock(rnh_lock);
+
/* Release reference held on the delegated interface */
- if (delegated_ifp != NULL)
+ if (delegated_ifp != NULL) {
ifnet_release(delegated_ifp);
+ }
/* Reset Link Quality Metric (unless loopback [lo0]) */
- if (ifp != lo_ifp)
- if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
+ if (ifp != lo_ifp) {
+ if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
+ }
/* Reset TCP local statistics */
- if (ifp->if_tcp_stat != NULL)
+ if (ifp->if_tcp_stat != NULL) {
bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
+ }
/* Reset UDP local statistics */
- if (ifp->if_udp_stat != NULL)
+ if (ifp->if_udp_stat != NULL) {
bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
+ }
+
+ /* Reset ifnet IPv4 stats */
+ if (ifp->if_ipv4_stat != NULL) {
+ bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
+ }
+
+ /* Reset ifnet IPv6 stats */
+ if (ifp->if_ipv6_stat != NULL) {
+ bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
+ }
+
+ /* Release memory held for interface link status report */
+ if (ifp->if_link_status != NULL) {
+ FREE(ifp->if_link_status, M_TEMP);
+ ifp->if_link_status = NULL;
+ }
+
+ /* Clear agent IDs */
+ if (ifp->if_agentids != NULL) {
+ FREE(ifp->if_agentids, M_NETAGENT);
+ ifp->if_agentids = NULL;
+ }
+ ifp->if_agentcount = 0;
+
/* Let BPF know we're detaching */
bpfdetach(ifp);
ifp->if_fwd_cacheok = 0;
lck_mtx_unlock(&ifp->if_cached_route_lock);
+ /* Disable data threshold and wait for any pending event posting */
ifp->if_data_threshold = 0;
+ VERIFY(ifp->if_dt_tcall != NULL);
+ (void) thread_call_cancel_wait(ifp->if_dt_tcall);
+
/*
* Drain any deferred IGMPv3/MLDv2 query responses, but keep the
* references to the info structures and leave them attached to
ifnet_detaching_enqueue(ifp);
dlil_if_unlock();
- return (0);
+ return 0;
}
static void
ifp->if_detaching_link.tqe_next = NULL;
ifp->if_detaching_link.tqe_prev = NULL;
}
- return (ifp);
+ return ifp;
}
static int
/* NOTREACHED */
}
+ net_update_uptime();
+
VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
/* Take care of detaching ifnet */
dlil_if_lock();
}
}
- /* NOTREACHED */
- return (0);
}
+__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
+ dlil_decr_pending_thread_count();
dlil_if_lock();
(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
(PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
* common case, so block without using a continuation.
*/
while (ifp->if_refio > 0) {
- printf("%s: Waiting for IO references on %s interface "
+ DLIL_PRINTF("%s: Waiting for IO references on %s interface "
"to be released\n", __func__, if_name(ifp));
(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
- (PZERO - 1), "ifnet_ioref_wait", NULL);
+ (PZERO - 1), "ifnet_ioref_wait", NULL);
}
+
+ VERIFY(ifp->if_datamov == 0);
+ VERIFY(ifp->if_drainers == 0);
+ VERIFY(ifp->if_suspend == 0);
+ ifp->if_refflags &= ~IFRF_READY;
lck_mtx_unlock(&ifp->if_ref_lock);
/* Drain and destroy send queue */
lck_mtx_lock(&ifp->if_flt_lock);
if_flt_monitor_enter(ifp);
- lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
fhead = ifp->if_flt_head;
TAILQ_INIT(&ifp->if_flt_head);
VERIFY(ifp->if_link.tqe_prev == NULL);
VERIFY(ifp->if_detaching_link.tqe_next == NULL);
VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
-
- /* Prefix list should be empty by now */
- VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+ VERIFY(ifp->if_ordered_link.tqe_next == NULL);
+ VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
/* The slot should have been emptied */
VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
inp->wloop_thr = THREAD_NULL;
ptp = inp->poll_thr;
inp->poll_thr = THREAD_NULL;
- tp = inp->input_thr; /* don't nullify now */
+ tp = inp->input_thr; /* don't nullify now */
inp->tag = 0;
inp->net_affinity = FALSE;
lck_mtx_unlock(&inp->input_lck);
/* Tear down poll thread affinity */
if (ptp != NULL) {
VERIFY(ifp->if_eflags & IFEF_RXPOLL);
+ VERIFY(ifp->if_xflags & IFXF_LEGACY);
(void) dlil_affinity_set(ptp,
THREAD_AFFINITY_TAG_NULL);
thread_deallocate(ptp);
/* disassociate ifp DLIL input thread */
ifp->if_inp = NULL;
+ /* tell the input thread to terminate */
lck_mtx_lock_spin(&inp->input_lck);
inp->input_waiting |= DLIL_INPUT_TERMINATE;
if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
wakeup_one((caddr_t)&inp->input_waiting);
}
lck_mtx_unlock(&inp->input_lck);
+ ifnet_lock_done(ifp);
+
+ /* wait for the input thread to terminate */
+ lck_mtx_lock_spin(&inp->input_lck);
+ while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
+ == 0) {
+ (void) msleep(&inp->input_waiting, &inp->input_lck,
+ (PZERO - 1) | PSPIN, inp->input_name, NULL);
+ }
+ lck_mtx_unlock(&inp->input_lck);
+ ifnet_lock_exclusive(ifp);
+
+ /* clean-up input thread state */
+ dlil_clean_threading_info(inp);
+ /* clean-up poll parameters */
+ VERIFY(ifp->if_poll_thread == THREAD_NULL);
+ dlil_reset_rxpoll_params(ifp);
}
/* The driver might unload, so point these to ourselves */
if_free = ifp->if_free;
+ ifp->if_output_dlil = ifp_if_output;
ifp->if_output = ifp_if_output;
ifp->if_pre_enqueue = ifp_if_output;
ifp->if_start = ifp_if_start;
ifp->if_output_ctl = ifp_if_ctl;
+ ifp->if_input_dlil = ifp_if_input;
ifp->if_input_poll = ifp_if_input_poll;
ifp->if_input_ctl = ifp_if_ctl;
ifp->if_ioctl = ifp_if_ioctl;
VERIFY(ifp->if_delegated.family == 0);
VERIFY(ifp->if_delegated.subfamily == 0);
VERIFY(ifp->if_delegated.expensive == 0);
+ VERIFY(ifp->if_delegated.constrained == 0);
+
+ /* QoS marking gets cleared */
+ ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
+ if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
+
ifnet_lock_done(ifp);
lck_mtx_lock(&ifp->if_cached_route_lock);
VERIFY(!ifp->if_fwd_cacheok);
ROUTE_RELEASE(&ifp->if_fwd_route);
- bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+ bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
ROUTE_RELEASE(&ifp->if_src_route);
- bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+ bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
ROUTE_RELEASE(&ifp->if_src_route6);
- bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+ bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
lck_mtx_unlock(&ifp->if_cached_route_lock);
VERIFY(ifp->if_data_threshold == 0);
+ VERIFY(ifp->if_dt_tcall != NULL);
+ VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
ifnet_llreach_ifdetach(ifp);
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
- if (if_free != NULL)
- if_free(ifp);
-
/*
* Finally, mark this ifnet as detached.
*/
}
ifp->if_refflags &= ~IFRF_DETACHING;
lck_mtx_unlock(&ifp->if_ref_lock);
+ if (if_free != NULL) {
+ if_free(ifp);
+ }
- if (dlil_verbose)
- printf("%s: detached\n", if_name(ifp));
+ if (dlil_verbose) {
+ DLIL_PRINTF("%s: detached\n", if_name(ifp));
+ }
/* Release reference held during ifnet attach */
ifnet_release(ifp);
}
-static errno_t
+errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
- m_freem(m);
- return (0);
+ m_freem_list(m);
+ return 0;
}
-static void
+void
ifp_if_start(struct ifnet *ifp)
{
ifnet_purge(ifp);
}
+static errno_t
+ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
+ struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
+ boolean_t poll, struct thread *tp)
+{
+#pragma unused(ifp, m_tail, s, poll, tp)
+ m_freem_list(m_head);
+ return ENXIO;
+}
+
static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
- if (m_head != NULL)
+ if (m_head != NULL) {
*m_head = NULL;
- if (m_tail != NULL)
+ }
+ if (m_tail != NULL) {
*m_tail = NULL;
- if (cnt != NULL)
+ }
+ if (cnt != NULL) {
*cnt = 0;
- if (len != NULL)
+ }
+ if (len != NULL) {
*len = 0;
+ }
}
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
- return (EOPNOTSUPP);
+ return EOPNOTSUPP;
}
static errno_t
{
#pragma unused(ifp, fh, pf)
m_freem(m);
- return (EJUSTRETURN);
+ return EJUSTRETURN;
}
static errno_t
const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
- return (EINVAL);
+ return EINVAL;
}
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
- return (EINVAL);
+ return EINVAL;
}
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
- return (EOPNOTSUPP);
+ return EOPNOTSUPP;
}
+#if CONFIG_EMBEDDED
+static errno_t
+ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
+ const struct sockaddr *sa, const char *ll, const char *t,
+ u_int32_t *pre, u_int32_t *post)
+#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
const struct sockaddr *sa, const char *ll, const char *t)
+#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
- return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
+#if CONFIG_EMBEDDED
+ return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
+#else
+ return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
+#endif /* !CONFIG_EMBEDDED */
}
static errno_t
m_freem(*m);
*m = NULL;
- if (pre != NULL)
+ if (pre != NULL) {
*pre = 0;
- if (post != NULL)
+ }
+ if (post != NULL) {
*post = 0;
+ }
- return (EJUSTRETURN);
+ return EJUSTRETURN;
}
errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
- return (EOPNOTSUPP);
+ return EOPNOTSUPP;
}
static errno_t
{
#pragma unused(ifp, tm, f)
/* XXX not sure what to do here */
- return (0);
+ return 0;
}
static void
#pragma unused(ifp, e)
}
-__private_extern__
-int dlil_if_acquire(u_int32_t family, const void *uniqueid,
- size_t uniqueid_len, struct ifnet **ifp)
+int
+dlil_if_acquire(u_int32_t family, const void *uniqueid,
+ size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
struct ifnet *ifp1 = NULL;
struct dlil_ifnet *dlifp1 = NULL;
+ struct dlil_ifnet *dlifp1_saved = NULL;
void *buf, *base, **pbuf;
int ret = 0;
+ VERIFY(*ifp == NULL);
dlil_if_lock();
+ /*
+ * We absolutely can't have an interface with the same name
+ * in in-use state.
+ * To make sure of that list has to be traversed completely
+ */
TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
ifp1 = (struct ifnet *)dlifp1;
- if (ifp1->if_family != family)
+ if (ifp1->if_family != family) {
continue;
+ }
+ /*
+ * If interface is in use, return EBUSY if either unique id
+ * or interface extended names are the same
+ */
lck_mtx_lock(&dlifp1->dl_if_lock);
- /* same uniqueid and same len or no unique id specified */
- if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
- !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
- /* check for matching interface in use */
+ if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
if (dlifp1->dl_if_flags & DLIF_INUSE) {
- if (uniqueid_len) {
- ret = EBUSY;
+ lck_mtx_unlock(&dlifp1->dl_if_lock);
+ ret = EBUSY;
+ goto end;
+ }
+ }
+
+ if (uniqueid_len) {
+ if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
+ bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
+ if (dlifp1->dl_if_flags & DLIF_INUSE) {
lck_mtx_unlock(&dlifp1->dl_if_lock);
+ ret = EBUSY;
goto end;
+ } else {
+ /* Cache the first interface that can be recycled */
+ if (*ifp == NULL) {
+ *ifp = ifp1;
+ dlifp1_saved = dlifp1;
+ }
+ /*
+ * XXX Do not break or jump to end as we have to traverse
+ * the whole list to ensure there are no name collisions
+ */
}
- } else {
- dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
- lck_mtx_unlock(&dlifp1->dl_if_lock);
- *ifp = ifp1;
- goto end;
}
}
lck_mtx_unlock(&dlifp1->dl_if_lock);
}
+ /* If there's an interface that can be recycled, use that */
+ if (*ifp != NULL) {
+ if (dlifp1_saved != NULL) {
+ lck_mtx_lock(&dlifp1_saved->dl_if_lock);
+ dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
+ lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
+ dlifp1_saved = NULL;
+ }
+ goto end;
+ }
+
/* no interface found, allocate a new one */
buf = zalloc(dlif_zone);
if (buf == NULL) {
bzero(buf, dlif_bufsize);
/* Get the 64-bit aligned base address for this object */
- base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
- sizeof (u_int64_t));
+ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
+ sizeof(u_int64_t));
VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
/*
* Wind back a pointer size from the aligned base and
* save the original address so we can free it later.
*/
- pbuf = (void **)((intptr_t)base - sizeof (void *));
+ pbuf = (void **)((intptr_t)base - sizeof(void *));
*pbuf = buf;
dlifp1 = base;
MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
M_NKE, M_WAITOK);
if (dlifp1->dl_if_uniqueid == NULL) {
- zfree(dlif_zone, dlifp1);
+ zfree(dlif_zone, buf);
ret = ENOMEM;
goto end;
}
ifp1->if_desc.ifd_len = 0;
ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
+
#if CONFIG_MACF_NET
mac_ifnet_label_init(ifp1);
#endif
lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
ifnet_lock_attr);
lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+#if INET
+ lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
+ ifnet_lock_attr);
+ ifp1->if_inetdata = NULL;
+#endif
#if INET6
- lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
+ lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
+ ifnet_lock_attr);
ifp1->if_inet6data = NULL;
#endif
+ lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
+ ifnet_lock_attr);
+ ifp1->if_link_status = NULL;
/* for send data paths */
lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
ifnet_lock_attr);
+ /* thread call allocation is done with sleeping zalloc */
+ ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
+ ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
+ if (ifp1->if_dt_tcall == NULL) {
+ panic_plain("%s: couldn't create if_dt_tcall", __func__);
+ /* NOTREACHED */
+ }
+
TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
*ifp = ifp1;
end:
dlil_if_unlock();
- VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
- IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
+ VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
+ IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
- return (ret);
+ return ret;
}
__private_extern__ void
-dlil_if_release(ifnet_t ifp)
+dlil_if_release(ifnet_t ifp)
{
struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
+ VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
+ if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
+ VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
+ }
+
ifnet_lock_exclusive(ifp);
lck_mtx_lock(&dlifp->dl_if_lock);
dlifp->dl_if_flags &= ~DLIF_INUSE;
ifp->if_name = dlifp->dl_if_namestorage;
/* Reset external name (name + unit) */
ifp->if_xname = dlifp->dl_if_xnamestorage;
- snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
+ snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
"%s?", ifp->if_name);
lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
/*
- * We can either recycle the MAC label here or in dlil_if_acquire().
- * It seems logical to do it here but this means that anything that
- * still has a handle on ifp will now see it as unlabeled.
- * Since the interface is "dead" that may be OK. Revisit later.
- */
+ * We can either recycle the MAC label here or in dlil_if_acquire().
+ * It seems logical to do it here but this means that anything that
+ * still has a handle on ifp will now see it as unlabeled.
+ * Since the interface is "dead" that may be OK. Revisit later.
+ */
mac_ifnet_label_recycle(ifp);
#endif
ifnet_lock_done(ifp);
+/*
+ * Assert that dlil_ifnet_lock is owned (LCK_MTX_ASSERT_OWNED).
+ */
__private_extern__ void
dlil_if_lock_assert(void)
{
- lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
lck_mtx_lock_spin(&ifp->if_cached_route_lock);
lck_mtx_convert_spin(&ifp->if_cached_route_lock);
- route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
+ route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
lck_mtx_unlock(&ifp->if_cached_route_lock);
}
lck_mtx_convert_spin(&ifp->if_cached_route_lock);
if (ifp->if_fwd_cacheok) {
- route_copyin(src, &ifp->if_src_route, sizeof (*src));
+ route_copyin(src, &ifp->if_src_route, sizeof(*src));
} else {
ROUTE_RELEASE(src);
}
lck_mtx_convert_spin(&ifp->if_cached_route_lock);
route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
- sizeof (*dst));
+ sizeof(*dst));
lck_mtx_unlock(&ifp->if_cached_route_lock);
}
if (ifp->if_fwd_cacheok) {
route_copyin((struct route *)src,
- (struct route *)&ifp->if_src_route6, sizeof (*src));
+ (struct route *)&ifp->if_src_route6, sizeof(*src));
} else {
ROUTE_RELEASE(src);
}
#endif /* INET6 */
struct rtentry *
-ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
+ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
- struct route src_rt;
- struct sockaddr_in *dst;
+ struct route src_rt;
+ struct sockaddr_in *dst;
dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
ROUTE_RELEASE(&src_rt);
if (dst->sin_family != AF_INET) {
- bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
- dst->sin_len = sizeof (src_rt.ro_dst);
+ bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
+ dst->sin_len = sizeof(src_rt.ro_dst);
dst->sin_family = AF_INET;
}
dst->sin_addr = src_ip;
- if (src_rt.ro_rt == NULL) {
- src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
- 0, 0, ifp->if_index);
+ VERIFY(src_rt.ro_rt == NULL);
+ src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
+ 0, 0, ifp->if_index);
- if (src_rt.ro_rt != NULL) {
- /* retain a ref, copyin consumes one */
- struct rtentry *rte = src_rt.ro_rt;
- RT_ADDREF(rte);
- ifp_src_route_copyin(ifp, &src_rt);
- src_rt.ro_rt = rte;
- }
+ if (src_rt.ro_rt != NULL) {
+ /* retain a ref, copyin consumes one */
+ struct rtentry *rte = src_rt.ro_rt;
+ RT_ADDREF(rte);
+ ifp_src_route_copyin(ifp, &src_rt);
+ src_rt.ro_rt = rte;
}
}
- return (src_rt.ro_rt);
+ return src_rt.ro_rt;
}
#if INET6
-struct rtentry*
+struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
struct route_in6 src_rt;
!IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
ROUTE_RELEASE(&src_rt);
if (src_rt.ro_dst.sin6_family != AF_INET6) {
- bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
- src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
+ bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
+ src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
src_rt.ro_dst.sin6_family = AF_INET6;
}
src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
- sizeof (src_rt.ro_dst.sin6_addr));
+ sizeof(src_rt.ro_dst.sin6_addr));
if (src_rt.ro_rt == NULL) {
src_rt.ro_rt = rtalloc1_scoped(
- (struct sockaddr *)&src_rt.ro_dst, 0, 0,
- ifp->if_index);
+ (struct sockaddr *)&src_rt.ro_dst, 0, 0,
+ ifp->if_index);
if (src_rt.ro_rt != NULL) {
/* retain a ref, copyin consumes one */
- struct rtentry *rte = src_rt.ro_rt;
+ struct rtentry *rte = src_rt.ro_rt;
RT_ADDREF(rte);
ifp_src_route6_copyin(ifp, &src_rt);
src_rt.ro_rt = rte;
}
}
- return (src_rt.ro_rt);
+ return src_rt.ro_rt;
}
#endif /* INET6 */
+/*
+ * Record a new link quality metric (LQM) for ifp and post a
+ * KEV_DL_LINK_QUALITY_METRIC_CHANGED event when the stored value changes.
+ *
+ * lqm must lie in [IFNET_LQM_MIN, IFNET_LQM_MAX] (enforced by VERIFY).
+ * A value that falls inside one of the threshold bands is replaced by
+ * that band's upper threshold.  A value in [0, IFNET_LQM_THRESH_ABORT]
+ * additionally sets INPCBINFO_HANDLE_LQM_ABORT on tcbinfo and schedules
+ * the fast inpcb timer; this happens before the "unchanged" check.
+ *
+ * locked is non-zero when the caller already holds the ifnet lock
+ * (presumably exclusively, since that is how it is re-taken here).  In
+ * that case the lock is dropped while the event is posted and re-taken
+ * before returning.  When locked is zero the lock is taken and released
+ * inside this function.
+ */
void
-if_lqm_update(struct ifnet *ifp, int lqm)
+if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
struct kev_dl_link_quality_metric_data ev_lqm_data;
VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
/* Normalize to edge */
- if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD)
- lqm = IFNET_LQM_THRESH_BAD;
- else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
+ if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
+ lqm = IFNET_LQM_THRESH_ABORT;
+ atomic_bitset_32(&tcbinfo.ipi_flags,
+ INPCBINFO_HANDLE_LQM_ABORT);
+ inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
+ } else if (lqm > IFNET_LQM_THRESH_ABORT &&
+ lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
+ lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
+ } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
+ lqm <= IFNET_LQM_THRESH_POOR) {
lqm = IFNET_LQM_THRESH_POOR;
- else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
+ } else if (lqm > IFNET_LQM_THRESH_POOR &&
+ lqm <= IFNET_LQM_THRESH_GOOD) {
lqm = IFNET_LQM_THRESH_GOOD;
+ }
- ifnet_lock_exclusive(ifp);
- if (lqm == ifp->if_lqm) {
- ifnet_lock_done(ifp);
- return; /* nothing to update */
+ /*
+ * Take the lock if needed
+ */
+ if (!locked) {
+ ifnet_lock_exclusive(ifp);
+ }
+
+ if (lqm == ifp->if_interface_state.lqm_state &&
+ (ifp->if_interface_state.valid_bitmask &
+ IF_INTERFACE_STATE_LQM_STATE_VALID)) {
+ /*
+ * Release the lock if was not held by the caller
+ */
+ if (!locked) {
+ ifnet_lock_done(ifp);
+ }
+ return; /* nothing to update */
}
- ifp->if_lqm = lqm;
+ ifp->if_interface_state.valid_bitmask |=
+ IF_INTERFACE_STATE_LQM_STATE_VALID;
+ ifp->if_interface_state.lqm_state = lqm;
+
+ /*
+ * Don't want to hold the lock when issuing kernel events
+ */
+ /* Unconditional: the lock is held here in both the locked and !locked cases. */
ifnet_lock_done(ifp);
- bzero(&ev_lqm_data, sizeof (ev_lqm_data));
+ bzero(&ev_lqm_data, sizeof(ev_lqm_data));
ev_lqm_data.link_quality_metric = lqm;
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
- (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
+ (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
+
+ /*
+ * Reacquire the lock for the caller
+ */
+ if (locked) {
+ ifnet_lock_exclusive(ifp);
+ }
+}
+
+/*
+ * Record a new RRC state for ifp and post KEV_DL_RRC_STATE_CHANGED.
+ * Returns without doing anything when the stored state is already valid
+ * and equal to rrc_state.
+ *
+ * The ifnet lock is released around the event post and re-taken
+ * exclusively before returning, so the caller is expected to hold it on
+ * entry (if_state_update() calls this with the exclusive lock held).
+ */
+static void
+if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
+{
+ struct kev_dl_rrc_state kev;
+
+ if (rrc_state == ifp->if_interface_state.rrc_state &&
+ (ifp->if_interface_state.valid_bitmask &
+ IF_INTERFACE_STATE_RRC_STATE_VALID)) {
+ return;
+ }
+
+ ifp->if_interface_state.valid_bitmask |=
+ IF_INTERFACE_STATE_RRC_STATE_VALID;
+
+ ifp->if_interface_state.rrc_state = rrc_state;
+
+ /*
+ * Don't want to hold the lock when issuing kernel events
+ */
+ ifnet_lock_done(ifp);
+
+ bzero(&kev, sizeof(struct kev_dl_rrc_state));
+ kev.rrc_state = rrc_state;
+
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
+ (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
+
+ /* Restore the lock state the caller had on entry. */
+ ifnet_lock_exclusive(ifp);
+}
+
+/*
+ * Apply caller-supplied interface state to ifp.  Only the fields flagged
+ * in if_interface_state->valid_bitmask are consumed.
+ *
+ * Validation is done up front, under the exclusive ifnet lock:
+ *   ENOTSUP - an RRC state was supplied for a non-IFT_CELLULAR interface
+ *   EINVAL  - lqm_state outside [IFNET_LQM_MIN, IFNET_LQM_MAX], or
+ *             rrc_state neither IDLE nor CONNECTED
+ * Returns 0 otherwise.
+ *
+ * if_lqm_update() and if_rrc_state_update() each drop and re-take the
+ * lock while posting their kernel event, so the lock is not held
+ * continuously across this function.
+ */
+errno_t
+if_state_update(struct ifnet *ifp,
+ struct if_interface_state *if_interface_state)
+{
+ u_short if_index_available = 0;
+
+ ifnet_lock_exclusive(ifp);
+
+ if ((ifp->if_type != IFT_CELLULAR) &&
+ (if_interface_state->valid_bitmask &
+ IF_INTERFACE_STATE_RRC_STATE_VALID)) {
+ ifnet_lock_done(ifp);
+ return ENOTSUP;
+ }
+ if ((if_interface_state->valid_bitmask &
+ IF_INTERFACE_STATE_LQM_STATE_VALID) &&
+ (if_interface_state->lqm_state < IFNET_LQM_MIN ||
+ if_interface_state->lqm_state > IFNET_LQM_MAX)) {
+ ifnet_lock_done(ifp);
+ return EINVAL;
+ }
+ if ((if_interface_state->valid_bitmask &
+ IF_INTERFACE_STATE_RRC_STATE_VALID) &&
+ if_interface_state->rrc_state !=
+ IF_INTERFACE_STATE_RRC_STATE_IDLE &&
+ if_interface_state->rrc_state !=
+ IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
+ ifnet_lock_done(ifp);
+ return EINVAL;
+ }
+
+ if (if_interface_state->valid_bitmask &
+ IF_INTERFACE_STATE_LQM_STATE_VALID) {
+ /* locked == 1: we already hold the exclusive lock */
+ if_lqm_update(ifp, if_interface_state->lqm_state, 1);
+ }
+ if (if_interface_state->valid_bitmask &
+ IF_INTERFACE_STATE_RRC_STATE_VALID) {
+ if_rrc_state_update(ifp, if_interface_state->rrc_state);
+ }
+ if (if_interface_state->valid_bitmask &
+ IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
+ ifp->if_interface_state.valid_bitmask |=
+ IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+ ifp->if_interface_state.interface_availability =
+ if_interface_state->interface_availability;
+
+ if (ifp->if_interface_state.interface_availability ==
+ IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
+ os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
+ __func__, if_name(ifp), ifp->if_index);
+ /* Remember the index so the probe can be sent after unlocking. */
+ if_index_available = ifp->if_index;
+ } else {
+ os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
+ __func__, if_name(ifp), ifp->if_index);
+ }
+ }
+ ifnet_lock_done(ifp);
+
+ /*
+ * Check if the TCP connections going on this interface should be
+ * forced to send probe packets instead of waiting for TCP timers
+ * to fire. This is done on an explicit notification such as
+ * SIOCSIFINTERFACESTATE which marks the interface as available.
+ */
+ if (if_index_available > 0) {
+ tcp_interface_send_probe(if_index_available);
+ }
+
+ return 0;
+}
+
+/*
+ * Copy ifp's interface state into *if_interface_state under the shared
+ * ifnet lock.  The output valid_bitmask is cleared first; each of the
+ * RRC state, LQM state and interface availability fields is copied, and
+ * its valid bit set, only when it is marked valid on ifp.  Fields that
+ * are not valid on ifp are left untouched in the output.
+ */
+void
+if_get_state(struct ifnet *ifp,
+ struct if_interface_state *if_interface_state)
+{
+ ifnet_lock_shared(ifp);
+
+ if_interface_state->valid_bitmask = 0;
+
+ if (ifp->if_interface_state.valid_bitmask &
+ IF_INTERFACE_STATE_RRC_STATE_VALID) {
+ if_interface_state->valid_bitmask |=
+ IF_INTERFACE_STATE_RRC_STATE_VALID;
+ if_interface_state->rrc_state =
+ ifp->if_interface_state.rrc_state;
+ }
+ if (ifp->if_interface_state.valid_bitmask &
+ IF_INTERFACE_STATE_LQM_STATE_VALID) {
+ if_interface_state->valid_bitmask |=
+ IF_INTERFACE_STATE_LQM_STATE_VALID;
+ if_interface_state->lqm_state =
+ ifp->if_interface_state.lqm_state;
+ }
+ if (ifp->if_interface_state.valid_bitmask &
+ IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
+ if_interface_state->valid_bitmask |=
+ IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+ if_interface_state->interface_availability =
+ ifp->if_interface_state.interface_availability;
+ }
+
+ ifnet_lock_done(ifp);
+}
+
+/*
+ * Turn connectivity probing on (conn_probe == 1) or off (conn_probe == 0)
+ * for ifp by setting or clearing IFEF_PROBE_CONNECTIVITY under the
+ * exclusive ifnet lock.  Any other conn_probe value returns EINVAL
+ * without changing the flag.
+ *
+ * After the lock is dropped, NECP clients are refreshed (when NECP is
+ * built in) and tcp_probe_connectivity() is called.  Returns 0 on
+ * success.
+ */
+errno_t
+if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
+{
+ ifnet_lock_exclusive(ifp);
+ if (conn_probe > 1) {
+ ifnet_lock_done(ifp);
+ return EINVAL;
+ }
+ if (conn_probe == 0) {
+ ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
+ } else {
+ ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
+ }
+ ifnet_lock_done(ifp);
+
+#if NECP
+ necp_update_all_clients();
+#endif /* NECP */
+
+ tcp_probe_connectivity(ifp, conn_probe);
+ return 0;
}
/* for uuid.c */
-int
-uuid_get_ethernet(u_int8_t *node)
+/*
+ * Walk ifnet_head looking for the interface named "en" with unit 0.
+ *
+ * Returns that interface's if_index, or 0 when it is not found.  In the
+ * not-found case *ret_other_index receives a fallback index: the "en"
+ * interface with the lowest unit number if there is one, otherwise the
+ * first interface not named "en" whose if_type is IFT_ETHER, otherwise 0.
+ * *ret_other_index is always 0 when en0 is found.
+ *
+ * This function does not take the ifnet head lock itself;
+ * uuid_get_ethernet() takes it (shared) before calling.
+ */
+static int
+get_ether_index(int * ret_other_index)
{
struct ifnet *ifp;
- struct sockaddr_dl *sdl;
+ int en0_index = 0;
+ int other_en_index = 0;
+ int any_ether_index = 0;
+ short best_unit = 0;
- ifnet_head_lock_shared();
+ *ret_other_index = 0;
TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+ /*
+ * find en0, or if not en0, the lowest unit en*, and if not
+ * that, any ethernet
+ */
ifnet_lock_shared(ifp);
- IFA_LOCK_SPIN(ifp->if_lladdr);
- sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
- if (sdl->sdl_type == IFT_ETHER) {
- memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
- IFA_UNLOCK(ifp->if_lladdr);
- ifnet_lock_done(ifp);
- ifnet_head_done();
- return (0);
+ if (strcmp(ifp->if_name, "en") == 0) {
+ if (ifp->if_unit == 0) {
+ /* found en0, we're done */
+ en0_index = ifp->if_index;
+ ifnet_lock_done(ifp);
+ break;
+ }
+ /* best_unit is only meaningful once other_en_index is non-zero */
+ if (other_en_index == 0 || ifp->if_unit < best_unit) {
+ other_en_index = ifp->if_index;
+ best_unit = ifp->if_unit;
+ }
+ } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
+ any_ether_index = ifp->if_index;
}
- IFA_UNLOCK(ifp->if_lladdr);
ifnet_lock_done(ifp);
}
- ifnet_head_done();
+ if (en0_index == 0) {
+ if (other_en_index != 0) {
+ *ret_other_index = other_en_index;
+ } else if (any_ether_index != 0) {
+ *ret_other_index = any_ether_index;
+ }
+ }
+ return en0_index;
+}
+
+/*
+ * Copy ETHER_ADDR_LEN bytes of an Ethernet link-layer address into node.
+ *
+ * The index of en0 is cached in a function-static variable; the lookup
+ * via get_ether_index() is repeated whenever the cache is empty or the
+ * cached index no longer maps to an interface in ifindex2ifnet[].  When
+ * en0 is not found, the fallback index reported by get_ether_index() is
+ * used for this call only (it is not cached).
+ *
+ * Runs with the ifnet head lock held shared.  Returns 0 on success, or
+ * -1 when no candidate interface exists (node is then left untouched).
+ */
+int
+uuid_get_ethernet(u_int8_t *node)
+{
+ static int en0_index;
+ struct ifnet *ifp;
+ int other_index = 0;
+ int the_index = 0;
+ int ret;
- return (-1);
+ ifnet_head_lock_shared();
+ if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
+ en0_index = get_ether_index(&other_index);
+ }
+ if (en0_index != 0) {
+ the_index = en0_index;
+ } else if (other_index != 0) {
+ the_index = other_index;
+ }
+ if (the_index != 0) {
+ ifp = ifindex2ifnet[the_index];
+ VERIFY(ifp != NULL);
+ memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ ifnet_head_done();
+ return ret;
}
static int
i = if_rxpoll;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (net_rxpoll == 0)
- return (ENXIO);
+ if (net_rxpoll == 0) {
+ return ENXIO;
+ }
if_rxpoll = i;
- return (err);
+ return err;
}
static int
q = if_rxpoll_mode_holdtime;
err = sysctl_handle_quad(oidp, &q, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
+ if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
q = IF_RXPOLL_MODE_HOLDTIME_MIN;
+ }
if_rxpoll_mode_holdtime = q;
- return (err);
+ return err;
}
static int
q = if_rxpoll_sample_holdtime;
err = sysctl_handle_quad(oidp, &q, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (q < IF_RXPOLL_SAMPLETIME_MIN)
+ if (q < IF_RXPOLL_SAMPLETIME_MIN) {
q = IF_RXPOLL_SAMPLETIME_MIN;
+ }
if_rxpoll_sample_holdtime = q;
- return (err);
+ return err;
}
static int
q = if_rxpoll_interval_time;
err = sysctl_handle_quad(oidp, &q, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (q < IF_RXPOLL_INTERVALTIME_MIN)
+ if (q < IF_RXPOLL_INTERVALTIME_MIN) {
q = IF_RXPOLL_INTERVALTIME_MIN;
+ }
if_rxpoll_interval_time = q;
- return (err);
+ return err;
}
static int
uint32_t i;
int err;
- i = if_rxpoll_wlowat;
+ i = if_sysctl_rxpoll_wlowat;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (i == 0 || i >= if_rxpoll_whiwat)
- return (EINVAL);
+ if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
+ return EINVAL;
+ }
- if_rxpoll_wlowat = i;
- return (err);
+ if_sysctl_rxpoll_wlowat = i;
+ return err;
}
static int
uint32_t i;
int err;
- i = if_rxpoll_whiwat;
+ i = if_sysctl_rxpoll_whiwat;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (i <= if_rxpoll_wlowat)
- return (EINVAL);
+ if (i <= if_sysctl_rxpoll_wlowat) {
+ return EINVAL;
+ }
- if_rxpoll_whiwat = i;
- return (err);
+ if_sysctl_rxpoll_whiwat = i;
+ return err;
}
static int
i = if_sndq_maxlen;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (i < IF_SNDQ_MINLEN)
+ if (i < IF_SNDQ_MINLEN) {
i = IF_SNDQ_MINLEN;
+ }
if_sndq_maxlen = i;
- return (err);
+ return err;
}
static int
i = if_rcvq_maxlen;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (i < IF_RCVQ_MINLEN)
+ if (i < IF_RCVQ_MINLEN) {
i = IF_RCVQ_MINLEN;
+ }
if_rcvq_maxlen = i;
- return (err);
+ return err;
}
-void
+/*
+ * Announce that a node is present on ifp.
+ *
+ * sa must be an AF_LINK or AF_INET6 address (enforced by VERIFY); it is
+ * decomposed into the link-layer and IPv6 addresses carried in the event
+ * by nd6_alt_node_addr_decompose().  rssi, lqm, npm and the service info
+ * are copied into the event (sizeof(kev.node_service_info) bytes are
+ * read from srvinfo).
+ *
+ * Returns the result of nd6_alt_node_present().  The
+ * KEV_DL_NODE_PRESENCE event is posted only when that result is 0; a
+ * failure to post is logged but not reported to the caller.
+ */
+int
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
struct kev_dl_node_presence kev;
struct sockaddr_dl *sdl;
struct sockaddr_in6 *sin6;
+ int ret = 0;
VERIFY(ifp);
VERIFY(sa);
VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
- bzero(&kev, sizeof (kev));
+ bzero(&kev, sizeof(kev));
sin6 = &kev.sin6_node_address;
sdl = &kev.sdl_node_address;
nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
kev.rssi = rssi;
kev.link_quality_metric = lqm;
kev.node_proximity_metric = npm;
- bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
+ bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
+
+ ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
+ if (ret == 0) {
+ int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
+ &kev.link_data, sizeof(kev));
+ if (err != 0) {
+ /* Adjacent literals are concatenated; keep the separating space. */
+ log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
+ "error %d\n", __func__, err);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Announce that a node is no longer present on ifp by posting a
+ * KEV_DL_NODE_ABSENCE event.
+ *
+ * sa must be an AF_LINK or AF_INET6 address (enforced by VERIFY).  For
+ * AF_INET6 the address is copied into the event and
+ * nd6_alt_node_absent() is given the event's link-layer slot to fill in.
+ * For AF_LINK both event addresses are derived with
+ * nd6_alt_node_addr_decompose() and nd6_alt_node_absent() is called with
+ * a NULL link-layer argument.  In both cases sdl_type and sdl_index in
+ * the event are then overwritten with ifp's type and index.
+ */
+void
+dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
+{
+ struct kev_dl_node_absence kev = {};
+ struct sockaddr_in6 *kev_sin6 = NULL;
+ struct sockaddr_dl *kev_sdl = NULL;
+
+ VERIFY(ifp != NULL);
+ VERIFY(sa != NULL);
+ VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
+
+ kev_sin6 = &kev.sin6_node_address;
+ kev_sdl = &kev.sdl_node_address;
+
+ if (sa->sa_family == AF_INET6) {
+ /*
+ * If IPv6 address is given, get the link layer
+ * address from what was cached in the neighbor cache
+ */
+ VERIFY(sa->sa_len <= sizeof(*kev_sin6));
+ bcopy(sa, kev_sin6, sa->sa_len);
+ nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
+ } else {
+ /*
+ * If passed address is AF_LINK type, derive the address
+ * based on the link address.
+ */
+ nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
+ nd6_alt_node_absent(ifp, kev_sin6, NULL);
+ }
+
+ kev_sdl->sdl_type = ifp->if_type;
+ kev_sdl->sdl_index = ifp->if_index;
- nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
- &kev.link_data, sizeof (kev));
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
+ &kev.link_data, sizeof(kev));
}
-void
-dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
+/*
+ * Variant of dlil_node_present() in which the caller supplies both the
+ * IPv6 address (sa, must be AF_INET6) and the link-layer address (sdl,
+ * must be AF_LINK); both are enforced by VERIFY, as are their lengths
+ * against the event's address slots.
+ *
+ * The event carries copies of sa and sdl, with sdl_type and sdl_index
+ * overwritten from ifp, plus rssi, lqm, npm and
+ * sizeof(kev.node_service_info) bytes read from srvinfo.
+ *
+ * Returns the result of nd6_alt_node_present(), which is called with the
+ * caller's sa and sdl rather than the event copies.  The
+ * KEV_DL_NODE_PRESENCE event is posted only when that result is 0; a
+ * failure to post is logged but not reported to the caller.
+ */
+int
+dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
+ int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
- struct kev_dl_node_absence kev;
- struct sockaddr_in6 *sin6;
- struct sockaddr_dl *sdl;
+ struct kev_dl_node_presence kev = {};
+ struct sockaddr_dl *kev_sdl = NULL;
+ struct sockaddr_in6 *kev_sin6 = NULL;
+ int ret = 0;
- VERIFY(ifp);
- VERIFY(sa);
- VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
+ VERIFY(ifp != NULL);
+ VERIFY(sa != NULL && sdl != NULL);
+ VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
- bzero(&kev, sizeof (kev));
- sin6 = &kev.sin6_node_address;
- sdl = &kev.sdl_node_address;
- nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
+ kev_sin6 = &kev.sin6_node_address;
+ kev_sdl = &kev.sdl_node_address;
- nd6_alt_node_absent(ifp, sin6);
- dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
- &kev.link_data, sizeof (kev));
+ VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
+ bcopy(sdl, kev_sdl, sdl->sdl_len);
+ kev_sdl->sdl_type = ifp->if_type;
+ kev_sdl->sdl_index = ifp->if_index;
+
+ VERIFY(sa->sa_len <= sizeof(*kev_sin6));
+ bcopy(sa, kev_sin6, sa->sa_len);
+
+ kev.rssi = rssi;
+ kev.link_quality_metric = lqm;
+ kev.node_proximity_metric = npm;
+ bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
+
+ ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
+ if (ret == 0) {
+ int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
+ &kev.link_data, sizeof(kev));
+ if (err != 0) {
+ /*
+ * The two literals must be adjacent (no comma) so they form
+ * one format string; otherwise the second literal is consumed
+ * as the %s argument and the error code is never printed.
+ */
+ log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
+ "error %d\n", __func__, err);
+ }
+ }
+ return ret;
}
const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
- kauth_cred_t *credp)
+ kauth_cred_t *credp)
{
const u_int8_t *bytes;
size_t size;
default:
credp = NULL;
break;
- };
+ }
+ ;
if (credp && mac_system_check_info(*credp, "net.link.addr")) {
static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
- [0] = 2
+ [0] = 2
};
- switch (sdl->sdl_type) {
- case IFT_ETHER:
- VERIFY(size == ETHER_ADDR_LEN);
- bytes = unspec;
- break;
- case IFT_IEEE1394:
- VERIFY(size == FIREWIRE_EUI64_LEN);
- bytes = unspec;
- break;
- default:
- VERIFY(FALSE);
- break;
- };
+ bytes = unspec;
}
}
#else
#pragma unused(credp)
#endif
- if (sizep != NULL) *sizep = size;
- return (bytes);
+ if (sizep != NULL) {
+ *sizep = size;
+ }
+ return bytes;
}
void
VERIFY(ifp != NULL);
VERIFY(modid != NULL);
- _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
- _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
+ _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
+ _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
- bzero(&kev, sizeof (&kev));
+ bzero(&kev, sizeof(kev));
microtime(&tv);
kev.timestamp = tv.tv_sec;
bcopy(modid, &kev.modid, DLIL_MODIDLEN);
- if (info != NULL)
+ if (info != NULL) {
bcopy(info, &kev.info, DLIL_MODARGLEN);
+ }
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
- &kev.link_data, sizeof (kev));
+ &kev.link_data, sizeof(kev));
}
errno_t
/*
* XXX: Use priv_check_cred() instead of root check?
*/
- if ((result = proc_suser(p)) != 0)
- return (result);
+ if ((result = proc_suser(p)) != 0) {
+ return result;
+ }
if (ifr->ifr_opportunistic.ifo_flags ==
- IFRIFOF_BLOCK_OPPORTUNISTIC)
+ IFRIFOF_BLOCK_OPPORTUNISTIC) {
level = IFNET_THROTTLE_OPPORTUNISTIC;
- else if (ifr->ifr_opportunistic.ifo_flags == 0)
+ } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
level = IFNET_THROTTLE_OFF;
- else
+ } else {
result = EINVAL;
+ }
- if (result == 0)
+ if (result == 0) {
result = ifnet_set_throttle(ifp, level);
+ }
} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
ifr->ifr_opportunistic.ifo_flags = 0;
if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
if (result == 0) {
uint32_t flags = 0;
flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
- INPCB_OPPORTUNISTIC_SETCMD : 0;
- flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
- INPCB_OPPORTUNISTIC_THROTTLEON : 0;
+ INPCB_OPPORTUNISTIC_SETCMD : 0;
+ flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
+ INPCB_OPPORTUNISTIC_THROTTLEON : 0;
ifr->ifr_opportunistic.ifo_inuse =
udp_count_opportunistic(ifp->if_index, flags) +
tcp_count_opportunistic(ifp->if_index, flags);
}
- if (result == EALREADY)
+ if (result == EALREADY) {
result = 0;
+ }
- return (result);
+ return result;
}
int
struct ifclassq *ifq;
int err = 0;
- if (!(ifp->if_eflags & IFEF_TXSTART))
- return (ENXIO);
+ if (!(ifp->if_eflags & IFEF_TXSTART)) {
+ return ENXIO;
+ }
*level = IFNET_THROTTLE_OFF;
ifq = &ifp->if_snd;
IFCQ_LOCK(ifq);
/* Throttling works only for IFCQ, not ALTQ instances */
- if (IFCQ_IS_ENABLED(ifq))
+ if (IFCQ_IS_ENABLED(ifq)) {
IFCQ_GET_THROTTLE(ifq, *level, err);
+ }
IFCQ_UNLOCK(ifq);
- return (err);
+ return err;
}
int
struct ifclassq *ifq;
int err = 0;
- if (!(ifp->if_eflags & IFEF_TXSTART))
- return (ENXIO);
+ if (!(ifp->if_eflags & IFEF_TXSTART)) {
+ return ENXIO;
+ }
ifq = &ifp->if_snd;
switch (level) {
case IFNET_THROTTLE_OFF:
case IFNET_THROTTLE_OPPORTUNISTIC:
-#if PF_ALTQ
- /* Throttling works only for IFCQ, not ALTQ instances */
- if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
- return (ENXIO);
-#endif /* PF_ALTQ */
break;
default:
- return (EINVAL);
+ return EINVAL;
}
IFCQ_LOCK(ifq);
- if (IFCQ_IS_ENABLED(ifq))
+ if (IFCQ_IS_ENABLED(ifq)) {
IFCQ_SET_THROTTLE(ifq, level, err);
+ }
IFCQ_UNLOCK(ifq);
if (err == 0) {
- printf("%s: throttling level set to %d\n", if_name(ifp),
+ DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
level);
- if (level == IFNET_THROTTLE_OFF)
+#if NECP
+ necp_update_all_clients();
+#endif /* NECP */
+ if (level == IFNET_THROTTLE_OFF) {
ifnet_start(ifp);
+ }
}
- return (err);
+ return err;
}
errno_t
if (cmd == SIOCSIFLOG) {
if ((result = priv_check_cred(kauth_cred_get(),
- PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
- return (result);
+ PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
+ return result;
+ }
level = ifr->ifr_log.ifl_level;
- if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
+ if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
result = EINVAL;
+ }
flags = ifr->ifr_log.ifl_flags;
- if ((flags &= IFNET_LOGF_MASK) == 0)
+ if ((flags &= IFNET_LOGF_MASK) == 0) {
result = EINVAL;
+ }
category = ifr->ifr_log.ifl_category;
subcategory = ifr->ifr_log.ifl_subcategory;
- if (result == 0)
+ if (result == 0) {
result = ifnet_set_log(ifp, level, flags,
category, subcategory);
+ }
} else {
result = ifnet_get_log(ifp, &level, &flags, &category,
&subcategory);
}
}
- return (result);
+ return result;
}
int
if (ifp->if_output_ctl != NULL) {
struct ifnet_log_params l;
- bzero(&l, sizeof (l));
+ bzero(&l, sizeof(l));
l.level = level;
l.flags = flags;
l.flags &= ~IFNET_LOGF_DLIL;
/* Send this request to lower layers */
if (l.flags != 0) {
err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
- sizeof (l), &l);
+ sizeof(l), &l);
}
} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
/*
* silently ignore facilities other than ours.
*/
flags &= IFNET_LOGF_DLIL;
- if (flags == 0 && (!ifp->if_log.flags & IFNET_LOGF_DLIL))
+ if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
level = 0;
+ }
}
if (err == 0) {
- if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
+ if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
ifp->if_log.flags = 0;
- else
+ } else {
ifp->if_log.flags |= flags;
+ }
log(LOG_INFO, "%s: logging level set to %d flags=%b "
"arg=%b, category=%d subcategory=%d\n", if_name(ifp),
category, subcategory);
}
- return (err);
+ return err;
}
+/*
+ * Read back ifp's logging parameters from ifp->if_log.  Each output
+ * pointer is optional: a NULL pointer means that field is not wanted.
+ * Always returns 0.
+ */
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
int32_t *category, int32_t *subcategory)
{
- if (level != NULL)
+ if (level != NULL) {
*level = ifp->if_log.level;
- if (flags != NULL)
+ }
+ if (flags != NULL) {
*flags = ifp->if_log.flags;
- if (category != NULL)
+ }
+ if (category != NULL) {
*category = ifp->if_log.category;
- if (subcategory != NULL)
+ }
+ if (subcategory != NULL) {
*subcategory = ifp->if_log.subcategory;
+ }
- return (0);
+ return 0;
}
int
(void) pf_ifaddr_hook(ifp);
#endif /* PF */
- if (ifp->if_output_ctl == NULL)
- return (EOPNOTSUPP);
+ if (ifp->if_output_ctl == NULL) {
+ return EOPNOTSUPP;
+ }
- bzero(&na, sizeof (na));
+ bzero(&na, sizeof(na));
na.address_family = af;
- return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
- sizeof (na), &na));
+ return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
+ sizeof(na), &na);
}
+/*
+ * Return ifp's flow hash in *flowid.
+ *
+ * Returns EINVAL when ifp or flowid is NULL, ENXIO when the interface
+ * does not have IFEF_TXSTART set or fails the IF_FULLY_ATTACHED() check,
+ * and 0 on success.  *flowid is written only on success.
+ */
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
if (ifp == NULL || flowid == NULL) {
- return (EINVAL);
+ return EINVAL;
} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- !(ifp->if_refflags & IFRF_ATTACHED)) {
- return (ENXIO);
+ !IF_FULLY_ATTACHED(ifp)) {
+ return ENXIO;
}
*flowid = ifp->if_flowhash;
- return (0);
+ return 0;
}
errno_t
int err;
if (ifp == NULL) {
- return (EINVAL);
+ return EINVAL;
} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- !(ifp->if_refflags & IFRF_ATTACHED)) {
- return (ENXIO);
+ !IF_FULLY_ATTACHED(ifp)) {
+ return ENXIO;
}
if ((err = ifnet_fc_add(ifp)) == 0) {
ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
lck_mtx_unlock(&ifp->if_start_lock);
}
- return (err);
+ return err;
}
/*
 * Kick the interface's output path by calling ifnet_start_common().
 *
 * Returns EINVAL when ifp is NULL, ENXIO when IFEF_TXSTART is not set in
 * if_eflags or the interface is not fully attached, and 0 after
 * ifnet_start_common() has been called.
 *
 * NOTE(review): the second argument (TRUE) presumably asks
 * ifnet_start_common() to resume a flow-controlled interface -- confirm
 * against that function's definition.
 */
errno_t
ifnet_enable_output(struct ifnet *ifp)
{
if (ifp == NULL) {
- return (EINVAL);
+ return EINVAL;
} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
- !(ifp->if_refflags & IFRF_ATTACHED)) {
- return (ENXIO);
+ !IF_FULLY_ATTACHED(ifp)) {
+ return ENXIO;
}
- ifnet_start_common(ifp, 1);
- return (0);
+ ifnet_start_common(ifp, TRUE);
+ return 0;
}
void
struct ifnet *ifp;
ifce = ifnet_fc_get(flowhash);
- if (ifce == NULL)
+ if (ifce == NULL) {
return;
+ }
VERIFY(ifce->ifce_ifp != NULL);
ifp = ifce->ifce_ifp;
/* flow hash gets recalculated per attach, so check */
if (ifnet_is_attached(ifp, 1)) {
- if (ifp->if_flowhash == flowhash)
+ if (ifp->if_flowhash == flowhash) {
(void) ifnet_enable_output(ifp);
+ }
ifnet_decr_iorefcnt(ifp);
}
ifnet_fc_entry_free(ifce);
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
- return (fc1->ifce_flowhash - fc2->ifce_flowhash);
+ return fc1->ifce_flowhash - fc2->ifce_flowhash;
}
static int
VERIFY(ifp->if_flowhash != 0);
flowhash = ifp->if_flowhash;
- bzero(&keyfc, sizeof (keyfc));
+ bzero(&keyfc, sizeof(keyfc));
keyfc.ifce_flowhash = flowhash;
lck_mtx_lock_spin(&ifnet_fc_lock);
if (ifce != NULL && ifce->ifce_ifp == ifp) {
/* Entry is already in ifnet_fc_tree, return */
lck_mtx_unlock(&ifnet_fc_lock);
- return (0);
+ return 0;
}
if (ifce != NULL) {
* avoid adding a second one when there is a collision.
*/
lck_mtx_unlock(&ifnet_fc_lock);
- return (EAGAIN);
+ return EAGAIN;
}
/* become regular mutex */
lck_mtx_convert_spin(&ifnet_fc_lock);
- ifce = zalloc_noblock(ifnet_fc_zone);
+ ifce = zalloc(ifnet_fc_zone);
if (ifce == NULL) {
/* memory allocation failed */
lck_mtx_unlock(&ifnet_fc_lock);
- return (ENOMEM);
+ return ENOMEM;
}
bzero(ifce, ifnet_fc_zone_size);
RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
lck_mtx_unlock(&ifnet_fc_lock);
- return (0);
+ return 0;
}
static struct ifnet_fc_entry *
struct ifnet_fc_entry keyfc, *ifce;
struct ifnet *ifp;
- bzero(&keyfc, sizeof (keyfc));
+ bzero(&keyfc, sizeof(keyfc));
keyfc.ifce_flowhash = flowhash;
lck_mtx_lock_spin(&ifnet_fc_lock);
if (ifce == NULL) {
/* Entry is not present in ifnet_fc_tree, return */
lck_mtx_unlock(&ifnet_fc_lock);
- return (NULL);
+ return NULL;
}
RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
}
lck_mtx_unlock(&ifnet_fc_lock);
- return (ifce);
+ return ifce;
}
static void
struct ifnet_flowhash_key fh __attribute__((aligned(8)));
uint32_t flowhash = 0;
- if (ifnet_flowhash_seed == 0)
+ if (ifnet_flowhash_seed == 0) {
ifnet_flowhash_seed = RandomULong();
+ }
- bzero(&fh, sizeof (fh));
+ bzero(&fh, sizeof(fh));
- (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
+ (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
fh.ifk_unit = ifp->if_unit;
fh.ifk_flags = ifp->if_flags;
fh.ifk_eflags = ifp->if_eflags;
fh.ifk_rand2 = RandomULong();
try_again:
- flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
+ flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
if (flowhash == 0) {
/* try to get a non-zero flowhash */
ifnet_flowhash_seed = RandomULong();
goto try_again;
}
- return (flowhash);
+ return flowhash;
+}
+/*
+ * Store, replace or clear the per-address-family network signature kept
+ * in the interface's IN_IFEXTRA / IN6_IFEXTRA data.
+ *
+ * family: AF_INET or AF_INET6; any other value yields EINVAL.
+ * len:    number of bytes copied from data.  0 clears the stored
+ *         signature (length reset, buffer zeroed); a value larger than
+ *         the netsig buffer yields EINVAL.
+ * flags:  unused.
+ * data:   source bytes; only read when len is non-zero.
+ *
+ * Returns 0 on success and ENOMEM when the family's extra data is NULL.
+ * The update is done under the exclusive inet / inet6 data lock.
+ * Neither ifp nor data is NULL-checked here.
+ */
+int
+ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
+ uint16_t flags, uint8_t *data)
+{
+#pragma unused(flags)
+ int error = 0;
+ /*
+  * Each case takes its lock first.  The two early "break" paths (clear,
+  * oversized len) skip the unlock at the bottom of the case, so they
+  * release the lock themselves before leaving.
+  */
+ switch (family) {
+ case AF_INET:
+ if_inetdata_lock_exclusive(ifp);
+ if (IN_IFEXTRA(ifp) != NULL) {
+ if (len == 0) {
+ /* Allow clearing the signature */
+ IN_IFEXTRA(ifp)->netsig_len = 0;
+ bzero(IN_IFEXTRA(ifp)->netsig,
+ sizeof(IN_IFEXTRA(ifp)->netsig));
+ if_inetdata_lock_done(ifp);
+ break;
+ } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
+ error = EINVAL;
+ if_inetdata_lock_done(ifp);
+ break;
+ }
+ IN_IFEXTRA(ifp)->netsig_len = len;
+ bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
+ } else {
+ error = ENOMEM;
+ }
+ if_inetdata_lock_done(ifp);
+ break;
+
+ case AF_INET6:
+ if_inet6data_lock_exclusive(ifp);
+ if (IN6_IFEXTRA(ifp) != NULL) {
+ if (len == 0) {
+ /* Allow clearing the signature */
+ IN6_IFEXTRA(ifp)->netsig_len = 0;
+ bzero(IN6_IFEXTRA(ifp)->netsig,
+ sizeof(IN6_IFEXTRA(ifp)->netsig));
+ if_inet6data_lock_done(ifp);
+ break;
+ } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
+ error = EINVAL;
+ if_inet6data_lock_done(ifp);
+ break;
+ }
+ IN6_IFEXTRA(ifp)->netsig_len = len;
+ bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
+ } else {
+ error = ENOMEM;
+ }
+ if_inet6data_lock_done(ifp);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+/*
+ * Copy the stored network signature for an address family out of the
+ * interface's IN_IFEXTRA / IN6_IFEXTRA data.
+ *
+ * family: AF_INET or AF_INET6; any other value yields EINVAL.
+ * len:    in/out.  On entry the capacity of data; on return it holds the
+ *         stored signature length (which is 0 when ENOENT is returned).
+ * flags:  optional; set to 0 on success when non-NULL.
+ * data:   destination buffer for the signature bytes.
+ *
+ * Returns 0 on success, EINVAL when ifp, len or data is NULL, when *len
+ * is 0, or when *len is smaller than the stored signature; ENOENT when
+ * no signature is stored; ENOMEM when the family's extra data is NULL.
+ * The read is done under the shared inet / inet6 data lock.
+ */
+int
+ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
+ uint16_t *flags, uint8_t *data)
+{
+ int error = 0;
+
+ if (ifp == NULL || len == NULL || data == NULL) {
+ return EINVAL;
+ }
+
+ switch (family) {
+ case AF_INET:
+ if_inetdata_lock_shared(ifp);
+ if (IN_IFEXTRA(ifp) != NULL) {
+ if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
+ error = EINVAL;
+ if_inetdata_lock_done(ifp); /* break below skips the common unlock */
+ break;
+ }
+ if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
+ bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
+ } else {
+ error = ENOENT;
+ }
+ } else {
+ error = ENOMEM;
+ }
+ if_inetdata_lock_done(ifp);
+ break;
+
+ case AF_INET6:
+ if_inet6data_lock_shared(ifp);
+ if (IN6_IFEXTRA(ifp) != NULL) {
+ if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
+ error = EINVAL;
+ if_inet6data_lock_done(ifp); /* break below skips the common unlock */
+ break;
+ }
+ if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
+ bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
+ } else {
+ error = ENOENT;
+ }
+ } else {
+ error = ENOMEM;
+ }
+ if_inet6data_lock_done(ifp);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (error == 0 && flags != NULL) {
+ *flags = 0;
+ }
+
+ return error;
+}
+
+#if INET6
+int
+ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
+{
+ int i, error = 0, one_set = 0;
+ /*
+  * Install or clear the interface's NAT64 prefixes.  prefixes is read
+  * as an array of NAT64_MAX_NUM_PREFIXES entries; neither it nor ifp is
+  * NULL-checked.  Slot i of the interface table is cleared when
+  * prefixes[i].prefix_len is 0 and overwritten otherwise.
+  *
+  * Returns 0 on success, ENOMEM when IN6_IFEXTRA(ifp) is NULL, and
+  * EINVAL for a prefix length other than the NAT64_PREFIX_LEN_* values
+  * or for a prefix matching IN6_IS_SCOPE_EMBED().  A failure at slot i
+  * does not undo slots already written earlier in the loop.
+  */
+ if_inet6data_lock_exclusive(ifp);
+
+ if (IN6_IFEXTRA(ifp) == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
+ uint32_t prefix_len =
+ prefixes[i].prefix_len;
+ struct in6_addr *prefix =
+ &prefixes[i].ipv6_prefix;
+
+ if (prefix_len == 0) {
+ clat_log0((LOG_DEBUG,
+ "NAT64 prefixes purged from Interface %s\n",
+ if_name(ifp)));
+ /* Allow clearing the prefix in this slot */
+ IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
+ bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
+ sizeof(struct in6_addr));
+
+ continue;
+ } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
+ prefix_len != NAT64_PREFIX_LEN_40 &&
+ prefix_len != NAT64_PREFIX_LEN_48 &&
+ prefix_len != NAT64_PREFIX_LEN_56 &&
+ prefix_len != NAT64_PREFIX_LEN_64 &&
+ prefix_len != NAT64_PREFIX_LEN_96) {
+ clat_log0((LOG_DEBUG,
+ "NAT64 prefixlen is incorrect %d\n", prefix_len));
+ error = EINVAL;
+ goto out;
+ }
+
+ if (IN6_IS_SCOPE_EMBED(prefix)) {
+ clat_log0((LOG_DEBUG,
+ "NAT64 prefix has interface/link local scope.\n"));
+ error = EINVAL;
+ goto out;
+ }
+
+ IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
+ bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
+ sizeof(struct in6_addr));
+ clat_log0((LOG_DEBUG,
+ "NAT64 prefix set to %s with prefixlen: %d\n",
+ ip6_sprintf(prefix), prefix_len));
+ one_set = 1;
+ }
+
+out:
+ if_inet6data_lock_done(ifp);
+ /* Notify NECP after the lock is dropped, and only if a prefix was stored */
+ if (error == 0 && one_set != 0) {
+ necp_update_all_clients();
+ }
+
+ return error;
+}
+/*
+ * Copy the interface's NAT64 prefix table into prefixes.
+ *
+ * prefixes is optional; when non-NULL it receives the whole
+ * nat64_prefixes array (sizeof the interface's table), so the caller's
+ * buffer must be at least that large.  With prefixes == NULL the call
+ * only reports whether any prefix is configured.
+ *
+ * Returns 0 when at least one slot has a non-zero prefix_len, EINVAL when
+ * ifp is NULL, ENOMEM when IN6_IFEXTRA(ifp) is NULL, and ENOENT when every
+ * slot is empty.  The table is read under the shared inet6 data lock.
+ */
+int
+ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
+{
+ int i, found_one = 0, error = 0;
+
+ if (ifp == NULL) {
+ return EINVAL;
+ }
+
+ if_inet6data_lock_shared(ifp);
+
+ if (IN6_IFEXTRA(ifp) == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
+ if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
+ found_one = 1;
+ }
+ }
+
+ if (found_one == 0) {
+ error = ENOENT;
+ goto out;
+ }
+
+ if (prefixes) {
+ bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
+ sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
+ }
+
+out:
+ if_inet6data_lock_done(ifp);
+
+ return error;
}
+#endif
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
uint32_t did_sw;
if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
- (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
+ (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
return;
+ }
switch (pf) {
case PF_INET:
did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
- if (did_sw & CSUM_DELAY_IP)
+ if (did_sw & CSUM_DELAY_IP) {
hwcksum_dbg_finalized_hdr++;
- if (did_sw & CSUM_DELAY_DATA)
+ }
+ if (did_sw & CSUM_DELAY_DATA) {
hwcksum_dbg_finalized_data++;
+ }
break;
#if INET6
case PF_INET6:
*/
did_sw = in6_finalize_cksum(m, hoff, -1, -1,
m->m_pkthdr.csum_flags);
- if (did_sw & CSUM_DELAY_IPV6_DATA)
+ if (did_sw & CSUM_DELAY_IPV6_DATA) {
hwcksum_dbg_finalized_data++;
+ }
break;
#endif /* INET6 */
default:
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
protocol_family_t pf)
{
- uint16_t sum;
+ uint16_t sum = 0;
uint32_t hlen;
if (frame_header == NULL ||
frame_header < (char *)mbuf_datastart(m) ||
frame_header > (char *)m->m_data) {
- printf("%s: frame header pointer 0x%llx out of range "
+ DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
"[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
(uint64_t)VM_KERNEL_ADDRPERM(frame_header),
(uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
- if (foff > (uint32_t)m->m_pkthdr.len)
+ if (foff > (uint32_t)m->m_pkthdr.len) {
return;
+ }
m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
if (hlen > rxoff) {
hwcksum_dbg_bad_rxoff++;
if (dlil_verbose) {
- printf("%s: partial cksum start offset %d "
+ DLIL_PRINTF("%s: partial cksum start offset %d "
"is less than frame header length %d for "
"mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
(uint64_t)VM_KERNEL_ADDRPERM(m));
}
return;
}
- rxoff -=hlen;
+ rxoff -= hlen;
if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
/*
if (sum != m->m_pkthdr.csum_rx_val) {
hwcksum_dbg_bad_cksum++;
if (dlil_verbose) {
- printf("%s: bad partial cksum value "
+ DLIL_PRINTF("%s: bad partial cksum value "
"0x%x (expected 0x%x) for mbuf "
"0x%llx [rx_start %d]\n",
if_name(ifp),
if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
- if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
+ if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
return;
+ }
- sum = m_adj_sum16(m, rxoff, aoff, sum);
+ sum = m_adj_sum16(m, rxoff, aoff,
+ m_pktlen(m) - aoff, sum);
m->m_pkthdr.csum_rx_val = sum;
m->m_pkthdr.csum_rx_start = (aoff + hlen);
i = hwcksum_dbg_mode;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (hwcksum_dbg == 0)
- return (ENODEV);
+ if (hwcksum_dbg == 0) {
+ return ENODEV;
+ }
- if ((i & ~HWCKSUM_DBG_MASK) != 0)
- return (EINVAL);
+ if ((i & ~HWCKSUM_DBG_MASK) != 0) {
+ return EINVAL;
+ }
hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
- return (err);
+ return err;
}
static int
i = hwcksum_dbg_partial_rxoff_forced;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
- return (ENODEV);
+ if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
+ return ENODEV;
+ }
hwcksum_dbg_partial_rxoff_forced = i;
- return (err);
+ return err;
}
static int
i = hwcksum_dbg_partial_rxoff_adj;
err = sysctl_handle_int(oidp, &i, 0, req);
- if (err != 0 || req->newptr == USER_ADDR_NULL)
- return (err);
+ if (err != 0 || req->newptr == USER_ADDR_NULL) {
+ return err;
+ }
- if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
- return (ENODEV);
+ if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
+ return ENODEV;
+ }
hwcksum_dbg_partial_rxoff_adj = i;
- return (err);
+ return err;
}
-#if DEBUG
+/*
+ * Read-only sysctl handler that copies tx_chain_len_stats out to the
+ * caller.
+ *
+ * Returns EPERM when the request carries a new value (req->newptr set);
+ * otherwise returns whatever SYSCTL_OUT() reports.
+ */
+static int
+sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ /* the statistics cannot be written from user space */
+ if (req->newptr != USER_ADDR_NULL) {
+ return EPERM;
+ }
+
+ return SYSCTL_OUT(req, &tx_chain_len_stats,
+ sizeof(struct chain_len_stats));
+}
+
+
+#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
- int len;
- uint16_t sum;
+ boolean_t init;
+ uint16_t len;
+ uint16_t sumr; /* reference */
+ uint16_t sumrp; /* reference, precomputed */
} sumtbl[] = {
- { 11, 0xcb6d },
- { 20, 0x20dd },
- { 27, 0xbabd },
- { 32, 0xf3e8 },
- { 37, 0x197d },
- { 43, 0x9eae },
- { 64, 0x4678 },
- { 127, 0x9399 },
- { 256, 0xd147 },
- { 325, 0x0358 }
+ { FALSE, 0, 0, 0x0000 },
+ { FALSE, 1, 0, 0x001f },
+ { FALSE, 2, 0, 0x8b1f },
+ { FALSE, 3, 0, 0x8b27 },
+ { FALSE, 7, 0, 0x790e },
+ { FALSE, 11, 0, 0xcb6d },
+ { FALSE, 20, 0, 0x20dd },
+ { FALSE, 27, 0, 0xbabd },
+ { FALSE, 32, 0, 0xf3e8 },
+ { FALSE, 37, 0, 0x197d },
+ { FALSE, 43, 0, 0x9eae },
+ { FALSE, 64, 0, 0x4678 },
+ { FALSE, 127, 0, 0x9399 },
+ { FALSE, 256, 0, 0xd147 },
+ { FALSE, 325, 0, 0x0358 },
};
-#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
+#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
static void
dlil_verify_sum16(void)
int n;
/* Make sure test data plus extra room for alignment fits in cluster */
- _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
+ _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
+
+ kprintf("DLIL: running SUM16 self-tests ... ");
m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
- MH_ALIGN(m, sizeof (uint32_t)); /* 32-bit starting alignment */
- buf = mtod(m, uint8_t *); /* base address */
+ m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
+
+ buf = mtod(m, uint8_t *); /* base address */
for (n = 0; n < SUMTBL_MAX; n++) {
uint16_t len = sumtbl[n].len;
int i;
/* Verify for all possible alignments */
- for (i = 0; i < (int)sizeof (uint64_t); i++) {
- uint16_t sum;
+ for (i = 0; i < (int)sizeof(uint64_t); i++) {
+ uint16_t sum, sumr;
uint8_t *c;
/* Copy over test data to mbuf */
- VERIFY(len <= sizeof (sumdata));
+ VERIFY(len <= sizeof(sumdata));
c = buf + i;
bcopy(sumdata, c, len);
m->m_len = len;
sum = m_sum16(m, 0, len);
+ if (!sumtbl[n].init) {
+ sumr = in_cksum_mbuf_ref(m, len, 0, 0);
+ sumtbl[n].sumr = sumr;
+ sumtbl[n].init = TRUE;
+ } else {
+ sumr = sumtbl[n].sumr;
+ }
+
/* Something is horribly broken; stop now */
- if (sum != sumtbl[n].sum) {
- panic("%s: broken m_sum16 for len=%d align=%d "
- "sum=0x%04x [expected=0x%04x]\n", __func__,
- len, i, sum, sumtbl[n].sum);
+ if (sumr != sumtbl[n].sumrp) {
+ panic_plain("\n%s: broken in_cksum_mbuf_ref() "
+ "for len=%d align=%d sum=0x%04x "
+ "[expected=0x%04x]\n", __func__,
+ len, i, sum, sumr);
+ /* NOTREACHED */
+ } else if (sum != sumr) {
+ panic_plain("\n%s: broken m_sum16() for len=%d "
+ "align=%d sum=0x%04x [expected=0x%04x]\n",
+ __func__, len, i, sum, sumr);
/* NOTREACHED */
}
sum = m_sum16(m, i, len);
/* Something is horribly broken; stop now */
- if (sum != sumtbl[n].sum) {
- panic("%s: broken m_sum16 for len=%d offset=%d "
- "sum=0x%04x [expected=0x%04x]\n", __func__,
- len, i, sum, sumtbl[n].sum);
+ if (sum != sumr) {
+ panic_plain("\n%s: broken m_sum16() for len=%d "
+ "offset=%d sum=0x%04x [expected=0x%04x]\n",
+ __func__, len, i, sum, sumr);
/* NOTREACHED */
}
#if INET
sum = b_sum16(c, len);
/* Something is horribly broken; stop now */
- if (sum != sumtbl[n].sum) {
- panic("%s: broken b_sum16 for len=%d align=%d "
- "sum=0x%04x [expected=0x%04x]\n", __func__,
- len, i, sum, sumtbl[n].sum);
+ if (sum != sumr) {
+ panic_plain("\n%s: broken b_sum16() for len=%d "
+ "align=%d sum=0x%04x [expected=0x%04x]\n",
+ __func__, len, i, sum, sumr);
/* NOTREACHED */
}
#endif /* INET */
}
m_freem(m);
- printf("DLIL: SUM16 self-tests PASSED\n");
+ kprintf("PASSED\n");
}
-#endif /* DEBUG */
+#endif /* DEBUG || DEVELOPMENT */
-#define CASE_STRINGIFY(x) case x: return #x
+#define CASE_STRINGIFY(x) case x: return #x
/*
 * Map a KEV_DL_* event code to the name of its constant.
 *
 * Each CASE_STRINGIFY(x) line expands to "case x: return #x", so a listed
 * code returns its own identifier as a string literal.  Codes that are not
 * listed fall through the default case and yield the empty string.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
switch (event_code) {
- CASE_STRINGIFY(KEV_DL_SIFFLAGS);
- CASE_STRINGIFY(KEV_DL_SIFMETRICS);
- CASE_STRINGIFY(KEV_DL_SIFMTU);
- CASE_STRINGIFY(KEV_DL_SIFPHYS);
- CASE_STRINGIFY(KEV_DL_SIFMEDIA);
- CASE_STRINGIFY(KEV_DL_SIFGENERIC);
- CASE_STRINGIFY(KEV_DL_ADDMULTI);
- CASE_STRINGIFY(KEV_DL_DELMULTI);
- CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
- CASE_STRINGIFY(KEV_DL_IF_DETACHING);
- CASE_STRINGIFY(KEV_DL_IF_DETACHED);
- CASE_STRINGIFY(KEV_DL_LINK_OFF);
- CASE_STRINGIFY(KEV_DL_LINK_ON);
- CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
- CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
- CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
- CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
- CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
- CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
- CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
- CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
- CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
- CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
- CASE_STRINGIFY(KEV_DL_ISSUES);
- CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
+ CASE_STRINGIFY(KEV_DL_SIFFLAGS);
+ CASE_STRINGIFY(KEV_DL_SIFMETRICS);
+ CASE_STRINGIFY(KEV_DL_SIFMTU);
+ CASE_STRINGIFY(KEV_DL_SIFPHYS);
+ CASE_STRINGIFY(KEV_DL_SIFMEDIA);
+ CASE_STRINGIFY(KEV_DL_SIFGENERIC);
+ CASE_STRINGIFY(KEV_DL_ADDMULTI);
+ CASE_STRINGIFY(KEV_DL_DELMULTI);
+ CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
+ CASE_STRINGIFY(KEV_DL_IF_DETACHING);
+ CASE_STRINGIFY(KEV_DL_IF_DETACHED);
+ CASE_STRINGIFY(KEV_DL_LINK_OFF);
+ CASE_STRINGIFY(KEV_DL_LINK_ON);
+ CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
+ CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
+ CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
+ CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
+ CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
+ CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
+ CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
+ CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
+ CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
+ CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
+ CASE_STRINGIFY(KEV_DL_ISSUES);
+ CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
default:
break;
}
- return ("");
+ return "";
+}
+/*
+ * Thread-call handler for the data-threshold notification.
+ *
+ * arg0 is the struct ifnet * the call was set up for; arg1 is unused.
+ * When ifnet_is_attached(ifp, 1) succeeds, the interface index is passed
+ * to nstat_ifnet_threshold_reached() and ifnet_decr_iorefcnt() is called
+ * afterwards (presumably releasing an I/O reference taken by the attach
+ * check -- confirm against ifnet_is_attached()).  Otherwise it does
+ * nothing.
+ */
+static void
+dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
+{
+#pragma unused(arg1)
+ struct ifnet *ifp = arg0;
+
+ if (ifnet_is_attached(ifp, 1)) {
+ nstat_ifnet_threshold_reached(ifp->if_index);
+ ifnet_decr_iorefcnt(ifp);
+ }
+}
+/*
+ * Schedule the interface's data-threshold thread call (if_dt_tcall) when
+ * enough traffic has passed since the last notification.
+ *
+ * The current total is if_ibytes + if_obytes; if_dt_bytes holds the total
+ * recorded at the previous notification.  The thread call must already
+ * exist (asserted).  Nothing is returned.
+ */
+void
+ifnet_notify_data_threshold(struct ifnet *ifp)
+{
+ uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
+ uint64_t oldbytes = ifp->if_dt_bytes;
+
+ ASSERT(ifp->if_dt_tcall != NULL);
+
+ /*
+ * If we went over the threshold, notify NetworkStatistics.
+ * We rate-limit it based on the threshold interval value.
+ *
+ * The conditions are evaluated in order: the compare-and-swap that
+ * advances if_dt_bytes runs before the "already active" check, so
+ * the recorded total moves forward even when no new call is
+ * scheduled.  A failed swap means the snapshot read above no longer
+ * matched if_dt_bytes, and this invocation does nothing.
+ */
+ if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
+ OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
+ !thread_call_isactive(ifp->if_dt_tcall)) {
+ uint64_t tival = (threshold_interval * NSEC_PER_SEC);
+ uint64_t now = mach_absolute_time(), deadline = now;
+ uint64_t ival;
+ /* threshold_interval is in seconds; 0 means fire the call right away */
+ if (tival != 0) {
+ nanoseconds_to_absolutetime(tival, &ival);
+ clock_deadline_for_periodic_event(ival, now, &deadline);
+ (void) thread_call_enter_delayed(ifp->if_dt_tcall,
+ deadline);
+ } else {
+ (void) thread_call_enter(ifp->if_dt_tcall);
+ }
+ }
+}
+
+#if (DEVELOPMENT || DEBUG)
+/*
+ * Sysctl handler that returns an interface's keepalive offload frames.
+ *
+ * The sysctl variable name contains the input parameters of
+ * ifnet_get_keepalive_offload_frames()
+ * ifp (interface index): name[0]
+ * frames_array_count: name[1]
+ * frame_data_offset: name[2]
+ * The return length gives used_frames_count
+ *
+ * Errors:
+ * the proc_suser() result when the caller is not the superuser
+ * EPERM  when the request tries to write a new value
+ * EINVAL when the name does not have exactly three components, or no
+ *        output buffer (or an empty one) was supplied
+ * ENOMEM when the output buffer cannot hold frames_array_count frames,
+ *        or the temporary array cannot be allocated
+ * ENOENT when the interface index is out of range or has no interface
+ * Errors from ifnet_get_keepalive_offload_frames() and SYSCTL_OUT() are
+ * passed through unchanged.
+ */
+static int
+sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+ int *name = (int *)arg1;
+ u_int namelen = arg2;
+ int idx;
+ ifnet_t ifp = NULL;
+ u_int32_t frames_array_count;
+ size_t frame_data_offset;
+ u_int32_t used_frames_count;
+ struct ifnet_keepalive_offload_frame *frames_array = NULL;
+ int error = 0;
+ u_int32_t i;
+
+ /*
+ * Only root may look at other people's TCP frames
+ */
+ error = proc_suser(current_proc());
+ if (error != 0) {
+ goto done;
+ }
+ /*
+ * Validate the input parameters
+ */
+ if (req->newptr != USER_ADDR_NULL) {
+ error = EPERM;
+ goto done;
+ }
+ if (namelen != 3) {
+ error = EINVAL;
+ goto done;
+ }
+ if (req->oldptr == USER_ADDR_NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ if (req->oldlen == 0) {
+ error = EINVAL;
+ goto done;
+ }
+ idx = name[0];
+ frames_array_count = name[1];
+ frame_data_offset = name[2];
+
+ /* Make sure the passed buffer is large enough */
+ if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
+ req->oldlen) {
+ error = ENOMEM;
+ goto done;
+ }
+
+ ifnet_head_lock_shared();
+ if (!IF_INDEX_IN_RANGE(idx)) {
+ ifnet_head_done();
+ error = ENOENT;
+ goto done;
+ }
+ ifp = ifindex2ifnet[idx];
+ ifnet_head_done();
+
+ /*
+ * An in-range index may have no interface behind it; do not hand a
+ * NULL ifp to ifnet_get_keepalive_offload_frames().
+ * NOTE(review): ifp is used below after the ifnet head lock has been
+ * dropped and without taking a reference -- confirm that this is safe.
+ */
+ if (ifp == NULL) {
+ error = ENOENT;
+ goto done;
+ }
+
+ frames_array = _MALLOC(frames_array_count *
+ sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
+ if (frames_array == NULL) {
+ error = ENOMEM;
+ goto done;
+ }
+
+ error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
+ frames_array_count, frame_data_offset, &used_frames_count);
+ if (error != 0) {
+ DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
+ __func__, error);
+ goto done;
+ }
+
+ /* Copy out only the frames that were actually filled in */
+ for (i = 0; i < used_frames_count; i++) {
+ error = SYSCTL_OUT(req, frames_array + i,
+ sizeof(struct ifnet_keepalive_offload_frame));
+ if (error != 0) {
+ goto done;
+ }
+ }
+done:
+ if (frames_array != NULL) {
+ _FREE(frames_array, M_TEMP);
+ }
+ return error;
+}
+#endif /* DEVELOPMENT || DEBUG */
+/*
+ * Thin wrapper: forwards ifs and ifp unchanged to
+ * tcp_update_stats_per_flow().  No validation is done here and nothing
+ * is returned.
+ */
+void
+ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
+ struct ifnet *ifp)
+{
+ tcp_update_stats_per_flow(ifs, ifp);
}