#include <sys/domain.h>
#include <sys/user.h>
#include <sys/random.h>
+#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/dlil.h>
#include <net/if_arp.h>
+#include <net/iptap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_sfb.h>
#if INET
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/if_ether.h>
+#include <netinet/in_pcb.h>
#endif /* INET */
#if INET6
#if PF
#include <net/pfvar.h>
#endif /* PF */
+#if PF_ALTQ
+#include <net/altq/altq.h>
+#endif /* PF_ALTQ */
+#include <net/pktsched/pktsched.h>
#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DLIL_PRINTF kprintf
#endif
-#define _CASSERT(x) \
- switch (0) { case 0: case (x): ; }
-
#define IF_DATA_REQUIRE_ALIGNED_64(f) \
_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
-#define IFNET_IF_TC_REQUIRE_ALIGNED_64(f) \
- _CASSERT(!(offsetof(struct ifnet, if_tc.f) % sizeof (u_int64_t)))
-
enum {
kProtoKPI_v1 = 1,
kProtoKPI_v2 = 2
struct dlil_ifnet {
struct ifnet dl_if; /* public ifnet */
/*
- * dlil private fields, protected by dl_if_lock
+ * DLIL private fields, protected by dl_if_lock
*/
decl_lck_mtx_data(, dl_if_lock);
TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
} dl_if_lladdr;
+ u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
+ struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
ctrace_t dl_if_attach; /* attach PC stacktrace */
ctrace_t dl_if_detach; /* detach PC stacktrace */
};
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
+static lck_grp_t *ifnet_snd_lock_group;
+static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_count = 0;
extern u_int32_t ipv4_ll_arp_aware;
+struct sfb_fc_list ifnet_fclist;
+decl_lck_mtx_data(static, ifnet_fclist_lock);
+
+static unsigned int ifnet_fcezone_size; /* size of ifnet_fce */
+static struct zone *ifnet_fcezone; /* zone for ifnet_fce */
+
+#define IFNET_FCEZONE_MAX 32 /* maximum elements in zone */
+#define IFNET_FCEZONE_NAME "ifnet_fcezone" /* zone name */
+
+static void ifnet_fc_thread_func(void *, wait_result_t);
+static void ifnet_fc_init(void);
+
#if DEBUG
static unsigned int ifnet_debug = 1; /* debugging (enabled) */
#else
#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
-static unsigned int dlif_inp_size; /* size of dlil_threading_info */
-static struct zone *dlif_inp_zone; /* zone for dlil_threading_info */
-
-#define DLIF_INP_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
-#define DLIF_INP_ZONE_NAME "ifnet_thread" /* zone name */
-
static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
+static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
+static unsigned int dlif_tcpstat_bufsize;	/* dlif_tcpstat_size + headroom */
+static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
+
+#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
+#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
+
+static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
+static unsigned int dlif_udpstat_bufsize;	/* dlif_udpstat_size + headroom */
+static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
+
+#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
+#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
+
/*
* Updating this variable should be done by first acquiring the global
 * radix node head (rnh_lock), in tandem with setting/clearing the
u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;
-static struct dlil_threading_info dlil_lo_thread;
-__private_extern__ struct dlil_threading_info *dlil_lo_thread_ptr = &dlil_lo_thread;
-
-static struct mbuf *dlil_lo_input_mbuf_head = NULL;
-static struct mbuf *dlil_lo_input_mbuf_tail = NULL;
+static struct dlil_main_threading_info dlil_main_input_thread_info;
+__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
+ (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
const struct sockaddr_dl *, const struct sockaddr *);
static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
+static void ifp_if_start(struct ifnet *);
+static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
+ struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
+static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
- const struct sockaddr *, const char *, const char *);
-static errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
+ const struct sockaddr *, const char *, const char *
+#if CONFIG_EMBEDDED
+ ,
+ u_int32_t *, u_int32_t *
+#endif /* CONFIG_EMBEDDED */
+ );
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
+static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
+static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
-static void dlil_input_thread_func(struct dlil_threading_info *inpthread);
+static void dlil_main_input_thread_func(void *, wait_result_t);
+static void dlil_input_thread_func(void *, wait_result_t);
+static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
+static void dlil_rxpoll_calc_limits(struct dlil_threading_info *);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
-
-static void ifnet_delayed_thread_func(void);
+static void dlil_terminate_input_thread(struct dlil_threading_info *);
+static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
+ struct dlil_threading_info *, boolean_t);
+static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
+static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
+ u_int32_t, ifnet_model_t, boolean_t);
+static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
+ const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
+
+static void ifnet_detacher_thread_func(void *, wait_result_t);
+static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);
+static void ifnet_start_thread_fn(void *, wait_result_t);
+static void ifnet_poll_thread_fn(void *, wait_result_t);
+static void ifnet_poll(struct ifnet *);
+
static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */
+static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
+static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
+static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
+
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
extern void bpfdetach(struct ifnet*);
extern void proto_input_run(void);
+extern uint32_t udp_count_opportunistic(unsigned int ifindex,
+ u_int32_t flags);
+extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
+ u_int32_t flags);
+
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
#if DEBUG
#else
static int dlil_verbose = 0;
#endif /* DEBUG */
-static int dlil_multithreaded_input = 1;
-static int cur_dlil_input_threads = 0;
#if IFNET_INPUT_SANITY_CHK
-static int dlil_lo_input_mbuf_count = 0;
/* sanity checking of input packet lists received */
-static int dlil_input_sanity_check = 0;
-#endif
+static u_int32_t dlil_input_sanity_check = 0;
+#endif /* IFNET_INPUT_SANITY_CHK */
+/* rate limit debug messages */
+struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW,
- &dlil_verbose, 0, "Log DLIL error messages");
-
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, multi_threaded_input, CTLFLAG_RW,
- &dlil_multithreaded_input , 0, "Uses multiple input thread for DLIL input");
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
+
+#define IF_SNDQ_MINLEN 32
+u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
+ sysctl_sndq_maxlen, "I", "Default transmit queue max length");
+
+#define IF_RCVQ_MINLEN 32
+#define IF_RCVQ_MAXLEN 256
+u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
+ sysctl_rcvq_maxlen, "I", "Default receive queue max length");
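+
+/*
+ * Both queue limits are runtime-tunable, e.g. from userland:
+ *
+ *	sysctl -w net.link.generic.system.sndq_maxlen=256
+ *
+ * (The handlers are expected to clamp values below IF_SNDQ_MINLEN and
+ * IF_RCVQ_MINLEN, respectively; see sysctl_sndq_maxlen/sysctl_rcvq_maxlen.)
+ */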
+
+#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
+static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
+ "ilog2 of EWMA decay rate of avg inbound packets");
+
+#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
+static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
+ "input poll mode freeze time");
+
+#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
+static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
+ "input poll sampling time");
+
+#define IF_RXPOLL_INTERVAL_TIME (1ULL * 1000 * 1000) /* 1 ms */
+static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVAL_TIME;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
+ "input poll interval (time)");
+
+#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
+static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
+ IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
+
+#define IF_RXPOLL_WLOWAT 5
+static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, IF_RXPOLL_WLOWAT,
+ "input poll wakeup low watermark");
+
+#define IF_RXPOLL_WHIWAT 100
+static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, IF_RXPOLL_WHIWAT,
+ "input poll wakeup high watermark");
+
+static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
+ "max packets per poll call");
+
+static u_int32_t if_rxpoll = 1;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
+ sysctl_rxpoll, "I", "enable opportunistic input polling");
+
+u_int32_t if_bw_smoothing_val = 3;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
+
+u_int32_t if_bw_measure_size = 10;
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
+
+static u_int32_t cur_dlil_input_threads = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
+	CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
+ "Current number of DLIL input threads");
#if IFNET_INPUT_SANITY_CHK
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
- CTLFLAG_RW, &dlil_input_sanity_check , 0,
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
+	CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
"Turn on sanity checking in DLIL input");
-#endif
+#endif /* IFNET_INPUT_SANITY_CHK */
+static u_int32_t if_flowadv = 1;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
+ "enable flow-advisory mechanism");
+
+unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;
-static lck_grp_t *dlil_input_lock_grp = NULL;
#define PROTO_HASH_SLOTS 0x5
+#define DLIL_INPUT_CHECK(m, ifp) { \
+ struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
+ if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
+ !(mbuf_flags(m) & MBUF_PKTHDR)) { \
+ panic_plain("%s: invalid mbuf %p\n", __func__, m); \
+ /* NOTREACHED */ \
+ } \
+}
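+
+/*
+ * The check above panics on any inbound mbuf that lacks a pkthdr or whose
+ * rcvif does not match the interface it arrived on; lo0 is exempt, since
+ * loopback legitimately carries packets stamped with other interfaces
+ * (see the loopbacked-multicast note in ifnet_input_common below).
+ */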
+
+#define DLIL_EWMA(old, new, decay) do { \
+ u_int32_t _avg; \
+ if ((_avg = (old)) > 0) \
+ _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
+ else \
+ _avg = (new); \
+ (old) = _avg; \
+} while (0)
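+
+/*
+ * Worked example (illustrative): with the default decay of 2 (see
+ * IF_RXPOLL_DECAY, i.e. new samples weighted 1/4), the update above is
+ * avg = ((avg << 2) - avg + new) >> 2 = (3 * avg + new) / 4; for
+ * avg = 100 and new = 20 this yields (400 - 100 + 20) >> 2 = 80, i.e.
+ * 0.75 * 100 + 0.25 * 20, computed with shifts and adds only.
+ */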
+
+#define MBPS (1ULL * 1000 * 1000)
+#define GBPS (MBPS * 1000)
+
+struct rxpoll_time_tbl {
+ u_int64_t speed; /* downlink speed */
+ u_int32_t plowat; /* packets low watermark */
+ u_int32_t phiwat; /* packets high watermark */
+ u_int32_t blowat; /* bytes low watermark */
+ u_int32_t bhiwat; /* bytes high watermark */
+};
+
+static struct rxpoll_time_tbl rxpoll_tbl[] = {
+ { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
+ { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
+ { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
+ { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
+ { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
+ { 0, 0, 0, 0, 0 }
+};
+
/*
* Internal functions.
*/
dlil_event_internal(ifp, &ev_msg);
}
+__private_extern__ int
+dlil_alloc_local_stats(struct ifnet *ifp)
+{
+ int ret = EINVAL;
+ void *buf, *base, **pbuf;
+
+ if (ifp == NULL)
+ goto end;
+
+ if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
+ /* allocate tcpstat_local structure */
+ buf = zalloc(dlif_tcpstat_zone);
+ if (buf == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ bzero(buf, dlif_tcpstat_bufsize);
+
+ /* Get the 64-bit aligned base address for this object */
+ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+ sizeof (u_int64_t));
+ VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
+ ((intptr_t)buf + dlif_tcpstat_bufsize));
+
+ /*
+ * Wind back a pointer size from the aligned base and
+ * save the original address so we can free it later.
+ */
+ pbuf = (void **)((intptr_t)base - sizeof (void *));
+ *pbuf = buf;
+ ifp->if_tcp_stat = base;
+
+ /* allocate udpstat_local structure */
+ buf = zalloc(dlif_udpstat_zone);
+ if (buf == NULL) {
+ ret = ENOMEM;
+ goto end;
+ }
+ bzero(buf, dlif_udpstat_bufsize);
+
+ /* Get the 64-bit aligned base address for this object */
+ base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+ sizeof (u_int64_t));
+ VERIFY(((intptr_t)base + dlif_udpstat_size) <=
+ ((intptr_t)buf + dlif_udpstat_bufsize));
+
+ /*
+ * Wind back a pointer size from the aligned base and
+ * save the original address so we can free it later.
+ */
+ pbuf = (void **)((intptr_t)base - sizeof (void *));
+ *pbuf = buf;
+ ifp->if_udp_stat = base;
+
+ VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
+ IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
+
+ ret = 0;
+ }
+
+end:
+ if (ret != 0) {
+ if (ifp->if_tcp_stat != NULL) {
+ pbuf = (void **)
+ ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
+ zfree(dlif_tcpstat_zone, *pbuf);
+ ifp->if_tcp_stat = NULL;
+ }
+ if (ifp->if_udp_stat != NULL) {
+ pbuf = (void **)
+ ((intptr_t)ifp->if_udp_stat - sizeof (void *));
+ zfree(dlif_udpstat_zone, *pbuf);
+ ifp->if_udp_stat = NULL;
+ }
+ }
+
+ return (ret);
+}
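+
+/*
+ * Layout of each stats zone element above (sketch):
+ *
+ *	buf (from zalloc)       base (64-bit aligned, stored in the ifnet)
+ *	 |                       |
+ *	 v                       v
+ *	 +----------+-----------+--------------------------------+
+ *	 | headroom | saved buf | tcpstat_local / udpstat_local  |
+ *	 +----------+-----------+--------------------------------+
+ *	            ^
+ *	            pbuf = base - sizeof (void *)
+ *
+ * The error path above recovers the original zalloc() address through
+ * *pbuf before calling zfree().
+ */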
+
static int
-dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inputthread)
+dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
+ thread_continue_t func;
+ u_int32_t limit;
int error;
- bzero(inputthread, sizeof(*inputthread));
- /* loopback ifp may not be configured at dlil_init time. */
- if (ifp == lo_ifp) {
- (void) strlcat(inputthread->input_name,
- "dlil_input_main_thread_mtx", DLIL_THREADNAME_LEN);
+ /* NULL ifp indicates the main input thread, called at dlil_init time */
+ if (ifp == NULL) {
+ func = dlil_main_input_thread_func;
+ VERIFY(inp == dlil_main_input_thread);
+ (void) strlcat(inp->input_name,
+ "main_input", DLIL_THREADNAME_LEN);
+ } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
+ func = dlil_rxpoll_input_thread_func;
+ VERIFY(inp != dlil_main_input_thread);
+ (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
+ "%s%d_input_poll", ifp->if_name, ifp->if_unit);
} else {
- (void) snprintf(inputthread->input_name, DLIL_THREADNAME_LEN,
- "dlil_input_%s%d_mtx", ifp->if_name, ifp->if_unit);
+ func = dlil_input_thread_func;
+ VERIFY(inp != dlil_main_input_thread);
+ (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
+ "%s%d_input", ifp->if_name, ifp->if_unit);
}
+ VERIFY(inp->input_thr == THREAD_NULL);
- inputthread->lck_grp = lck_grp_alloc_init(inputthread->input_name,
- dlil_grp_attributes);
- lck_mtx_init(&inputthread->input_lck, inputthread->lck_grp,
- dlil_lck_attributes);
+ inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
+ lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
+
+ inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
+ inp->ifp = ifp; /* NULL for main input thread */
+
+ net_timerclear(&inp->mode_holdtime);
+ net_timerclear(&inp->mode_lasttime);
+ net_timerclear(&inp->sample_holdtime);
+ net_timerclear(&inp->sample_lasttime);
+ net_timerclear(&inp->dbg_lasttime);
+
+ /*
+ * For interfaces that support opportunistic polling, set the
+ * low and high watermarks for outstanding inbound packets/bytes.
+ * Also define freeze times for transitioning between modes
+ * and updating the average.
+ */
+ if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
+ limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
+ dlil_rxpoll_calc_limits(inp);
+ } else {
+ limit = (u_int32_t)-1;
+ }
+
+ _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
+ if (inp == dlil_main_input_thread) {
+ struct dlil_main_threading_info *inpm =
+ (struct dlil_main_threading_info *)inp;
+ _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
+ }
- error= kernel_thread_start((thread_continue_t)dlil_input_thread_func,
- inputthread, &inputthread->input_thread);
- if (error == 0) {
- ml_thread_policy(inputthread->input_thread, MACHINE_GROUP,
+ error = kernel_thread_start(func, inp, &inp->input_thr);
+ if (error == KERN_SUCCESS) {
+ ml_thread_policy(inp->input_thr, MACHINE_GROUP,
(MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
/*
- * Except for the loopback dlil input thread, we create
- * an affinity set so that the matching workloop thread
- * can be scheduled on the same processor set.
+ * We create an affinity set so that the matching workloop
+ * thread or the starter thread (for loopback) can be
+ * scheduled on the same processor set as the input thread.
*/
- if (net_affinity && inputthread != dlil_lo_thread_ptr) {
- struct thread *tp = inputthread->input_thread;
+ if (net_affinity) {
+ struct thread *tp = inp->input_thr;
u_int32_t tag;
/*
* Randomize to reduce the probability
read_random(&tag, sizeof (tag));
if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
thread_reference(tp);
- inputthread->tag = tag;
- inputthread->net_affinity = TRUE;
+ inp->tag = tag;
+ inp->net_affinity = TRUE;
}
}
+ } else if (inp == dlil_main_input_thread) {
+ panic_plain("%s: couldn't create main input thread", __func__);
+ /* NOTREACHED */
} else {
- panic("%s: couldn't create thread", __func__);
+ panic_plain("%s: couldn't create %s%d input thread", __func__,
+ ifp->if_name, ifp->if_unit);
/* NOTREACHED */
}
OSAddAtomic(1, &cur_dlil_input_threads);
-#if DLIL_DEBUG
- printf("%s: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n",
- __func__, inputthread, inputthread->input_thread,
- dlil_multithreaded_input, cur_dlil_input_threads);
-#endif
+
return (error);
}
+static void
+dlil_terminate_input_thread(struct dlil_threading_info *inp)
+{
+ struct ifnet *ifp;
+
+ VERIFY(current_thread() == inp->input_thr);
+ VERIFY(inp != dlil_main_input_thread);
+
+ OSAddAtomic(-1, &cur_dlil_input_threads);
+
+ lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
+ lck_grp_free(inp->lck_grp);
+
+ inp->input_waiting = 0;
+ inp->wtot = 0;
+ bzero(inp->input_name, sizeof (inp->input_name));
+ ifp = inp->ifp;
+ inp->ifp = NULL;
+ VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
+ qlimit(&inp->rcvq_pkts) = 0;
+ bzero(&inp->stats, sizeof (inp->stats));
+
+ VERIFY(!inp->net_affinity);
+ inp->input_thr = THREAD_NULL;
+ VERIFY(inp->wloop_thr == THREAD_NULL);
+ VERIFY(inp->poll_thr == THREAD_NULL);
+ VERIFY(inp->tag == 0);
+
+ inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
+ bzero(&inp->tstats, sizeof (inp->tstats));
+ bzero(&inp->pstats, sizeof (inp->pstats));
+ bzero(&inp->sstats, sizeof (inp->sstats));
+
+ net_timerclear(&inp->mode_holdtime);
+ net_timerclear(&inp->mode_lasttime);
+ net_timerclear(&inp->sample_holdtime);
+ net_timerclear(&inp->sample_lasttime);
+ net_timerclear(&inp->dbg_lasttime);
+
+#if IFNET_INPUT_SANITY_CHK
+ inp->input_mbuf_cnt = 0;
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+ if (dlil_verbose) {
+ printf("%s%d: input thread terminated\n",
+ ifp->if_name, ifp->if_unit);
+ }
+
+ /* for the extra refcnt from kernel_thread_start() */
+ thread_deallocate(current_thread());
+
+ /* this is the end */
+ thread_terminate(current_thread());
+ /* NOTREACHED */
+}
+
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
+ IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
-
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkpackets);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkbytes);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkpackets);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkbytes);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivipackets);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivibytes);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovipackets);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovibytes);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivopackets);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivobytes);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovopackets);
- IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovobytes);
+ IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
/*
* These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
* Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
*/
_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
+ _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
PE_parse_boot_argn("net_affinity", &net_affinity,
sizeof (net_affinity));
+ PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
+
PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
0, DLIF_ZONE_NAME);
if (dlif_zone == NULL) {
- panic("%s: failed allocating %s", __func__, DLIF_ZONE_NAME);
+ panic_plain("%s: failed allocating %s", __func__,
+ DLIF_ZONE_NAME);
/* NOTREACHED */
}
zone_change(dlif_zone, Z_EXPAND, TRUE);
dlif_filt_zone = zinit(dlif_filt_size,
DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
if (dlif_filt_zone == NULL) {
- panic("%s: failed allocating %s", __func__,
+ panic_plain("%s: failed allocating %s", __func__,
DLIF_FILT_ZONE_NAME);
/* NOTREACHED */
}
zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
- dlif_inp_size = sizeof (struct dlil_threading_info);
- dlif_inp_zone = zinit(dlif_inp_size,
- DLIF_INP_ZONE_MAX * dlif_inp_size, 0, DLIF_INP_ZONE_NAME);
- if (dlif_inp_zone == NULL) {
- panic("%s: failed allocating %s", __func__, DLIF_INP_ZONE_NAME);
- /* NOTREACHED */
- }
- zone_change(dlif_inp_zone, Z_EXPAND, TRUE);
- zone_change(dlif_inp_zone, Z_CALLERACCT, FALSE);
-
dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
dlif_phash_zone = zinit(dlif_phash_size,
DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
if (dlif_phash_zone == NULL) {
- panic("%s: failed allocating %s", __func__,
+ panic_plain("%s: failed allocating %s", __func__,
DLIF_PHASH_ZONE_NAME);
/* NOTREACHED */
}
dlif_proto_zone = zinit(dlif_proto_size,
DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
if (dlif_proto_zone == NULL) {
- panic("%s: failed allocating %s", __func__,
+ panic_plain("%s: failed allocating %s", __func__,
DLIF_PROTO_ZONE_NAME);
/* NOTREACHED */
}
zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
+ dlif_tcpstat_size = sizeof (struct tcpstat_local);
+ /* Enforce 64-bit alignment for tcpstat_local structure */
+ dlif_tcpstat_bufsize =
+ dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
+ dlif_tcpstat_bufsize =
+ P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
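+	/*
+	 * Example (illustrative, LP64): if sizeof (struct tcpstat_local)
+	 * were 100 bytes, bufsize would be 100 + 8 + 8 = 116, rounded up
+	 * to 120; the pointer-plus-alignment headroom is what lets
+	 * dlil_alloc_local_stats() carve out a 64-bit aligned object.
+	 */
+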
+ dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
+ DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
+ DLIF_TCPSTAT_ZONE_NAME);
+ if (dlif_tcpstat_zone == NULL) {
+ panic_plain("%s: failed allocating %s", __func__,
+ DLIF_TCPSTAT_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
+
+ dlif_udpstat_size = sizeof (struct udpstat_local);
+ /* Enforce 64-bit alignment for udpstat_local structure */
+ dlif_udpstat_bufsize =
+ dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
+ dlif_udpstat_bufsize =
+ P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
+ dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
+	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
+ DLIF_UDPSTAT_ZONE_NAME);
+ if (dlif_udpstat_zone == NULL) {
+ panic_plain("%s: failed allocating %s", __func__,
+ DLIF_UDPSTAT_ZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
+ zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
+
ifnet_llreach_init();
TAILQ_INIT(&dlil_ifnet_head);
/* Setup the lock groups we will use */
dlil_grp_attributes = lck_grp_attr_alloc_init();
- dlil_lock_group = lck_grp_alloc_init("dlil internal locks",
+ dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
dlil_grp_attributes);
ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
dlil_grp_attributes);
ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
dlil_grp_attributes);
- dlil_input_lock_grp = lck_grp_alloc_init("dlil input lock",
+ ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
+ dlil_grp_attributes);
+ ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
dlil_grp_attributes);
/* Setup the lock attributes we will use */
dlil_lck_attributes);
lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
+ ifnet_fc_init();
+
lck_attr_free(dlil_lck_attributes);
dlil_lck_attributes = NULL;
ifa_init();
-
/*
- * Create and start up the first dlil input thread once everything
- * is initialized.
+ * Create and start up the main DLIL input thread and the interface
+ * detacher threads once everything is initialized.
*/
- dlil_create_input_thread(lo_ifp, dlil_lo_thread_ptr);
+ dlil_create_input_thread(NULL, dlil_main_input_thread);
- if (kernel_thread_start((thread_continue_t)ifnet_delayed_thread_func,
- NULL, &thread) != 0) {
- panic("%s: couldn't create detach thread", __func__);
+ if (kernel_thread_start(ifnet_detacher_thread_func,
+ NULL, &thread) != KERN_SUCCESS) {
+ panic_plain("%s: couldn't create detacher thread", __func__);
/* NOTREACHED */
}
thread_deallocate(thread);
/* Initialize the packet filter */
pfinit();
#endif /* PF */
+
+ /* Initialize queue algorithms */
+ classq_init();
+
+ /* Initialize packet schedulers */
+ pktsched_init();
}
static void
dlil_detach_filter_internal(filter, 0);
}
+/*
+ * Main input thread:
+ *
+ * a) handles all inbound packets for lo0
+ * b) handles all inbound packets for interfaces with no dedicated
+ * input thread (e.g. anything but Ethernet/PDP or those that support
+ * opportunistic polling.)
+ * c) protocol registrations
+ * d) packet injections
+ */
static void
-dlil_input_thread_func(struct dlil_threading_info *inputthread)
+dlil_main_input_thread_func(void *v, wait_result_t w)
{
+#pragma unused(w)
+ struct dlil_main_threading_info *inpm = v;
+ struct dlil_threading_info *inp = v;
+
+ VERIFY(inp == dlil_main_input_thread);
+ VERIFY(inp->ifp == NULL);
+ VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+
while (1) {
struct mbuf *m = NULL, *m_loop = NULL;
-#if IFNET_INPUT_SANITY_CHK
- int loop_cnt = 0, mbuf_cnt;
- int count;
- struct mbuf *m1;
-#endif /* IFNET_INPUT_SANITY_CHK */
+ u_int32_t m_cnt, m_cnt_loop;
+ boolean_t proto_req;
- lck_mtx_lock_spin(&inputthread->input_lck);
+ lck_mtx_lock_spin(&inp->input_lck);
/* Wait until there is work to be done */
- while (!(inputthread->input_waiting & ~DLIL_INPUT_RUNNING)) {
- inputthread->input_waiting &= ~DLIL_INPUT_RUNNING;
- msleep(&inputthread->input_waiting,
- &inputthread->input_lck, 0,
- inputthread->input_name, 0);
+ while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+ (void) msleep(&inp->input_waiting, &inp->input_lck,
+ (PZERO - 1) | PSPIN, inp->input_name, NULL);
}
- lck_mtx_assert(&inputthread->input_lck, LCK_MTX_ASSERT_OWNED);
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
+ inp->input_waiting &= ~DLIL_INPUT_WAITING;
- m = inputthread->mbuf_head;
- inputthread->mbuf_head = NULL;
- inputthread->mbuf_tail = NULL;
+ /* Main input thread cannot be terminated */
+ VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
- if (inputthread->input_waiting & DLIL_INPUT_TERMINATE) {
- lck_mtx_unlock(&inputthread->input_lck);
+ proto_req = (inp->input_waiting &
+ (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
- if (m != NULL)
- mbuf_freem_list(m);
+ /* Packets for non-dedicated interfaces other than lo0 */
+ m_cnt = qlen(&inp->rcvq_pkts);
+ m = _getq_all(&inp->rcvq_pkts);
- OSAddAtomic(-1, &cur_dlil_input_threads);
+ /* Packets exclusive for lo0 */
+ m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
+ m_loop = _getq_all(&inpm->lo_rcvq_pkts);
- lck_mtx_destroy(&inputthread->input_lck,
- inputthread->lck_grp);
- lck_grp_free(inputthread->lck_grp);
+ inp->wtot = 0;
- zfree(dlif_inp_zone, inputthread);
+ lck_mtx_unlock(&inp->input_lck);
- /* for the extra refcnt from kernel_thread_start() */
- thread_deallocate(current_thread());
+ /*
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m_loop != NULL)
+ dlil_input_packet_list_extended(lo_ifp, m_loop,
+ m_cnt_loop, inp->mode);
- /* this is the end */
- thread_terminate(current_thread());
- /* NOTREACHED */
- return;
- }
+ if (m != NULL)
+ dlil_input_packet_list_extended(NULL, m,
+ m_cnt, inp->mode);
+
+ if (proto_req)
+ proto_input_run();
+ }
+
+ /* NOTREACHED */
+ VERIFY(0); /* we should never get here */
+}
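+
+/*
+ * A note on the wakeup protocol shared by the input thread variants in
+ * this file: each consumer clears DLIL_INPUT_RUNNING before msleep() and
+ * sets it again on wakeup, while the producer (ifnet_input_common) sets
+ * DLIL_INPUT_WAITING and issues wakeup_one() only when RUNNING is clear,
+ * so a thread that is already draining its queue is not woken redundantly.
+ */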
+
+/*
+ * Input thread for interfaces with legacy input model.
+ */
+static void
+dlil_input_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(w)
+ struct dlil_threading_info *inp = v;
+ struct ifnet *ifp = inp->ifp;
+
+ VERIFY(inp != dlil_main_input_thread);
+ VERIFY(ifp != NULL);
+ VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
+ VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
- inputthread->input_waiting |= DLIL_INPUT_RUNNING;
- inputthread->input_waiting &= ~DLIL_INPUT_WAITING;
+ while (1) {
+ struct mbuf *m = NULL;
+ u_int32_t m_cnt;
+
+ lck_mtx_lock_spin(&inp->input_lck);
- if (inputthread == dlil_lo_thread_ptr) {
- m_loop = dlil_lo_input_mbuf_head;
- dlil_lo_input_mbuf_head = NULL;
- dlil_lo_input_mbuf_tail = NULL;
+ /* Wait until there is work to be done */
+ while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+ (void) msleep(&inp->input_waiting, &inp->input_lck,
+ (PZERO - 1) | PSPIN, inp->input_name, NULL);
}
-#if IFNET_INPUT_SANITY_CHK
- if (dlil_input_sanity_check != 0) {
- mbuf_cnt = inputthread->mbuf_count;
- inputthread->mbuf_count = 0;
- if (inputthread == dlil_lo_thread_ptr) {
- loop_cnt = dlil_lo_input_mbuf_count;
- dlil_lo_input_mbuf_count = 0;
- }
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
+ inp->input_waiting &= ~DLIL_INPUT_WAITING;
- lck_mtx_unlock(&inputthread->input_lck);
+ /*
+ * Protocol registration and injection must always use
+ * the main input thread; in theory the latter can utilize
+		 * the corresponding input thread of the interface the
+		 * packet arrived on, but that requires knowing the
+		 * interface in advance (and the benefits might not be
+		 * worth the trouble.)
+ */
+ VERIFY(!(inp->input_waiting &
+ (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
- for (m1 = m, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
- count++;
- }
- if (count != mbuf_cnt) {
- panic("%s - thread=%p reg. loop queue "
- "has %d packets, should have %d\n",
- __func__, inputthread, count, mbuf_cnt);
- /* NOTREACHED */
- }
+ /* Packets for this interface */
+ m_cnt = qlen(&inp->rcvq_pkts);
+ m = _getq_all(&inp->rcvq_pkts);
- if (inputthread == dlil_lo_thread_ptr) {
- for (m1 = m_loop, count = 0; m1;
- m1 = mbuf_nextpkt(m1)) {
- count++;
- }
- if (count != loop_cnt) {
- panic("%s - thread=%p loop queue "
- "has %d packets, should have %d\n",
- __func__, inputthread, count,
- loop_cnt);
- /* NOTREACHED */
- }
- }
- } else
-#endif /* IFNET_INPUT_SANITY_CHK */
- {
- lck_mtx_unlock(&inputthread->input_lck);
+ if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Free up pending packets */
+ if (m != NULL)
+ mbuf_freem_list(m);
+
+ dlil_terminate_input_thread(inp);
+ /* NOTREACHED */
+ return;
}
+ inp->wtot = 0;
+
+ dlil_input_stats_sync(ifp, inp);
+
+ lck_mtx_unlock(&inp->input_lck);
/*
* NOTE warning %%% attention !!!!
* We should think about putting some thread starvation
* safeguards if we deal with long chains of packets.
*/
- if (m_loop) {
- if (inputthread == dlil_lo_thread_ptr) {
- dlil_input_packet_list(lo_ifp, m_loop);
- }
-#if IFNET_INPUT_SANITY_CHK
- else {
- panic("%s - thread=%p loop queue has %d "
- "packets, should have none!\n", __func__,
- inputthread, loop_cnt);
- /* NOTREACHED */
- }
-#endif /* IFNET_INPUT_SANITY_CHK */
- }
-
if (m != NULL)
- dlil_input_packet_list(0, m);
-
- lck_mtx_lock_spin(&inputthread->input_lck);
-
- if (inputthread->input_waiting &
- (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)) {
- lck_mtx_unlock(&inputthread->input_lck);
- proto_input_run();
- } else {
- lck_mtx_unlock(&inputthread->input_lck);
- }
+ dlil_input_packet_list_extended(NULL, m,
+ m_cnt, inp->mode);
}
+
+ /* NOTREACHED */
+ VERIFY(0); /* we should never get here */
}
-errno_t
-ifnet_input(ifnet_t ifp, mbuf_t m_head,
- const struct ifnet_stat_increment_param *stats)
+/*
+ * Input thread for interfaces with opportunistic polling input model.
+ */
+static void
+dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
- struct thread *tp = current_thread();
- mbuf_t m_tail;
- struct dlil_threading_info *inp;
-#if IFNET_INPUT_SANITY_CHK
- u_int32_t pkt_count = 0;
-#endif /* IFNET_INPUT_SANITY_CHK */
+#pragma unused(w)
+ struct dlil_threading_info *inp = v;
+ struct ifnet *ifp = inp->ifp;
+ struct timespec ts;
- if (ifp == NULL || m_head == NULL) {
- if (m_head != NULL)
- mbuf_freem_list(m_head);
- return (EINVAL);
- }
+ VERIFY(inp != dlil_main_input_thread);
+ VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
- m_tail = m_head;
while (1) {
-#if IFNET_INPUT_SANITY_CHK
- if (dlil_input_sanity_check != 0) {
- ifnet_t rcvif;
+ struct mbuf *m = NULL;
+ u_int32_t m_cnt, m_size, poll_req = 0;
+ ifnet_model_t mode;
+ struct timespec now, delta;
- rcvif = mbuf_pkthdr_rcvif(m_tail);
- pkt_count++;
+ lck_mtx_lock_spin(&inp->input_lck);
- if (rcvif == NULL ||
- (ifp->if_type != IFT_LOOP && rcvif != ifp) ||
- !(mbuf_flags(m_head) & MBUF_PKTHDR)) {
- panic("%s - invalid mbuf %p\n",
- __func__, m_tail);
- /* NOTREACHED */
- }
+ /* Link parameters changed? */
+ if (ifp->if_poll_update != 0) {
+ ifp->if_poll_update = 0;
+ dlil_rxpoll_calc_limits(inp);
}
-#endif /* IFNET_INPUT_SANITY_CHK */
- if (mbuf_nextpkt(m_tail) == NULL)
- break;
- m_tail = mbuf_nextpkt(m_tail);
- }
- inp = ifp->if_input_thread;
+ /* Current operating mode */
+ mode = inp->mode;
- if (dlil_multithreaded_input == 0 || inp == NULL)
- inp = dlil_lo_thread_ptr;
-
- /*
- * If there is a matching dlil input thread associated with an
- * affinity set, associate this workloop thread with the same set.
- * We will only do this once.
- */
- lck_mtx_lock_spin(&inp->input_lck);
- if (inp->net_affinity && inp->workloop_thread == NULL) {
- u_int32_t tag = inp->tag;
- inp->workloop_thread = tp;
- lck_mtx_unlock(&inp->input_lck);
+ /* Wait until there is work to be done */
+ while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING) &&
+ qempty(&inp->rcvq_pkts)) {
+ inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+ (void) msleep(&inp->input_waiting, &inp->input_lck,
+ (PZERO - 1) | PSPIN, inp->input_name, NULL);
+ }
- /* Associated the current thread with the new affinity tag */
- (void) dlil_affinity_set(tp, tag);
+ inp->input_waiting |= DLIL_INPUT_RUNNING;
+ inp->input_waiting &= ~DLIL_INPUT_WAITING;
/*
- * Take a reference on the workloop (current) thread; during
- * detach, we will need to refer to it in order ot tear down
- * its affinity.
+ * Protocol registration and injection must always use
+ * the main input thread; in theory the latter can utilize
+		 * the corresponding input thread of the interface the
+		 * packet arrived on, but that requires knowing the
+		 * interface in advance (and the benefits might not be
+		 * worth the trouble.)
*/
- thread_reference(tp);
- lck_mtx_lock_spin(&inp->input_lck);
- }
-
- /* WARNING
- * Because of loopbacked multicast we cannot stuff the ifp in
- * the rcvif of the packet header: loopback has its own dlil
- * input queue
- */
+ VERIFY(!(inp->input_waiting &
+ (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
- if (inp == dlil_lo_thread_ptr && ifp->if_type == IFT_LOOP) {
- if (dlil_lo_input_mbuf_head == NULL)
- dlil_lo_input_mbuf_head = m_head;
- else if (dlil_lo_input_mbuf_tail != NULL)
- dlil_lo_input_mbuf_tail->m_nextpkt = m_head;
- dlil_lo_input_mbuf_tail = m_tail;
-#if IFNET_INPUT_SANITY_CHK
- if (dlil_input_sanity_check != 0) {
- dlil_lo_input_mbuf_count += pkt_count;
- inp->input_mbuf_cnt += pkt_count;
- inp->input_wake_cnt++;
+ if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
+ /* Free up pending packets */
+ _flushq(&inp->rcvq_pkts);
+ lck_mtx_unlock(&inp->input_lck);
- lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+ dlil_terminate_input_thread(inp);
+ /* NOTREACHED */
+ return;
}
-#endif
- } else {
- if (inp->mbuf_head == NULL)
- inp->mbuf_head = m_head;
- else if (inp->mbuf_tail != NULL)
- inp->mbuf_tail->m_nextpkt = m_head;
- inp->mbuf_tail = m_tail;
-#if IFNET_INPUT_SANITY_CHK
- if (dlil_input_sanity_check != 0) {
- inp->mbuf_count += pkt_count;
- inp->input_mbuf_cnt += pkt_count;
- inp->input_wake_cnt++;
- lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
- }
-#endif
- }
+ /* Total count of all packets */
+ m_cnt = qlen(&inp->rcvq_pkts);
+
+ /* Total bytes of all packets */
+ m_size = qsize(&inp->rcvq_pkts);
+
+ /* Packets for this interface */
+ m = _getq_all(&inp->rcvq_pkts);
+ VERIFY(m != NULL || m_cnt == 0);
+
+ nanouptime(&now);
+ if (!net_timerisset(&inp->sample_lasttime))
+ *(&inp->sample_lasttime) = *(&now);
+
+ net_timersub(&now, &inp->sample_lasttime, &delta);
+ if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
+ u_int32_t ptot, btot;
+
+ /* Accumulate statistics for current sampling */
+ PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
+
+ if (net_timercmp(&delta, &inp->sample_holdtime, <))
+ goto skip;
+
+ *(&inp->sample_lasttime) = *(&now);
+
+ /* Calculate min/max of inbound bytes */
+ btot = (u_int32_t)inp->sstats.bytes;
+ if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
+ inp->rxpoll_bmin = btot;
+ if (btot > inp->rxpoll_bmax)
+ inp->rxpoll_bmax = btot;
+
+ /* Calculate EWMA of inbound bytes */
+ DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
+
+ /* Calculate min/max of inbound packets */
+ ptot = (u_int32_t)inp->sstats.packets;
+ if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
+ inp->rxpoll_pmin = ptot;
+ if (ptot > inp->rxpoll_pmax)
+ inp->rxpoll_pmax = ptot;
+
+ /* Calculate EWMA of inbound packets */
+ DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
+
+ /* Reset sampling statistics */
+ PKTCNTR_CLEAR(&inp->sstats);
+
+ /* Calculate EWMA of wakeup requests */
+ DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
+ inp->wtot = 0;
+
+ if (dlil_verbose) {
+ if (!net_timerisset(&inp->dbg_lasttime))
+ *(&inp->dbg_lasttime) = *(&now);
+ net_timersub(&now, &inp->dbg_lasttime, &delta);
+ if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
+ *(&inp->dbg_lasttime) = *(&now);
+ printf("%s%d: [%s] pkts avg %d max %d "
+ "limits [%d/%d], wreq avg %d "
+ "limits [%d/%d], bytes avg %d "
+ "limits [%d/%d]\n", ifp->if_name,
+ ifp->if_unit, (inp->mode ==
+ IFNET_MODEL_INPUT_POLL_ON) ?
+ "ON" : "OFF", inp->rxpoll_pavg,
+ inp->rxpoll_pmax,
+ inp->rxpoll_plowat,
+ inp->rxpoll_phiwat,
+ inp->rxpoll_wavg,
+ inp->rxpoll_wlowat,
+ inp->rxpoll_whiwat,
+ inp->rxpoll_bavg,
+ inp->rxpoll_blowat,
+ inp->rxpoll_bhiwat);
+ }
+ }
- inp->input_waiting |= DLIL_INPUT_WAITING;
- if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0) {
- wakeup((caddr_t)&inp->input_waiting);
- }
- lck_mtx_unlock(&inp->input_lck);
+ /* Perform mode transition, if necessary */
+ if (!net_timerisset(&inp->mode_lasttime))
+ *(&inp->mode_lasttime) = *(&now);
+
+ net_timersub(&now, &inp->mode_lasttime, &delta);
+ if (net_timercmp(&delta, &inp->mode_holdtime, <))
+ goto skip;
+
+ if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
+ inp->rxpoll_bavg <= inp->rxpoll_blowat &&
+ inp->rxpoll_wavg <= inp->rxpoll_wlowat &&
+ inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
+ mode = IFNET_MODEL_INPUT_POLL_OFF;
+ } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
+ (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
+ inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
+ inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
+ mode = IFNET_MODEL_INPUT_POLL_ON;
+ }
- if (stats) {
- atomic_add_64(&ifp->if_data.ifi_ipackets, stats->packets_in);
- atomic_add_64(&ifp->if_data.ifi_ibytes, stats->bytes_in);
- atomic_add_64(&ifp->if_data.ifi_ierrors, stats->errors_in);
+ if (mode != inp->mode) {
+ inp->mode = mode;
+ *(&inp->mode_lasttime) = *(&now);
+ poll_req++;
+ }
+ }
+skip:
+ dlil_input_stats_sync(ifp, inp);
- atomic_add_64(&ifp->if_data.ifi_opackets, stats->packets_out);
- atomic_add_64(&ifp->if_data.ifi_obytes, stats->bytes_out);
- atomic_add_64(&ifp->if_data.ifi_oerrors, stats->errors_out);
+ lck_mtx_unlock(&inp->input_lck);
- atomic_add_64(&ifp->if_data.ifi_collisions, stats->collisions);
- atomic_add_64(&ifp->if_data.ifi_iqdrops, stats->dropped);
- }
+ /*
+ * If there's a mode change and interface is still attached,
+ * perform a downcall to the driver for the new mode. Also
+ * hold an IO refcnt on the interface to prevent it from
+	 * being detached (will be released below.)
+ */
+ if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
+ struct ifnet_model_params p = { mode, { 0 } };
+ errno_t err;
+
+ if (dlil_verbose) {
+ printf("%s%d: polling is now %s, "
+ "pkts avg %d max %d limits [%d/%d], "
+ "wreq avg %d limits [%d/%d], "
+ "bytes avg %d limits [%d/%d]\n",
+ ifp->if_name, ifp->if_unit,
+ (mode == IFNET_MODEL_INPUT_POLL_ON) ?
+ "ON" : "OFF", inp->rxpoll_pavg,
+ inp->rxpoll_pmax, inp->rxpoll_plowat,
+ inp->rxpoll_phiwat, inp->rxpoll_wavg,
+ inp->rxpoll_wlowat, inp->rxpoll_whiwat,
+ inp->rxpoll_bavg, inp->rxpoll_blowat,
+ inp->rxpoll_bhiwat);
+ }
- return (0);
-}
+ if ((err = ((*ifp->if_input_ctl)(ifp,
+ IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
+ printf("%s%d: error setting polling mode "
+ "to %s (%d)\n", ifp->if_name, ifp->if_unit,
+ (mode == IFNET_MODEL_INPUT_POLL_ON) ?
+ "ON" : "OFF", err);
+ }
-static int
-dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
- char **frame_header_p, protocol_family_t protocol_family)
-{
- struct ifnet_filter *filter;
+ switch (mode) {
+ case IFNET_MODEL_INPUT_POLL_OFF:
+ ifnet_set_poll_cycle(ifp, NULL);
+ inp->rxpoll_offreq++;
+ if (err != 0)
+ inp->rxpoll_offerr++;
+ break;
- /*
- * Pass the inbound packet to the interface filters
+ case IFNET_MODEL_INPUT_POLL_ON:
+ net_nsectimer(&if_rxpoll_interval_time, &ts);
+ ifnet_set_poll_cycle(ifp, &ts);
+ ifnet_poll(ifp);
+ inp->rxpoll_onreq++;
+ if (err != 0)
+ inp->rxpoll_onerr++;
+ break;
+
+ default:
+ VERIFY(0);
+ /* NOTREACHED */
+ }
+
+ /* Release the IO refcnt */
+ ifnet_decr_iorefcnt(ifp);
+ }
+
+ /*
+ * NOTE warning %%% attention !!!!
+ * We should think about putting some thread starvation
+ * safeguards if we deal with long chains of packets.
+ */
+ if (m != NULL)
+ dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
+ }
+
+ /* NOTREACHED */
+ VERIFY(0); /* we should never get here */
+}
+
+static void
+dlil_rxpoll_calc_limits(struct dlil_threading_info *inp)
+{
+ struct ifnet *ifp = inp->ifp;
+ u_int64_t sample_holdtime, inbw;
+
+ VERIFY(inp != dlil_main_input_thread);
+ VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
+
+ if ((inbw = ifnet_input_linkrate(ifp)) == 0) {
+ sample_holdtime = 0; /* polling is disabled */
+ inp->rxpoll_wlowat = inp->rxpoll_plowat =
+ inp->rxpoll_blowat = 0;
+ inp->rxpoll_whiwat = inp->rxpoll_phiwat =
+ inp->rxpoll_bhiwat = (u_int32_t)-1;
+ } else {
+ unsigned int n, i;
+
+ n = 0;
+ for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
+ if (inbw < rxpoll_tbl[i].speed)
+ break;
+ n = i;
+ }
+ sample_holdtime = if_rxpoll_sample_holdtime;
+ inp->rxpoll_wlowat = if_rxpoll_wlowat;
+ inp->rxpoll_whiwat = if_rxpoll_whiwat;
+ inp->rxpoll_plowat = rxpoll_tbl[n].plowat;
+ inp->rxpoll_phiwat = rxpoll_tbl[n].phiwat;
+ inp->rxpoll_blowat = rxpoll_tbl[n].blowat;
+ inp->rxpoll_bhiwat = rxpoll_tbl[n].bhiwat;
+ }
+
+ net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
+ net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
+
+ if (dlil_verbose) {
+ printf("%s%d: speed %llu bps, sample per %llu nsec, "
+ "pkt limits [%d/%d], wreq limits [%d/%d], "
+ "bytes limits [%d/%d]\n", ifp->if_name, ifp->if_unit,
+ inbw, sample_holdtime, inp->rxpoll_plowat,
+ inp->rxpoll_phiwat, inp->rxpoll_wlowat, inp->rxpoll_whiwat,
+ inp->rxpoll_blowat, inp->rxpoll_bhiwat);
+ }
+}
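+
+/*
+ * Example (illustrative): an input link rate of 300 Mbps selects the
+ * 100 Mbps row of rxpoll_tbl, the largest speed not exceeding the rate,
+ * i.e. packet watermarks [10/40] and byte watermarks [4K/64K]; a rate of
+ * 0 instead zeroes the low watermarks and pins the high watermarks at
+ * (u_int32_t)-1, leaving the interface in polling-off mode.
+ */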
+
+errno_t
+ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
+ const struct ifnet_stat_increment_param *s)
+{
+ return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
+}
+
+errno_t
+ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
+ struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
+{
+ return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
+}
+
+static errno_t
+ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
+ const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
+{
+ struct thread *tp = current_thread();
+ struct mbuf *last;
+ struct dlil_threading_info *inp;
+ u_int32_t m_cnt = 0, m_size = 0;
+
+ /*
+ * Drop the packet(s) if the parameters are invalid, or if the
+ * interface is no longer attached; else hold an IO refcnt to
+ * prevent it from being detached (will be released below.)
+ */
+ if (ifp == NULL || m_head == NULL || (s == NULL && ext) ||
+ (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
+ if (m_head != NULL)
+ mbuf_freem_list(m_head);
+ return (EINVAL);
+ }
+
+ VERIFY(m_tail == NULL || ext);
+ VERIFY(s != NULL || !ext);
+
+ if (m_tail == NULL) {
+ last = m_head;
+ while (1) {
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0)
+ DLIL_INPUT_CHECK(last, ifp);
+#endif /* IFNET_INPUT_SANITY_CHK */
+ m_cnt++;
+ m_size += m_length(last);
+ if (mbuf_nextpkt(last) == NULL)
+ break;
+ last = mbuf_nextpkt(last);
+ }
+ m_tail = last;
+ } else {
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ last = m_head;
+ while (1) {
+ DLIL_INPUT_CHECK(last, ifp);
+ m_cnt++;
+ m_size += m_length(last);
+ if (mbuf_nextpkt(last) == NULL)
+ break;
+ last = mbuf_nextpkt(last);
+ }
+ } else {
+ m_cnt = s->packets_in;
+ m_size = s->bytes_in;
+ last = m_tail;
+ }
+#else
+ m_cnt = s->packets_in;
+ m_size = s->bytes_in;
+ last = m_tail;
+#endif /* IFNET_INPUT_SANITY_CHK */
+ }
+
+ if (last != m_tail) {
+ panic_plain("%s: invalid input packet chain for %s%d, "
+ "tail mbuf %p instead of %p\n", __func__, ifp->if_name,
+ ifp->if_unit, m_tail, last);
+ }
+
+ /*
+ * Assert packet count only for the extended variant, for backwards
+ * compatibility, since this came directly from the device driver.
+ * Relax this assertion for input bytes, as the driver may have
+ * included the link-layer headers in the computation; hence
+ * m_size is just an approximation.
+ */
+ if (ext && s->packets_in != m_cnt) {
+ panic_plain("%s: input packet count mismatch for %s%d, "
+ "%d instead of %d\n", __func__, ifp->if_name,
+ ifp->if_unit, s->packets_in, m_cnt);
+ }
+
+ if ((inp = ifp->if_inp) == NULL)
+ inp = dlil_main_input_thread;
+
+ /*
+ * If there is a matching DLIL input thread associated with an
+ * affinity set, associate this thread with the same set. We
+ * will only do this once.
+ */
+ lck_mtx_lock_spin(&inp->input_lck);
+ if (inp != dlil_main_input_thread && inp->net_affinity &&
+ ((!poll && inp->wloop_thr == THREAD_NULL) ||
+ (poll && inp->poll_thr == THREAD_NULL))) {
+ u_int32_t tag = inp->tag;
+
+ if (poll) {
+ VERIFY(inp->poll_thr == THREAD_NULL);
+ inp->poll_thr = tp;
+ } else {
+ VERIFY(inp->wloop_thr == THREAD_NULL);
+ inp->wloop_thr = tp;
+ }
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Associate the current thread with the new affinity tag */
+ (void) dlil_affinity_set(tp, tag);
+
+ /*
+ * Take a reference on the current thread; during detach,
+	 * we will need to refer to it in order to tear down its
+ * affinity.
+ */
+ thread_reference(tp);
+ lck_mtx_lock_spin(&inp->input_lck);
+ }
+
+ /*
+ * Because of loopbacked multicast we cannot stuff the ifp in
+ * the rcvif of the packet header: loopback (lo0) packets use a
+ * dedicated list so that we can later associate them with lo_ifp
+ * on their way up the stack. Packets for other interfaces without
+ * dedicated input threads go to the regular list.
+ */
+ if (inp == dlil_main_input_thread && ifp == lo_ifp) {
+ struct dlil_main_threading_info *inpm =
+ (struct dlil_main_threading_info *)inp;
+ _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, m_cnt, m_size);
+ } else {
+ _addq_multi(&inp->rcvq_pkts, m_head, m_tail, m_cnt, m_size);
+ }
+
+#if IFNET_INPUT_SANITY_CHK
+ if (dlil_input_sanity_check != 0) {
+ u_int32_t count;
+ struct mbuf *m0;
+
+ for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
+ count++;
+
+ if (count != m_cnt) {
+ panic_plain("%s%d: invalid packet count %d "
+ "(expected %d)\n", ifp->if_name, ifp->if_unit,
+ count, m_cnt);
+ /* NOTREACHED */
+ }
+
+ inp->input_mbuf_cnt += m_cnt;
+ }
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+ if (s != NULL) {
+ dlil_input_stats_add(s, inp, poll);
+ /*
+ * If we're using the main input thread, synchronize the
+ * stats now since we have the interface context. All
+ * other cases involving dedicated input threads will
+ * have their stats synchronized there.
+ */
+ if (inp == dlil_main_input_thread)
+ dlil_input_stats_sync(ifp, inp);
+ }
+
+ inp->input_waiting |= DLIL_INPUT_WAITING;
+ if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
+ inp->wtot++;
+ wakeup_one((caddr_t)&inp->input_waiting);
+ }
+ lck_mtx_unlock(&inp->input_lck);
+
+ if (ifp != lo_ifp) {
+ /* Release the IO refcnt */
+ ifnet_decr_iorefcnt(ifp);
+ }
+
+ return (0);
+}
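+
+/*
+ * Usage sketch (hypothetical driver, not part of this file): a driver
+ * that batches inbound packets hands over the whole chain, along with
+ * the counts it kept while building it, via the extended variant:
+ *
+ *	struct ifnet_stat_increment_param s;
+ *
+ *	bzero(&s, sizeof (s));
+ *	s.packets_in = n_pkts;
+ *	s.bytes_in = n_bytes;
+ *	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
+ *
+ * The classic ifnet_input() remains the simple form: it walks the chain
+ * itself and tolerates a NULL stats parameter.
+ */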
+
+void
+ifnet_start(struct ifnet *ifp)
+{
+ /*
+ * If the starter thread is inactive, signal it to do work.
+ */
+ lck_mtx_lock_spin(&ifp->if_start_lock);
+ ifp->if_start_req++;
+ if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
+ wakeup_one((caddr_t)&ifp->if_start_thread);
+ }
+ lck_mtx_unlock(&ifp->if_start_lock);
+}
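+
+/*
+ * Usage sketch (hypothetical driver; the callback and TX routine names
+ * are made up): a driver opting into this output model registers a start
+ * callback at allocation time; the stack then calls ifnet_start() as
+ * packets are enqueued, and the callback drains the send queue:
+ *
+ *	static void
+ *	mydrv_start(struct ifnet *ifp)
+ *	{
+ *		struct mbuf *m;
+ *
+ *		while (ifnet_dequeue(ifp, &m) == 0)
+ *			mydrv_tx_one(ifp, m);
+ *	}
+ */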
+
+static void
+ifnet_start_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(w)
+ struct ifnet *ifp = v;
+ char ifname[IFNAMSIZ + 1];
+ struct timespec *ts = NULL;
+ struct ifclassq *ifq = &ifp->if_snd;
+
+ /*
+ * Treat the dedicated starter thread for lo0 as equivalent to
+ * the driver workloop thread; if net_affinity is enabled for
+ * the main input thread, associate this starter thread to it
+	 * the main input thread, associate this starter thread with it
+ * only once (as we only have one lo_ifp which never goes away.)
+ */
+ if (ifp == lo_ifp) {
+ struct dlil_threading_info *inp = dlil_main_input_thread;
+ struct thread *tp = current_thread();
+
+ lck_mtx_lock(&inp->input_lck);
+ if (inp->net_affinity) {
+ u_int32_t tag = inp->tag;
+
+ VERIFY(inp->wloop_thr == THREAD_NULL);
+ VERIFY(inp->poll_thr == THREAD_NULL);
+ inp->wloop_thr = tp;
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Associate this thread with the affinity tag */
+ (void) dlil_affinity_set(tp, tag);
+ } else {
+ lck_mtx_unlock(&inp->input_lck);
+ }
+ }
+
+ snprintf(ifname, sizeof (ifname), "%s%d_starter",
+ ifp->if_name, ifp->if_unit);
+
+ lck_mtx_lock_spin(&ifp->if_start_lock);
+
+ for (;;) {
+ (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
+ (PZERO - 1) | PSPIN, ifname, ts);
+
+ /* interface is detached? */
+ if (ifp->if_start_thread == THREAD_NULL) {
+ ifnet_set_start_cycle(ifp, NULL);
+ lck_mtx_unlock(&ifp->if_start_lock);
+ ifnet_purge(ifp);
+
+ if (dlil_verbose) {
+ printf("%s%d: starter thread terminated\n",
+ ifp->if_name, ifp->if_unit);
+ }
+
+ /* for the extra refcnt from kernel_thread_start() */
+ thread_deallocate(current_thread());
+ /* this is the end */
+ thread_terminate(current_thread());
+ /* NOTREACHED */
+ return;
+ }
+
+ ifp->if_start_active = 1;
+ for (;;) {
+ u_int32_t req = ifp->if_start_req;
+
+ lck_mtx_unlock(&ifp->if_start_lock);
+ /* invoke the driver's start routine */
+ ((*ifp->if_start)(ifp));
+ lck_mtx_lock_spin(&ifp->if_start_lock);
+
+ /* if there's no pending request, we're done */
+ if (req == ifp->if_start_req)
+ break;
+ }
+ ifp->if_start_req = 0;
+ ifp->if_start_active = 0;
+ /*
+ * Wakeup N ns from now if rate-controlled by TBR, and if
+ * there are still packets in the send queue which haven't
+ * been dequeued so far; else sleep indefinitely (ts = NULL)
+ * until ifnet_start() is called again.
+ */
+ ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
+ &ifp->if_start_cycle : NULL);
+
+ if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
+ ts = NULL;
+ }
+
+ /* NOTREACHED */
+ lck_mtx_unlock(&ifp->if_start_lock);
+ VERIFY(0); /* we should never get here */
+}
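+
+/*
+ * The if_start_req counter above acts as a doorbell that cannot miss a
+ * wakeup: ifnet_start() increments it under if_start_lock, and the inner
+ * loop keeps re-invoking the driver until the snapshot taken before
+ * dropping the lock matches the current value, i.e. until no new request
+ * arrived while if_start ran unlocked.
+ */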
+
+void
+ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
+{
+ if (ts == NULL)
+ bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
+ else
+ *(&ifp->if_start_cycle) = *ts;
+
+ if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
+ printf("%s%d: restart interval set to %lu nsec\n",
+ ifp->if_name, ifp->if_unit, ts->tv_nsec);
+}
+
+static void
+ifnet_poll(struct ifnet *ifp)
+{
+ /*
+ * If the poller thread is inactive, signal it to do work.
+ */
+ lck_mtx_lock_spin(&ifp->if_poll_lock);
+ ifp->if_poll_req++;
+ if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
+ wakeup_one((caddr_t)&ifp->if_poll_thread);
+ }
+ lck_mtx_unlock(&ifp->if_poll_lock);
+}
+
+static void
+ifnet_poll_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(w)
+ struct dlil_threading_info *inp;
+ struct ifnet *ifp = v;
+ char ifname[IFNAMSIZ + 1];
+ struct timespec *ts = NULL;
+ struct ifnet_stat_increment_param s;
+
+ snprintf(ifname, sizeof (ifname), "%s%d_poller",
+ ifp->if_name, ifp->if_unit);
+ bzero(&s, sizeof (s));
+
+ lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+ inp = ifp->if_inp;
+ VERIFY(inp != NULL);
+
+ for (;;) {
+ if (ifp->if_poll_thread != THREAD_NULL) {
+ (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
+ (PZERO - 1) | PSPIN, ifname, ts);
+ }
+
+ /* interface is detached (maybe while asleep)? */
+ if (ifp->if_poll_thread == THREAD_NULL) {
+ ifnet_set_poll_cycle(ifp, NULL);
+ lck_mtx_unlock(&ifp->if_poll_lock);
+
+ if (dlil_verbose) {
+ printf("%s%d: poller thread terminated\n",
+ ifp->if_name, ifp->if_unit);
+ }
+
+ /* for the extra refcnt from kernel_thread_start() */
+ thread_deallocate(current_thread());
+ /* this is the end */
+ thread_terminate(current_thread());
+ /* NOTREACHED */
+ return;
+ }
+
+ ifp->if_poll_active = 1;
+ for (;;) {
+ struct mbuf *m_head, *m_tail;
+ u_int32_t m_lim, m_cnt, m_totlen;
+ u_int16_t req = ifp->if_poll_req;
+
+ lck_mtx_unlock(&ifp->if_poll_lock);
+
+ /*
+ * If no longer attached, there's nothing to do;
+ * else hold an IO refcnt to prevent the interface
+ * from being detached (will be released below.)
+ */
+ if (!ifnet_is_attached(ifp, 1))
+ break;
+
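+			/*
+			 * Upper bound on packets to poll: the if_rxpoll_max
+			 * cap if set; otherwise the larger of the receive
+			 * queue limit and 4x the poll high watermark.
+			 */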
+ m_lim = (if_rxpoll_max != 0) ? if_rxpoll_max :
+ MAX((qlimit(&inp->rcvq_pkts)),
+ (inp->rxpoll_phiwat << 2));
+
+ if (dlil_verbose > 1) {
+ printf("%s%d: polling up to %d pkts, "
+ "pkts avg %d max %d, wreq avg %d, "
+ "bytes avg %d\n",
+ ifp->if_name, ifp->if_unit, m_lim,
+ inp->rxpoll_pavg, inp->rxpoll_pmax,
+ inp->rxpoll_wavg, inp->rxpoll_bavg);
+ }
+
+ /* invoke the driver's input poll routine */
+ ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
+ &m_cnt, &m_totlen));
+
+ if (m_head != NULL) {
+ VERIFY(m_tail != NULL && m_cnt > 0);
+
+ if (dlil_verbose > 1) {
+ printf("%s%d: polled %d pkts, "
+ "pkts avg %d max %d, wreq avg %d, "
+ "bytes avg %d\n",
+ ifp->if_name, ifp->if_unit, m_cnt,
+ inp->rxpoll_pavg, inp->rxpoll_pmax,
+ inp->rxpoll_wavg, inp->rxpoll_bavg);
+ }
+
+ /* stats are required for extended variant */
+ s.packets_in = m_cnt;
+ s.bytes_in = m_totlen;
+
+ (void) ifnet_input_common(ifp, m_head, m_tail,
+ &s, TRUE, TRUE);
+ } else if (dlil_verbose > 1) {
+ printf("%s%d: no packets, pkts avg %d max %d, "
+ "wreq avg %d, bytes avg %d\n", ifp->if_name,
+ ifp->if_unit, inp->rxpoll_pavg,
+ inp->rxpoll_pmax, inp->rxpoll_wavg,
+ inp->rxpoll_bavg);
+ }
+
+ /* Release the io ref count */
+ ifnet_decr_iorefcnt(ifp);
+
+ lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+ /* if there's no pending request, we're done */
+ if (req == ifp->if_poll_req)
+ break;
+ }
+ ifp->if_poll_req = 0;
+ ifp->if_poll_active = 0;
+
+ /*
+		 * Wake up N ns from now; else sleep indefinitely (ts = NULL)
+ * until ifnet_poll() is called again.
+ */
+ ts = &ifp->if_poll_cycle;
+ if (ts->tv_sec == 0 && ts->tv_nsec == 0)
+ ts = NULL;
+ }
+
+ /* NOTREACHED */
+ lck_mtx_unlock(&ifp->if_poll_lock);
+ VERIFY(0); /* we should never get here */
+}
+
+void
+ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
+{
+ if (ts == NULL)
+ bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
+ else
+		ifp->if_poll_cycle = *ts;
+
+ if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
+ printf("%s%d: poll interval set to %lu nsec\n",
+ ifp->if_name, ifp->if_unit, ts->tv_nsec);
+}
+
+void
+ifnet_purge(struct ifnet *ifp)
+{
+ if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
+ if_qflush(ifp, 0);
+}
+
+void
+ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
+{
+ IFCQ_LOCK_ASSERT_HELD(ifq);
+
+ if (!(IFCQ_IS_READY(ifq)))
+ return;
+
+ if (IFCQ_TBR_IS_ENABLED(ifq)) {
+ struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
+ ifq->ifcq_tbr.tbr_percent, 0 };
+ (void) ifclassq_tbr_set(ifq, &tb, FALSE);
+ }
+
+ ifclassq_update(ifq, ev);
+}
+
+void
+ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
+{
+ switch (ev) {
+ case CLASSQ_EV_LINK_SPEED:
+ if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
+ ifp->if_poll_update++;
+ break;
+
+ default:
+ break;
+ }
+}
+
+errno_t
+ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
+{
+ struct ifclassq *ifq;
+ u_int32_t omodel;
+ errno_t err;
+
+ if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
+ model != IFNET_SCHED_MODEL_NORMAL))
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART))
+ return (ENXIO);
+
+ ifq = &ifp->if_snd;
+ IFCQ_LOCK(ifq);
+ omodel = ifp->if_output_sched_model;
+ ifp->if_output_sched_model = model;
+ if ((err = ifclassq_pktsched_setup(ifq)) != 0)
+ ifp->if_output_sched_model = omodel;
+ IFCQ_UNLOCK(ifq);
+
+ return (err);
+}
+
+errno_t
+ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
+{
+ if (ifp == NULL)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART))
+ return (ENXIO);
+
+ ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
+
+ return (0);
+}
+
+errno_t
+ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
+{
+ if (ifp == NULL || maxqlen == NULL)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART))
+ return (ENXIO);
+
+ *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
+
+ return (0);
+}
+
+errno_t
+ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *qlen)
+{
+ if (ifp == NULL || qlen == NULL)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART))
+ return (ENXIO);
+
+ *qlen = ifclassq_get_len(&ifp->if_snd);
+
+ return (0);
+}
+
+errno_t
+ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
+{
+ struct dlil_threading_info *inp;
+
+ if (ifp == NULL)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
+ return (ENXIO);
+
+ if (maxqlen == 0)
+ maxqlen = if_rcvq_maxlen;
+ else if (maxqlen < IF_RCVQ_MINLEN)
+ maxqlen = IF_RCVQ_MINLEN;
+
+ inp = ifp->if_inp;
+ lck_mtx_lock(&inp->input_lck);
+ qlimit(&inp->rcvq_pkts) = maxqlen;
+ lck_mtx_unlock(&inp->input_lck);
+
+ return (0);
+}
+
+errno_t
+ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
+{
+ struct dlil_threading_info *inp;
+
+ if (ifp == NULL || maxqlen == NULL)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
+ return (ENXIO);
+
+ inp = ifp->if_inp;
+ lck_mtx_lock(&inp->input_lck);
+ *maxqlen = qlimit(&inp->rcvq_pkts);
+ lck_mtx_unlock(&inp->input_lck);
+ return (0);
+}
+
+errno_t
+ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+ int error;
+
+ if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
+ m->m_nextpkt != NULL) {
+ if (m != NULL)
+ m_freem_list(m);
+ return (EINVAL);
+ } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ !(ifp->if_refflags & IFRF_ATTACHED)) {
+ /* flag tested without lock for performance */
+ m_freem(m);
+ return (ENXIO);
+ } else if (!(ifp->if_flags & IFF_UP)) {
+ m_freem(m);
+ return (ENETDOWN);
+	}
+
+ /* enqueue the packet */
+ error = ifclassq_enqueue(&ifp->if_snd, m);
+
+ /*
+ * Tell the driver to start dequeueing; do this even when the queue
+ * for the packet is suspended (EQSUSPENDED), as the driver could still
+ * be dequeueing from other unsuspended queues.
+ */
+ if (error == 0 || error == EQFULL || error == EQSUSPENDED)
+ ifnet_start(ifp);
+
+ return (error);
+}
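+
+/*
+ * Sketch of how a driver adopting the new output model (IFEF_TXSTART,
+ * default IFNET_SCHED_MODEL_NORMAL) is expected to use these KPIs;
+ * the mydrv_* names are hypothetical:
+ *
+ *	err = ifnet_enqueue(ifp, m);	// kicks off ifnet_start()
+ *
+ *	static void
+ *	mydrv_start(struct ifnet *ifp)	// if_start callback
+ *	{
+ *		struct mbuf *m;
+ *
+ *		while (ifnet_dequeue(ifp, &m) == 0)
+ *			mydrv_tx_submit(ifp, m);	// hypothetical
+ *	}
+ */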
+
+errno_t
+ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
+{
+ if (ifp == NULL || mp == NULL)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+ return (ENXIO);
+
+ return (ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL));
+}
+
+errno_t
+ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
+ struct mbuf **mp)
+{
+ if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+ return (ENXIO);
+
+ return (ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL));
+}
+
+errno_t
+ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
+ struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{
+ if (ifp == NULL || head == NULL || limit < 1)
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+ return (ENXIO);
+
+ return (ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len));
+}
+
+errno_t
+ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
+ u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+ u_int32_t *len)
+{
+ if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
+ return (EINVAL);
+ else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+ (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+ return (ENXIO);
+
+ return (ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
+ tail, cnt, len));
+}
+
+static int
+dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
+ char **frame_header_p, protocol_family_t protocol_family)
+{
+ struct ifnet_filter *filter;
+
+ /*
+ * Pass the inbound packet to the interface filters
*/
lck_mtx_lock_spin(&ifp->if_flt_lock);
/* prevent filter list from changing in case we drop the lock */
return;
}
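+/*
+ * Accumulate per-call packet and byte counts into the input thread's
+ * local statistics; dlil_input_stats_sync() later folds these into
+ * the ifnet counters.
+ */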
+static void
+dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
+ struct dlil_threading_info *inp, boolean_t poll)
+{
+ struct ifnet_stat_increment_param *d = &inp->stats;
+
+ if (s->packets_in != 0)
+ d->packets_in += s->packets_in;
+ if (s->bytes_in != 0)
+ d->bytes_in += s->bytes_in;
+ if (s->errors_in != 0)
+ d->errors_in += s->errors_in;
+
+ if (s->packets_out != 0)
+ d->packets_out += s->packets_out;
+ if (s->bytes_out != 0)
+ d->bytes_out += s->bytes_out;
+ if (s->errors_out != 0)
+ d->errors_out += s->errors_out;
+
+ if (s->collisions != 0)
+ d->collisions += s->collisions;
+ if (s->dropped != 0)
+ d->dropped += s->dropped;
+
+ if (poll)
+ PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
+}
+
+static void
+dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
+{
+ struct ifnet_stat_increment_param *s = &inp->stats;
+
+ /*
+ * Use of atomic operations is unavoidable here because
+ * these stats may also be incremented elsewhere via KPIs.
+ */
+ if (s->packets_in != 0) {
+ atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
+ s->packets_in = 0;
+ }
+ if (s->bytes_in != 0) {
+ atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
+ s->bytes_in = 0;
+ }
+ if (s->errors_in != 0) {
+ atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
+ s->errors_in = 0;
+ }
+
+ if (s->packets_out != 0) {
+ atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
+ s->packets_out = 0;
+ }
+ if (s->bytes_out != 0) {
+ atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
+ s->bytes_out = 0;
+ }
+ if (s->errors_out != 0) {
+ atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
+ s->errors_out = 0;
+ }
+
+ if (s->collisions != 0) {
+ atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
+ s->collisions = 0;
+ }
+ if (s->dropped != 0) {
+ atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
+ s->dropped = 0;
+ }
+
+ /*
+ * No need for atomic operations as they are modified here
+ * only from within the DLIL input thread context.
+ */
+ if (inp->tstats.packets != 0) {
+ inp->pstats.ifi_poll_packets += inp->tstats.packets;
+ inp->tstats.packets = 0;
+ }
+ if (inp->tstats.bytes != 0) {
+ inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
+ inp->tstats.bytes = 0;
+ }
+}
+
+__private_extern__ void
+dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
+{
+ return (dlil_input_packet_list_common(ifp, m, 0,
+ IFNET_MODEL_INPUT_POLL_OFF, FALSE));
+}
+
__private_extern__ void
-dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
+dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
+ u_int32_t cnt, ifnet_model_t mode)
+{
+ return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
+}
+
+static void
+dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
+ u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
int error = 0;
protocol_family_t protocol_family;
struct if_proto * last_ifproto = NULL;
mbuf_t pkt_first = NULL;
mbuf_t * pkt_next = NULL;
+ u_int32_t poll_thresh = 0, poll_ival = 0;
KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
+ if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
+ (poll_ival = if_rxpoll_interval_pkts) > 0)
+ poll_thresh = cnt;
while (m != NULL) {
struct if_proto *ifproto = NULL;
if (ifp_param == NULL)
ifp = m->m_pkthdr.rcvif;
+ if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
+ poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
+ ifnet_poll(ifp);
+
/* Check if this mbuf looks valid */
- MBUF_INPUT_CHECK(m, ifp);
+ MBUF_INPUT_CHECK(m, ifp);
next_packet = m->m_nextpkt;
m->m_nextpkt = NULL;
frame_header = m->m_pkthdr.header;
m->m_pkthdr.header = NULL;
- /* Get an IO reference count if the interface is not
- * loopback and it is attached.
+ /*
+ * Get an IO reference count if the interface is not
+ * loopback (lo0) and it is attached; lo0 never goes
+ * away, so optimize for that.
*/
if (ifp != lo_ifp) {
if (!ifnet_is_attached(ifp, 1)) {
iorefcnt = 1;
}
- switch (m->m_pkthdr.prio) {
- case MBUF_TC_BK:
- atomic_add_64(&ifp->if_tc.ifi_ibkpackets, 1);
- atomic_add_64(&ifp->if_tc.ifi_ibkbytes, m->m_pkthdr.len);
- break;
- case MBUF_TC_VI:
- atomic_add_64(&ifp->if_tc.ifi_ivipackets, 1);
- atomic_add_64(&ifp->if_tc.ifi_ivibytes, m->m_pkthdr.len);
- break;
- case MBUF_TC_VO:
- atomic_add_64(&ifp->if_tc.ifi_ivopackets, 1);
- atomic_add_64(&ifp->if_tc.ifi_ivobytes, m->m_pkthdr.len);
- break;
- default:
- break;
- }
+ ifp_inc_traffic_class_in(ifp, m);
/* find which protocol family this packet is for */
ifnet_lock_shared(ifp);
protocol_family = 0;
}
+#if CONFIG_EMBEDDED
+ iptap_ipf_input(ifp, protocol_family, m, frame_header);
+#endif /* CONFIG_EMBEDDED */
+
if (m->m_flags & (M_BCAST|M_MCAST))
atomic_add_64(&ifp->if_imcasts, 1);
if_proto_free(ifproto);
ifproto = NULL;
}
-
+
m = next_packet;
/* update the driver's multicast filter, if needed */
}
KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
- return;
}
errno_t
}
#endif
-static void
-if_inc_traffic_class_out(ifnet_t ifp, mbuf_t m)
+/*
+ * This is mostly called from the context of the DLIL input thread;
+ * hence there is no need for atomic operations.
+ */
+static __inline void
+ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
{
if (!(m->m_flags & M_PKTHDR))
return;
- switch (m->m_pkthdr.prio) {
- case MBUF_TC_BK:
- atomic_add_64(&ifp->if_tc.ifi_obkpackets, 1);
- atomic_add_64(&ifp->if_tc.ifi_obkbytes, m->m_pkthdr.len);
- break;
- case MBUF_TC_VI:
- atomic_add_64(&ifp->if_tc.ifi_ovipackets, 1);
- atomic_add_64(&ifp->if_tc.ifi_ovibytes, m->m_pkthdr.len);
- break;
- case MBUF_TC_VO:
- atomic_add_64(&ifp->if_tc.ifi_ovopackets, 1);
- atomic_add_64(&ifp->if_tc.ifi_ovobytes, m->m_pkthdr.len);
- break;
- default:
- break;
+ switch (m_get_traffic_class(m)) {
+ case MBUF_TC_BE:
+ ifp->if_tc.ifi_ibepackets++;
+ ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
+ break;
+ case MBUF_TC_BK:
+ ifp->if_tc.ifi_ibkpackets++;
+ ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
+ break;
+ case MBUF_TC_VI:
+ ifp->if_tc.ifi_ivipackets++;
+ ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
+ break;
+ case MBUF_TC_VO:
+ ifp->if_tc.ifi_ivopackets++;
+ ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
+ break;
+ default:
+ break;
+ }
+
+ if (mbuf_is_traffic_class_privileged(m)) {
+ ifp->if_tc.ifi_ipvpackets++;
+ ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
+ }
+}
+
+/*
+ * This is called from DLIL output, hence multiple threads could end
+ * up modifying the statistics. We trade off accuracy for performance
+ * by not using atomic operations here.
+ */
+static __inline void
+ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
+{
+ if (!(m->m_flags & M_PKTHDR))
+ return;
+
+ switch (m_get_traffic_class(m)) {
+ case MBUF_TC_BE:
+ ifp->if_tc.ifi_obepackets++;
+ ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
+ break;
+ case MBUF_TC_BK:
+ ifp->if_tc.ifi_obkpackets++;
+ ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
+ break;
+ case MBUF_TC_VI:
+ ifp->if_tc.ifi_ovipackets++;
+ ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
+ break;
+ case MBUF_TC_VO:
+ ifp->if_tc.ifi_ovopackets++;
+ ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
+ break;
+ default:
+ break;
+ }
+
+ if (mbuf_is_traffic_class_privileged(m)) {
+ ifp->if_tc.ifi_opvpackets++;
+ ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
}
}
* an interface lock if we're going to take both. This makes sense
* because a protocol is likely to interact with an ifp while it
* is under the protocol lock.
+ *
+ * An advisory code will be returned if adv is not null. This
+ * can be used to provide feedback about interface queues to the
+ * application.
*/
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
- void *route, const struct sockaddr *dest, int raw)
+ void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
char *frame_type = NULL;
char *dst_linkaddr = NULL;
mbuf_t send_head = NULL;
mbuf_t *send_tail = &send_head;
int iorefcnt = 0;
+#if CONFIG_EMBEDDED
+ u_int32_t pre = 0, post = 0;
+#endif /* CONFIG_EMBEDDED */
KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
do {
#if CONFIG_DTRACE
- if (proto_family == PF_INET) {
+ if (!raw && proto_family == PF_INET) {
struct ip *ip = mtod(m, struct ip*);
DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
struct ip *, ip, struct ifnet *, ifp,
struct ip *, ip, struct ip6_hdr *, NULL);
- } else if (proto_family == PF_INET6) {
+ } else if (!raw && proto_family == PF_INET6) {
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
struct ip6_hdr *, ip6, struct ifnet*, ifp,
}
retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
- frame_type);
+ frame_type
+#if CONFIG_EMBEDDED
+ ,
+ &pre, &post
+#endif /* CONFIG_EMBEDDED */
+ );
if (retval) {
if (retval != EJUSTRETURN)
m_freem(m);
*send_tail = m;
send_tail = &m->m_nextpkt;
} else {
- if_inc_traffic_class_out(ifp, m);
+#if CONFIG_EMBEDDED
+ iptap_ipf_output(ifp, proto_family, (struct mbuf *)m,
+ pre, post);
+#endif /* CONFIG_EMBEDDED */
+ ifp_inc_traffic_class_out(ifp, m);
KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
0,0,0,0,0);
- retval = ifp->if_output(ifp, m);
+ retval = (*ifp->if_output)(ifp, m);
+ if (retval == EQFULL || retval == EQSUSPENDED) {
+ if (adv != NULL && adv->code == FADV_SUCCESS) {
+ adv->code = (retval == EQFULL ?
+ FADV_FLOW_CONTROLLED :
+ FADV_SUSPENDED);
+ }
+ retval = 0;
+ }
if (retval && dlil_verbose) {
printf("%s: output error on %s%d retval = %d\n",
__func__, ifp->if_name, ifp->if_unit,
} while (m);
if (send_head) {
- if_inc_traffic_class_out(ifp, send_head);
+#if CONFIG_EMBEDDED
+ iptap_ipf_output(ifp, proto_family, (struct mbuf *)send_head,
+ pre, post);
+#endif /* CONFIG_EMBEDDED */
+ ifp_inc_traffic_class_out(ifp, send_head);
KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
- retval = ifp->if_output(ifp, send_head);
+ retval = (*ifp->if_output)(ifp, send_head);
+ if (retval == EQFULL || retval == EQSUSPENDED) {
+ if (adv != NULL) {
+ adv->code = (retval == EQFULL ?
+ FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
+ }
+ retval = 0;
+ }
if (retval && dlil_verbose) {
printf("%s: output error on %s%d retval = %d\n",
__func__, ifp->if_name, ifp->if_unit, retval);
proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
if (resolvep != NULL)
result = resolvep(ifp, proto_addr,
- (struct sockaddr_dl*)ll_addr, ll_len);
+ (struct sockaddr_dl*)(void *)ll_addr, ll_len);
if_proto_free(proto);
}
return (result);
}
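+/*
+ * Per-uthread bitmask tracking which network locks the current thread
+ * holds; this lets code paths assert lock ownership (or the lack of
+ * it) without consulting the locks themselves.
+ */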
+__private_extern__ errno_t
+net_thread_check_lock(u_int32_t flag)
+{
+	struct uthread *uth = get_bsdthread_info(current_thread());
+
+	return ((uth->uu_network_lock_held & flag) == flag);
+}
+
+__private_extern__ void
+net_thread_set_lock(u_int32_t flag)
+{
+ struct uthread *uth = get_bsdthread_info(current_thread());
+
+ VERIFY((uth->uu_network_lock_held & flag) != flag);
+ uth->uu_network_lock_held |= flag;
+}
+
+__private_extern__ void
+net_thread_unset_lock(u_int32_t flag)
+{
+ struct uthread *uth = get_bsdthread_info(current_thread());
+
+ VERIFY((uth->uu_network_lock_held & flag) == flag);
+ uth->uu_network_lock_held &= (~flag);
+}
+
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
const struct sockaddr_in * target_sin)
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
- const struct sockaddr* target_proto)
+ const struct sockaddr* target_proto0, u_int32_t rtflags)
{
errno_t result = 0;
const struct sockaddr_in * sender_sin;
const struct sockaddr_in * target_sin;
+ struct sockaddr_inarp target_proto_sinarp;
+ struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
if (target_proto == NULL || (sender_proto != NULL &&
sender_proto->sa_family != target_proto->sa_family))
return (EINVAL);
+ /*
+ * If the target is a (default) router, provide that
+ * information to the send_arp callback routine.
+ */
+ if (rtflags & RTF_ROUTER) {
+ bcopy(target_proto, &target_proto_sinarp,
+ sizeof (struct sockaddr_in));
+ target_proto_sinarp.sin_other |= SIN_ROUTER;
+ target_proto = (struct sockaddr *)&target_proto_sinarp;
+ }
+
/*
* If this is an ARP request and the target IP is IPv4LL,
* send the request on all interfaces. The exception is
* an announcement, which must only appear on the specific
* interface.
*/
- sender_sin = (const struct sockaddr_in *)sender_proto;
- target_sin = (const struct sockaddr_in *)target_proto;
+ sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
+ target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
if (target_proto->sa_family == AF_INET &&
IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
/* Copy the source IP address */
source_ip_copy =
*(struct sockaddr_in *)
- source_ip->ifa_addr;
+ (void *)source_ip->ifa_addr;
IFA_UNLOCK(source_ip);
break;
}
/* Send the ARP */
new_result = dlil_send_arp_internal(cur_ifp,
- arpop,
- (struct sockaddr_dl *)source_hw->ifa_addr,
+ arpop, (struct sockaddr_dl *)(void *)
+ source_hw->ifa_addr,
(struct sockaddr *)&source_ip_copy, NULL,
target_proto);
*/
if (ifp->if_refio == 0 &&
(ifp->if_refflags & IFRF_DETACHING) != 0) {
- /* Convert the spinlock to a regular mutex if we have
- * to wait for any reason while doing a wakeup.
- */
- lck_mtx_convert_spin(&ifp->if_ref_lock);
wakeup(&(ifp->if_refio));
}
lck_mtx_unlock(&ifp->if_ref_lock);
struct ifaddr *ifa;
struct if_data_internal if_data_saved;
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+ struct dlil_threading_info *dl_inp;
+ u_int32_t sflags = 0;
+ int err;
if (ifp == NULL)
return (EINVAL);
lck_mtx_lock_spin(&ifp->if_ref_lock);
if (ifp->if_refflags & IFRF_ATTACHED) {
- panic("%s: flags mismatch (attached set) ifp=%p",
+ panic_plain("%s: flags mismatch (attached set) ifp=%p",
__func__, ifp);
/* NOTREACHED */
}
VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
TAILQ_INIT(&ifp->if_addrhead);
- if (ifp->if_snd.ifq_maxlen == 0)
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
-
if (ifp->if_index == 0) {
int idx = if_next_index();
/* Hold a reference to the underlying dlil_ifnet */
ifnet_reference(ifp);
+	/* Clear stats (save and restore other fields that we care about) */
+ if_data_saved = ifp->if_data;
+ bzero(&ifp->if_data, sizeof (ifp->if_data));
+ ifp->if_data.ifi_type = if_data_saved.ifi_type;
+ ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
+ ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
+ ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
+ ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
+ ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
+ ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
+ ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
+ ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
+ ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
+ ifnet_touch_lastchange(ifp);
+
+ VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
+ ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
+
+ /* By default, use SFB and enable flow advisory */
+ sflags = PKTSCHEDF_QALG_SFB;
+ if (if_flowadv)
+ sflags |= PKTSCHEDF_QALG_FLOWCTL;
+
+ /* Initialize transmit queue(s) */
+ err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
+ if (err != 0) {
+ panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
+ "err=%d", __func__, ifp, err);
+ /* NOTREACHED */
+ }
+
+ /* Sanity checks on the input thread storage */
+ dl_inp = &dl_if->dl_if_inpstorage;
+ bzero(&dl_inp->stats, sizeof (dl_inp->stats));
+ VERIFY(dl_inp->input_waiting == 0);
+ VERIFY(dl_inp->wtot == 0);
+ VERIFY(dl_inp->ifp == NULL);
+ VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
+ VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
+ VERIFY(!dl_inp->net_affinity);
+ VERIFY(ifp->if_inp == NULL);
+ VERIFY(dl_inp->input_thr == THREAD_NULL);
+ VERIFY(dl_inp->wloop_thr == THREAD_NULL);
+ VERIFY(dl_inp->poll_thr == THREAD_NULL);
+ VERIFY(dl_inp->tag == 0);
+ VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+ bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
+ bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
+ bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
+#if IFNET_INPUT_SANITY_CHK
+ VERIFY(dl_inp->input_mbuf_cnt == 0);
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+ /*
+ * A specific DLIL input thread is created per Ethernet/cellular
+ * interface or for an interface which supports opportunistic
+ * input polling. Pseudo interfaces or other types of interfaces
+ * use the main input thread instead.
+ */
+ if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
+ ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
+ ifp->if_inp = dl_inp;
+ err = dlil_create_input_thread(ifp, ifp->if_inp);
+ if (err != 0) {
+ panic_plain("%s: ifp=%p couldn't get an input thread; "
+ "err=%d", __func__, ifp, err);
+ /* NOTREACHED */
+ }
+ }
+
/*
- * A specific dlil input thread is created per Ethernet/cellular
- * interface. pseudo interfaces or other types of interfaces use
- * the main ("loopback") thread.
- *
- * If the sysctl "net.link.generic.system.multi_threaded_input" is set
- * to zero, all packets will be handled by the main loopback thread,
- * reverting to 10.4.x behaviour.
+ * If the driver supports the new transmit model, create a workloop
+	 * starter thread to invoke the if_start callback, where packets
+	 * may be dequeued and transmitted.
*/
- if (dlil_multithreaded_input &&
- (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR)) {
- int err;
-
- ifp->if_input_thread = zalloc(dlif_inp_zone);
- if (ifp->if_input_thread == NULL) {
- panic("%s: ifp=%p couldn't alloc threading",
- __func__, ifp);
+ if (ifp->if_eflags & IFEF_TXSTART) {
+ VERIFY(ifp->if_start != NULL);
+ VERIFY(ifp->if_start_thread == THREAD_NULL);
+
+ ifnet_set_start_cycle(ifp, NULL);
+ ifp->if_start_active = 0;
+ ifp->if_start_req = 0;
+ if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
+ &ifp->if_start_thread)) != KERN_SUCCESS) {
+ panic_plain("%s: ifp=%p couldn't get a start thread; "
+ "err=%d", __func__, ifp, err);
/* NOTREACHED */
}
- bzero(ifp->if_input_thread, dlif_inp_size);
- err = dlil_create_input_thread(ifp, ifp->if_input_thread);
- if (err != 0) {
- panic("%s: ifp=%p couldn't get a thread. "
+ ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
+ (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
+ }
+
+ /*
+ * If the driver supports the new receive model, create a poller
+	 * thread to invoke the if_input_poll callback, where packets may
+	 * be dequeued from the driver and processed for reception.
+ */
+ if (ifp->if_eflags & IFEF_RXPOLL) {
+ VERIFY(ifp->if_input_poll != NULL);
+ VERIFY(ifp->if_input_ctl != NULL);
+ VERIFY(ifp->if_poll_thread == THREAD_NULL);
+
+ ifnet_set_poll_cycle(ifp, NULL);
+ ifp->if_poll_update = 0;
+ ifp->if_poll_active = 0;
+ ifp->if_poll_req = 0;
+ if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
+ &ifp->if_poll_thread)) != KERN_SUCCESS) {
+ panic_plain("%s: ifp=%p couldn't get a poll thread; "
"err=%d", __func__, ifp, err);
/* NOTREACHED */
}
-#ifdef DLIL_DEBUG
- printf("%s: dlil thread for ifp=%p if_index=%d\n",
- __func__, ifp, ifp->if_index);
-#endif
+ ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
+ (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
}
- /* Clear stats (save and restore other fields that we care) */
- if_data_saved = ifp->if_data;
- bzero(&ifp->if_data, sizeof (ifp->if_data));
- ifp->if_data.ifi_type = if_data_saved.ifi_type;
- ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
- ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
- ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
- ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
- ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
- ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
- ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
- ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
- ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
- ifnet_touch_lastchange(ifp);
+ VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
+ VERIFY(ifp->if_desc.ifd_len == 0);
+ VERIFY(ifp->if_desc.ifd_desc != NULL);
/* Record attach PC stacktrace */
ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
*/
lck_mtx_lock(rnh_lock);
ifnet_lock_exclusive(ifp);
+ /* Initialize Link Quality Metric (loopback [lo0] is always good) */
+ ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
+ IFNET_LQM_THRESH_UNKNOWN;
lck_mtx_lock_spin(&ifp->if_ref_lock);
ifp->if_refflags = IFRF_ATTACHED;
lck_mtx_unlock(&ifp->if_ref_lock);
/* address and mask sockaddr_dl locations */
asdl = (struct sockaddr_dl *)(ifa + 1);
bzero(asdl, SOCK_MAXADDRLEN);
- msdl = (struct sockaddr_dl *)((char *)asdl + SOCK_MAXADDRLEN);
+ msdl = (struct sockaddr_dl *)(void *)
+ ((char *)asdl + SOCK_MAXADDRLEN);
bzero(msdl, SOCK_MAXADDRLEN);
} else {
VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
}
IFA_LOCK(ifa);
/* address and mask sockaddr_dl locations */
- asdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.asdl;
+ asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
- msdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.msdl;
+ msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
}
if (ifp == NULL)
return (EINVAL);
- ifnet_head_lock_exclusive();
lck_mtx_lock(rnh_lock);
+ ifnet_head_lock_exclusive();
ifnet_lock_exclusive(ifp);
/*
if (!(ifp->if_refflags & IFRF_ATTACHED)) {
lck_mtx_unlock(&ifp->if_ref_lock);
ifnet_lock_done(ifp);
- lck_mtx_unlock(rnh_lock);
ifnet_head_done();
+ lck_mtx_unlock(rnh_lock);
return (EINVAL);
} else if (ifp->if_refflags & IFRF_DETACHING) {
/* Interface has already been detached */
lck_mtx_unlock(&ifp->if_ref_lock);
ifnet_lock_done(ifp);
- lck_mtx_unlock(rnh_lock);
ifnet_head_done();
+ lck_mtx_unlock(rnh_lock);
return (ENXIO);
}
/* Indicate this interface is being detached */
ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
ifnet_lock_done(ifp);
- lck_mtx_unlock(rnh_lock);
ifnet_head_done();
+ lck_mtx_unlock(rnh_lock);
+
+ /* Reset Link Quality Metric (unless loopback [lo0]) */
+ if (ifp != lo_ifp)
+ if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
+
+ /* Reset TCP local statistics */
+ if (ifp->if_tcp_stat != NULL)
+		bzero(ifp->if_tcp_stat, sizeof (*ifp->if_tcp_stat));
+
+ /* Reset UDP local statistics */
+ if (ifp->if_udp_stat != NULL)
+		bzero(ifp->if_udp_stat, sizeof (*ifp->if_udp_stat));
/* Let BPF know we're detaching */
bpfdetach(ifp);
/* Mark the interface as DOWN */
if_down(ifp);
+ /* Drain send queue */
+ ifclassq_teardown(ifp);
+
/* Disable forwarding cached route */
lck_mtx_lock(&ifp->if_cached_route_lock);
ifp->if_fwd_cacheok = 0;
return (ifp);
}
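+/*
+ * Detacher thread continuation: this runs each time msleep0() resumes
+ * the thread, so no state may be kept on the stack across a sleep; it
+ * lives in ifnet_detaching_head/ifnet_detaching_cnt under dlil_if_lock.
+ */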
-static void
-ifnet_delayed_thread_func(void)
+static int
+ifnet_detacher_thread_cont(int err)
{
+#pragma unused(err)
struct ifnet *ifp;
for (;;) {
- dlil_if_lock();
+ dlil_if_lock_assert();
while (ifnet_detaching_cnt == 0) {
- (void) msleep(&ifnet_delayed_run, &dlil_ifnet_lock,
- (PZERO - 1), "ifnet_delayed_thread", NULL);
+ (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
+ (PZERO - 1), "ifnet_detacher_cont", 0,
+ ifnet_detacher_thread_cont);
+ /* NOTREACHED */
}
VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
/* Take care of detaching ifnet */
ifp = ifnet_detaching_dequeue();
- dlil_if_unlock();
- if (ifp != NULL)
+ if (ifp != NULL) {
+ dlil_if_unlock();
ifnet_detach_final(ifp);
+ dlil_if_lock();
+ }
}
+ /* NOTREACHED */
+ return (0);
+}
+
+static void
+ifnet_detacher_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+ dlil_if_lock();
+ (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
+ (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
+ /*
+	 * msleep0() should never return here since PCATCH was not set;
+	 * assert if it ever does.
+ */
+ dlil_if_unlock();
+ VERIFY(0);
}
static void
{
struct ifnet_filter *filter, *filter_next;
struct ifnet_filter_head fhead;
- struct dlil_threading_info *inputthread;
+ struct dlil_threading_info *inp;
struct ifaddr *ifa;
ifnet_detached_func if_free;
int i;
/* NOTREACHED */
}
- /* Wait until the existing IO references get released
- * before we proceed with ifnet_detach
+ /*
+ * Wait until the existing IO references get released
+ * before we proceed with ifnet_detach. This is not a
+ * common case, so block without using a continuation.
*/
while (ifp->if_refio > 0) {
printf("%s: Waiting for IO references on %s%d interface "
/* There should not be any addresses left */
VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
+ /*
+ * Signal the starter thread to terminate itself.
+ */
+ if (ifp->if_start_thread != THREAD_NULL) {
+ lck_mtx_lock_spin(&ifp->if_start_lock);
+ ifp->if_start_thread = THREAD_NULL;
+ wakeup_one((caddr_t)&ifp->if_start_thread);
+ lck_mtx_unlock(&ifp->if_start_lock);
+ }
+
+ /*
+ * Signal the poller thread to terminate itself.
+ */
+ if (ifp->if_poll_thread != THREAD_NULL) {
+ lck_mtx_lock_spin(&ifp->if_poll_lock);
+ ifp->if_poll_thread = THREAD_NULL;
+ wakeup_one((caddr_t)&ifp->if_poll_thread);
+ lck_mtx_unlock(&ifp->if_poll_lock);
+ }
+
/*
* If thread affinity was set for the workloop thread, we will need
* to tear down the affinity and release the extra reference count
- * taken at attach time;
+ * taken at attach time. Does not apply to lo0 or other interfaces
+ * without dedicated input threads.
*/
- if ((inputthread = ifp->if_input_thread) != NULL) {
- if (inputthread->net_affinity) {
- struct thread *tp;
-
- if (inputthread == dlil_lo_thread_ptr) {
- panic("%s: Thread affinity should not be "
- "enabled on the loopback dlil input "
- "thread", __func__);
- /* NOTREACHED */
+ if ((inp = ifp->if_inp) != NULL) {
+ VERIFY(inp != dlil_main_input_thread);
+
+ if (inp->net_affinity) {
+ struct thread *tp, *wtp, *ptp;
+
+ lck_mtx_lock_spin(&inp->input_lck);
+ wtp = inp->wloop_thr;
+ inp->wloop_thr = THREAD_NULL;
+ ptp = inp->poll_thr;
+ inp->poll_thr = THREAD_NULL;
+ tp = inp->input_thr; /* don't nullify now */
+ inp->tag = 0;
+ inp->net_affinity = FALSE;
+ lck_mtx_unlock(&inp->input_lck);
+
+ /* Tear down poll thread affinity */
+ if (ptp != NULL) {
+ VERIFY(ifp->if_eflags & IFEF_RXPOLL);
+ (void) dlil_affinity_set(ptp,
+ THREAD_AFFINITY_TAG_NULL);
+ thread_deallocate(ptp);
}
- lck_mtx_lock_spin(&inputthread->input_lck);
- tp = inputthread->workloop_thread;
- inputthread->workloop_thread = NULL;
- inputthread->tag = 0;
- inputthread->net_affinity = FALSE;
- lck_mtx_unlock(&inputthread->input_lck);
-
/* Tear down workloop thread affinity */
- if (tp != NULL) {
- (void) dlil_affinity_set(tp,
+ if (wtp != NULL) {
+ (void) dlil_affinity_set(wtp,
THREAD_AFFINITY_TAG_NULL);
- thread_deallocate(tp);
+ thread_deallocate(wtp);
}
- /* Tear down dlil input thread affinity */
- tp = inputthread->input_thread;
+ /* Tear down DLIL input thread affinity */
(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
thread_deallocate(tp);
}
- /* cleanup ifp dlil input thread, if any */
- ifp->if_input_thread = NULL;
-
- if (inputthread != dlil_lo_thread_ptr) {
-#ifdef DLIL_DEBUG
- printf("%s: wakeup thread threadinfo: %p "
- "input_thread=%p threads: cur=%d max=%d\n",
- __func__, inputthread, inputthread->input_thread,
- dlil_multithreaded_input, cur_dlil_input_threads);
-#endif
- lck_mtx_lock_spin(&inputthread->input_lck);
-
- inputthread->input_waiting |= DLIL_INPUT_TERMINATE;
- if (!(inputthread->input_waiting & DLIL_INPUT_RUNNING))
- wakeup((caddr_t)&inputthread->input_waiting);
+ /* disassociate ifp DLIL input thread */
+ ifp->if_inp = NULL;
- lck_mtx_unlock(&inputthread->input_lck);
+ lck_mtx_lock_spin(&inp->input_lck);
+ inp->input_waiting |= DLIL_INPUT_TERMINATE;
+ if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
+ wakeup_one((caddr_t)&inp->input_waiting);
}
+ lck_mtx_unlock(&inp->input_lck);
}
/* The driver might unload, so point these to ourselves */
if_free = ifp->if_free;
ifp->if_output = ifp_if_output;
+ ifp->if_pre_enqueue = ifp_if_output;
+ ifp->if_start = ifp_if_start;
+ ifp->if_output_ctl = ifp_if_ctl;
+ ifp->if_input_poll = ifp_if_input_poll;
+ ifp->if_input_ctl = ifp_if_ctl;
ifp->if_ioctl = ifp_if_ioctl;
ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
ifp->if_free = ifp_if_free;
ifp->if_del_proto = ifp_if_del_proto;
ifp->if_check_multi = ifp_if_check_multi;
+ /* wipe out interface description */
+ VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
+ ifp->if_desc.ifd_len = 0;
+ VERIFY(ifp->if_desc.ifd_desc != NULL);
+ bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
+
ifnet_lock_done(ifp);
#if PF
VERIFY(ifp->if_flt_waiters == 0);
lck_mtx_unlock(&ifp->if_flt_lock);
+ /* Last chance to drain send queue */
+ if_qflush(ifp, 0);
+
/* Last chance to cleanup any cached route */
lck_mtx_lock(&ifp->if_cached_route_lock);
VERIFY(!ifp->if_fwd_cacheok);
return (0);
}
+static void
+ifp_if_start(struct ifnet *ifp)
+{
+ ifnet_purge(ifp);
+}
+
+static void
+ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
+ struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
+{
+#pragma unused(ifp, flags, max_cnt)
+ if (m_head != NULL)
+ *m_head = NULL;
+ if (m_tail != NULL)
+ *m_tail = NULL;
+ if (cnt != NULL)
+ *cnt = 0;
+ if (len != NULL)
+ *len = 0;
+}
+
+static errno_t
+ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
+{
+#pragma unused(ifp, cmd, arglen, arg)
+ return (EOPNOTSUPP);
+}
+
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
return (EOPNOTSUPP);
}
-static errno_t
-ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
- const struct sockaddr *sa, const char *ll, const char *t)
+static errno_t
+ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *sa, const char *ll, const char *t
+#if CONFIG_EMBEDDED
+ ,
+ u_int32_t *pre, u_int32_t *post
+#endif /* CONFIG_EMBEDDED */
+ )
{
#pragma unused(ifp, m, sa, ll, t)
m_freem(*m);
*m = NULL;
+#if CONFIG_EMBEDDED
+ *pre = 0;
+ *post = 0;
+#endif /* CONFIG_EMBEDDED */
return (EJUSTRETURN);
}
-static errno_t
+errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
dlifp1->dl_if_trace = dlil_if_trace;
}
ifp1->if_name = dlifp1->dl_if_namestorage;
+
+ /* initialize interface description */
+ ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
+ ifp1->if_desc.ifd_len = 0;
+ ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
+
#if CONFIG_MACF_NET
mac_ifnet_label_init(ifp1);
#endif
+ if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
+ DLIL_PRINTF("%s: failed to allocate if local stats, "
+ "error: %d\n", __func__, ret);
+ /* This probably shouldn't be fatal */
+ ret = 0;
+ }
+
lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
- lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_lock_group,
- ifnet_lock_attr);
lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
ifnet_lock_attr);
lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+ /* for send data paths */
+ lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
+ ifnet_lock_attr);
+ lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
+ ifnet_lock_attr);
+ lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
+ ifnet_lock_attr);
+
+ /* for receive data paths */
+ lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
+ ifnet_lock_attr);
+
TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
*ifp = ifp1;
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
struct route src_rt;
- struct sockaddr_in *dst = (struct sockaddr_in *)(&src_rt.ro_dst);
+ struct sockaddr_in *dst;
+
+ dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
ifp_src_route_copyout(ifp, &src_rt);
src_rt.ro_dst.sin6_family = AF_INET6;
}
src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
- src_rt.ro_dst.sin6_addr = *src_ip6;
+ bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
+ sizeof (src_rt.ro_dst.sin6_addr));
if (src_rt.ro_rt == NULL) {
src_rt.ro_rt = rtalloc1_scoped(
return (src_rt.ro_rt);
}
#endif /* INET6 */
+
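+/*
+ * Update the interface's link quality metric; if the normalized value
+ * has changed, post a KEV_DL_LINK_QUALITY_METRIC_CHANGED event.
+ */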
+void
+if_lqm_update(struct ifnet *ifp, int lqm)
+{
+ struct kev_dl_link_quality_metric_data ev_lqm_data;
+
+ VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
+
+ /* Normalize to edge */
+ if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_POOR)
+ lqm = IFNET_LQM_THRESH_POOR;
+ else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
+ lqm = IFNET_LQM_THRESH_GOOD;
+
+ ifnet_lock_exclusive(ifp);
+ if (lqm == ifp->if_lqm) {
+ ifnet_lock_done(ifp);
+ return; /* nothing to update */
+ }
+ ifp->if_lqm = lqm;
+ ifnet_lock_done(ifp);
+
+ bzero(&ev_lqm_data, sizeof (ev_lqm_data));
+ ev_lqm_data.link_quality_metric = lqm;
+
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
+ (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
+}
+
+/* for uuid.c */
+int
+uuid_get_ethernet(u_int8_t *node)
+{
+ struct ifnet *ifp;
+ struct sockaddr_dl *sdl;
+
+ ifnet_head_lock_shared();
+ TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+ ifnet_lock_shared(ifp);
+ IFA_LOCK_SPIN(ifp->if_lladdr);
+ sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
+ if (sdl->sdl_type == IFT_ETHER) {
+ memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
+ IFA_UNLOCK(ifp->if_lladdr);
+ ifnet_lock_done(ifp);
+ ifnet_head_done();
+ return (0);
+ }
+ IFA_UNLOCK(ifp->if_lladdr);
+ ifnet_lock_done(ifp);
+ }
+ ifnet_head_done();
+
+ return (-1);
+}
+
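+/*
+ * Sysctl handler for if_rxpoll; updates are rejected (ENXIO) while
+ * input polling is globally disabled via net_rxpoll.
+ */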
+static int
+sysctl_rxpoll SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int i, err;
+
+ i = if_rxpoll;
+
+ err = sysctl_handle_int(oidp, &i, 0, req);
+ if (err != 0 || req->newptr == USER_ADDR_NULL)
+ return (err);
+
+ if (net_rxpoll == 0)
+ return (ENXIO);
+
+ if_rxpoll = i;
+ return (err);
+}
+
+static int
+sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int i, err;
+
+ i = if_sndq_maxlen;
+
+ err = sysctl_handle_int(oidp, &i, 0, req);
+ if (err != 0 || req->newptr == USER_ADDR_NULL)
+ return (err);
+
+ if (i < IF_SNDQ_MINLEN)
+ i = IF_SNDQ_MINLEN;
+
+ if_sndq_maxlen = i;
+ return (err);
+}
+
+static int
+sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int i, err;
+
+ i = if_rcvq_maxlen;
+
+ err = sysctl_handle_int(oidp, &i, 0, req);
+ if (err != 0 || req->newptr == USER_ADDR_NULL)
+ return (err);
+
+ if (i < IF_RCVQ_MINLEN)
+ i = IF_RCVQ_MINLEN;
+
+ if_rcvq_maxlen = i;
+ return (err);
+}
+
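+/*
+ * Move all flow control entries from an SFB bin list onto the global
+ * ifnet_fclist and wake the flow advisory thread to process them.
+ */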
+void
+ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl)
+{
+ struct sfb_bin_fcentry *fce, *tfce;
+
+ lck_mtx_lock_spin(&ifnet_fclist_lock);
+
+ SLIST_FOREACH_SAFE(fce, fcl, fce_link, tfce) {
+ SLIST_REMOVE(fcl, fce, sfb_bin_fcentry, fce_link);
+ SLIST_INSERT_HEAD(&ifnet_fclist, fce, fce_link);
+ sp->sfb_stats.flow_feedback++;
+ }
+ VERIFY(SLIST_EMPTY(fcl) && !SLIST_EMPTY(&ifnet_fclist));
+
+ wakeup(&ifnet_fclist);
+
+ lck_mtx_unlock(&ifnet_fclist_lock);
+}
+
+struct sfb_bin_fcentry *
+ifnet_fce_alloc(int how)
+{
+ struct sfb_bin_fcentry *fce;
+
+ fce = (how == M_WAITOK) ? zalloc(ifnet_fcezone) :
+ zalloc_noblock(ifnet_fcezone);
+ if (fce != NULL)
+ bzero(fce, ifnet_fcezone_size);
+
+ return (fce);
+}
+
+void
+ifnet_fce_free(struct sfb_bin_fcentry *fce)
+{
+ zfree(ifnet_fcezone, fce);
+}
+
+static void
+ifnet_fc_init(void)
+{
+ thread_t thread = THREAD_NULL;
+
+ SLIST_INIT(&ifnet_fclist);
+ lck_mtx_init(&ifnet_fclist_lock, ifnet_snd_lock_group, NULL);
+
+ ifnet_fcezone_size = P2ROUNDUP(sizeof (struct sfb_bin_fcentry),
+ sizeof (u_int64_t));
+ ifnet_fcezone = zinit(ifnet_fcezone_size,
+ IFNET_FCEZONE_MAX * ifnet_fcezone_size, 0, IFNET_FCEZONE_NAME);
+ if (ifnet_fcezone == NULL) {
+ panic("%s: failed allocating %s", __func__, IFNET_FCEZONE_NAME);
+ /* NOTREACHED */
+ }
+ zone_change(ifnet_fcezone, Z_EXPAND, TRUE);
+ zone_change(ifnet_fcezone, Z_CALLERACCT, FALSE);
+
+ if (kernel_thread_start(ifnet_fc_thread_func,
+ NULL, &thread) != KERN_SUCCESS) {
+ panic("%s: couldn't create flow event advisory thread",
+ __func__);
+ /* NOTREACHED */
+ }
+ thread_deallocate(thread);
+}
+
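+/*
+ * Flow advisory thread continuation: drain ifnet_fclist, resolve each
+ * entry's flow hash to an inpcb, and deliver flow control feedback to
+ * the socket.  As with other continuations, no stack state survives
+ * across msleep0().
+ */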
+static int
+ifnet_fc_thread_cont(int err)
+{
+#pragma unused(err)
+ struct sfb_bin_fcentry *fce;
+ struct inp_fc_entry *infc;
+
+ for (;;) {
+ lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED);
+ while (SLIST_EMPTY(&ifnet_fclist)) {
+ (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock,
+ (PSOCK | PSPIN), "ifnet_fc_cont", 0,
+ ifnet_fc_thread_cont);
+ /* NOTREACHED */
+ }
+
+ fce = SLIST_FIRST(&ifnet_fclist);
+ SLIST_REMOVE(&ifnet_fclist, fce, sfb_bin_fcentry, fce_link);
+ SLIST_NEXT(fce, fce_link) = NULL;
+ lck_mtx_unlock(&ifnet_fclist_lock);
+
+ infc = inp_fc_getinp(fce->fce_flowhash);
+ if (infc == NULL) {
+ ifnet_fce_free(fce);
+ lck_mtx_lock_spin(&ifnet_fclist_lock);
+ continue;
+ }
+ VERIFY(infc->infc_inp != NULL);
+
+ inp_fc_feedback(infc->infc_inp);
+
+ inp_fc_entry_free(infc);
+ ifnet_fce_free(fce);
+ lck_mtx_lock_spin(&ifnet_fclist_lock);
+ }
+}
+
+static void
+ifnet_fc_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+ lck_mtx_lock(&ifnet_fclist_lock);
+ (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock,
+ (PSOCK | PSPIN), "ifnet_fc", 0, ifnet_fc_thread_cont);
+ /*
+	 * msleep0() should never return here since PCATCH was not set;
+	 * assert if it ever does.
+ */
+ lck_mtx_unlock(&ifnet_fclist_lock);
+ VERIFY(0);
+}
+
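+/*
+ * Report a node's presence on the link: record it with ND6 and post a
+ * KEV_DL_NODE_PRESENCE event carrying the node's address, RSSI and
+ * quality metrics as supplied by the driver.
+ */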
+void
+dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
+ int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
+{
+ struct kev_dl_node_presence kev;
+ struct sockaddr_dl *sdl;
+ struct sockaddr_in6 *sin6;
+
+ VERIFY(ifp);
+ VERIFY(sa);
+ VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
+
+ bzero(&kev, sizeof (kev));
+ sin6 = &kev.sin6_node_address;
+ sdl = &kev.sdl_node_address;
+ nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
+ kev.rssi = rssi;
+ kev.link_quality_metric = lqm;
+ kev.node_proximity_metric = npm;
+ bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
+
+ nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
+ &kev.link_data, sizeof (kev));
+}
+
+void
+dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
+{
+ struct kev_dl_node_absence kev;
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_dl *sdl;
+
+ VERIFY(ifp);
+ VERIFY(sa);
+ VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
+
+ bzero(&kev, sizeof (kev));
+ sin6 = &kev.sin6_node_address;
+ sdl = &kev.sdl_node_address;
+ nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
+
+ nd6_alt_node_absent(ifp, sin6);
+ dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
+ &kev.link_data, sizeof (kev));
+}
+
+errno_t
+ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
+ struct proc *p)
+{
+ u_int32_t level = IFNET_THROTTLE_OFF;
+ errno_t result = 0;
+
+ VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
+
+ if (cmd == SIOCSIFOPPORTUNISTIC) {
+ /*
+ * XXX: Use priv_check_cred() instead of root check?
+ */
+ if ((result = proc_suser(p)) != 0)
+ return (result);
+
+ if (ifr->ifr_opportunistic.ifo_flags ==
+ IFRIFOF_BLOCK_OPPORTUNISTIC)
+ level = IFNET_THROTTLE_OPPORTUNISTIC;
+ else if (ifr->ifr_opportunistic.ifo_flags == 0)
+ level = IFNET_THROTTLE_OFF;
+ else
+ result = EINVAL;
+
+ if (result == 0)
+ result = ifnet_set_throttle(ifp, level);
+ } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
+ ifr->ifr_opportunistic.ifo_flags = 0;
+ if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
+ ifr->ifr_opportunistic.ifo_flags |=
+ IFRIFOF_BLOCK_OPPORTUNISTIC;
+ }
+ }
+
+ /*
+ * Return the count of current opportunistic connections
+ * over the interface.
+ */
+ if (result == 0) {
+ uint32_t flags = 0;
+ flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
+ INPCB_OPPORTUNISTIC_SETCMD : 0;
+ flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
+ INPCB_OPPORTUNISTIC_THROTTLEON : 0;
+ ifr->ifr_opportunistic.ifo_inuse =
+ udp_count_opportunistic(ifp->if_index, flags) +
+ tcp_count_opportunistic(ifp->if_index, flags);
+ }
+
+ if (result == EALREADY)
+ result = 0;
+
+ return (result);
+}
+
+int
+ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
+{
+ struct ifclassq *ifq;
+ int err = 0;
+
+ if (!(ifp->if_eflags & IFEF_TXSTART))
+ return (ENXIO);
+
+ *level = IFNET_THROTTLE_OFF;
+
+ ifq = &ifp->if_snd;
+ IFCQ_LOCK(ifq);
+ /* Throttling works only for IFCQ, not ALTQ instances */
+ if (IFCQ_IS_ENABLED(ifq))
+ IFCQ_GET_THROTTLE(ifq, *level, err);
+ IFCQ_UNLOCK(ifq);
+
+ return (err);
+}
+
+int
+ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
+{
+ struct ifclassq *ifq;
+ int err = 0;
+
+ if (!(ifp->if_eflags & IFEF_TXSTART))
+ return (ENXIO);
+
+	ifq = &ifp->if_snd;
+
+	switch (level) {
+ case IFNET_THROTTLE_OFF:
+ case IFNET_THROTTLE_OPPORTUNISTIC:
+#if PF_ALTQ
+ /* Throttling works only for IFCQ, not ALTQ instances */
+ if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+ return (ENXIO);
+#endif /* PF_ALTQ */
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ IFCQ_LOCK(ifq);
+ if (IFCQ_IS_ENABLED(ifq))
+ IFCQ_SET_THROTTLE(ifq, level, err);
+ IFCQ_UNLOCK(ifq);
+
+ if (err == 0) {
+ printf("%s%d: throttling level set to %d\n", ifp->if_name,
+ ifp->if_unit, level);
+ if (level == IFNET_THROTTLE_OFF)
+ ifnet_start(ifp);
+ }
+
+ return (err);
+}