/*
 * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>

#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>

#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>

#include <net/pfvar.h>
#include <net/altq/altq.h>
#include <net/pktsched/pktsched.h>
#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define	MAX_LINKADDR		4	/* LONGWORDS */
#define	M_NKE			M_IFADDR
#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif
#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
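
/*
 * These compile-time asserts guard the 64-bit statistics counters that
 * are updated with atomic operations; on some architectures such
 * operations require naturally aligned operands, so every field checked
 * below must sit at an offset that is a multiple of sizeof (u_int64_t).
 */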
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	proto_kpi_t		proto_kpi;
	union {
		struct {
			proto_media_input		input;
			proto_media_preout		pre_output;
			proto_media_event		event;
			proto_media_ioctl		ioctl;
			proto_media_detached		detached;
			proto_media_resolve_multi	resolve_multi;
			proto_media_send_arp		send_arp;
		} v1;
		struct {
			proto_media_input_v2		input;
			proto_media_preout		pre_output;
			proto_media_event		event;
			proto_media_ioctl		ioctl;
			proto_media_detached		detached;
			proto_media_resolve_multi	resolve_multi;
			proto_media_send_arp		send_arp;
		} v2;
	} kpi;
};
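
/*
 * Only one member of the kpi union is valid for a given if_proto:
 * proto_kpi records whether the protocol registered with the v1 or v2
 * KPI (the two differ in the input callback signature), and dispatch
 * sites such as if_proto_free() below select kpi.v1 or kpi.v2
 * accordingly.
 */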
SLIST_HEAD(proto_hash_entry, if_proto);

#define	DLIL_SDLMAXLEN	64
#define	DLIL_SDLDATALEN	\
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};
#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
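
/*
 * The casts above are valid only because dl_if (the public ifnet) is
 * the first member of struct dlil_ifnet, so a struct dlil_ifnet * and
 * a pointer to its embedded ifnet share the same address.
 */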
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_skip;
	u_int32_t			filt_flags;
	ifnet_t				filt_ifp;
	const char			*filt_name;
	void				*filt_cookie;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};
struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	  /* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	  /* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */
/*
 * Updating this variable should be done by first acquiring the global
 * radix node head (rnh_lock), in tandem with setting/clearing the
 * PR_AGGDRAIN for routedomain.
 */
u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
static void ifp_if_start(struct ifnet *);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);

static void dlil_verify_sum16(void);
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;
struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;		/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;		/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32
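
/*
 * Flow control entries are kept in an RB tree keyed by ifce_flowhash,
 * the value computed by ifnet_calc_flowhash() over the fields of
 * struct ifnet_flowhash_key above; when a flow control or suspend
 * event arrives carrying only the hash, ifnet_fc_get() maps it back
 * to the originating ifnet.
 */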
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);
int dlil_lladdr_ckreq = 0;

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */

#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
u_int32_t if_bw_smoothing_val = 3;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");

u_int32_t if_bw_measure_size = 10;
SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");
#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");
static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;
#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}
#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
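
/*
 * DLIL_EWMA computes avg' = ((avg << decay) - avg + new) >> decay,
 * i.e. avg' = avg + (new - avg) / 2^decay.  With the default decay of
 * 2 (IF_RXPOLL_DECAY) this is avg' = (3 * avg + new) / 4, so each new
 * sample contributes one quarter of its weight to the running average.
 */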
#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
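
/*
 * Poll parameters are picked from rxpoll_tbl[] by walking the rows
 * until one with a speed at or above the interface's downlink rate is
 * found (the zero row terminates the search); faster links thus get
 * higher packet/byte watermarks before polling mode is engaged.
 */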
static u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_APPLETALK:
		return (2);
	case PF_VLAN:
		return (3);
	case PF_UNSPEC:
	default:
		return (4);
	}
}
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)		/* still referenced */
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change).
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}

	lck_rw_assert(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}
/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code	= KEV_VENDOR_APPLE;
	ev_msg.kev_class	= KEV_NETWORK_CLASS;
	ev_msg.kev_subclass	= event_subclass;
	ev_msg.event_code	= event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	dlil_event_internal(ifp, &ev_msg);
}
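
/*
 * Typical usage: interface state changes are posted with no payload,
 * e.g. dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0),
 * in which case the stack supplies a bare net_event_data identifying
 * the interface by name, family and unit.
 */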
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
	}

	return (ret);
}
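
/*
 * The headroom math above works as follows: the zone element is sized
 * with sizeof (void *) + sizeof (u_int64_t) bytes of slack, so there
 * is always room to round the base up to the next 64-bit boundary
 * while leaving a pointer-sized slot just below it; that slot
 * (base - sizeof (void *)) stores the original zalloc() address used
 * by the zfree() in the error path above.
 */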
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_random(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}
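
/*
 * Three input thread flavors are created here: the main input thread
 * (ifp == NULL) shared by lo0 and interfaces without a dedicated
 * thread, a per-interface legacy thread (dlil_input_thread_func), and
 * a per-interface polling thread (dlil_rxpoll_input_thread_func) for
 * drivers that advertise IFEF_RXPOLL when net_rxpoll is enabled.
 */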
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	ifp = inp->ifp;
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
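
/*
 * Threads sharing an affinity tag are scheduled on the same processor
 * set; tagging the input thread and its matching workloop/starter
 * thread with one random tag aims to keep the receive path cache-warm
 * without pinning it to a specific CPU.
 */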
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof (net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
	dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
	    sizeof (struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
	ifnet_llreach_init();

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pf_init();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

#if DEBUG
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}
static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
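
/*
 * Together these helpers implement a simple monitor around the filter
 * list: if_flt_monitor_enter() sleeps on if_flt_head until the busy
 * count drops to zero, then marks the list busy, while
 * if_flt_monitor_leave() drops the busy count and wakes any waiters.
 * This allows the list to be walked while if_flt_lock is temporarily
 * dropped, without filters being removed underneath the walker.
 */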
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	filter->filt_input = if_filter->iff_input;
	filter->filt_output = if_filter->iff_output;
	filter->filt_event = if_filter->iff_event;
	filter->filt_ioctl = if_filter->iff_ioctl;
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection.
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
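
/*
 * Every filter attached without DLIL_IFF_TSO bumps
 * dlil_filter_disable_tso_count; while it is nonzero, TCP avoids
 * offloading segmentation, since a filter that expects to inspect or
 * edit individual frames cannot operate on a single oversized TSO
 * buffer.  routegenid_update() forces existing routes (and hence
 * connections) to notice the change.
 */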
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				lck_mtx_assert(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not.  Do this before
	 * freeing the filter, so filt_flags is not read after the free.
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	/* Free the filter */
	zfree(dlif_filt_zone, filter);

done:
	if (retval != 0) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}
	return (retval);
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}

/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
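
/*
 * Illustrative sketch (not part of the original source): the
 * msleep()/wakeup_one() handshake used by every DLIL input thread
 * above, condensed from the producer side in ifnet_input_common()
 * below.  The producer sets DLIL_INPUT_WAITING and wakes the thread
 * only if it is not already running; the consumer clears RUNNING
 * before sleeping so a concurrent producer re-arms the wakeup.
 */
static void
input_kick_sketch(struct dlil_threading_info *inp)
{
	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;	/* wakeup request, sampled by rxpoll below */
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
}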

/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter could use
		 * the input thread on which the packet arrived, but that
		 * requires our knowing the interface in advance (and the
		 * benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}

/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter could use
		 * the input thread on which the packet arrived, but that
		 * requires our knowing the interface in advance (and the
		 * benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			_flushq(&inp->rcvq_pkts);
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime))
			*(&inp->sample_lasttime) = *(&now);

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <))
				goto skip;

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
				inp->rxpoll_bmin = btot;
			if (btot > inp->rxpoll_bmax)
				inp->rxpoll_bmax = btot;

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
				inp->rxpoll_pmin = ptot;
			if (ptot > inp->rxpoll_pmax)
				inp->rxpoll_pmax = ptot;

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime))
					*(&inp->dbg_lasttime) = *(&now);
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime))
				*(&inp->mode_lasttime) = *(&now);

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <))
				goto skip;

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };
			errno_t err;

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0)
					inp->rxpoll_offerr++;
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0)
					inp->rxpoll_onerr++;
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
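
/*
 * Illustrative sketch (not part of the original source): the
 * exponentially weighted moving average applied by DLIL_EWMA() to
 * the byte, packet, and wakeup counters above, written out as a
 * function.  This assumes the shift-based decay form, where a new
 * sample carries a 1/2^decay weight; the authoritative macro
 * definition lives earlier in this file, and if_rxpoll_decay plays
 * the role of "decay" here.
 */
static __inline u_int32_t
ewma_sketch(u_int32_t avg, u_int32_t sample, u_int32_t decay)
{
	if (avg == 0)
		return (sample);	/* first sample seeds the average */
	return (((avg << decay) - avg + sample) >> decay);
}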

/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}
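
/*
 * Illustrative sketch (not part of the original source): overriding
 * the auto-tuned polling watermarks through dlil_rxpoll_set_params()
 * above.  Zeroed fields keep their auto-tuned values; the caller
 * must hold an attached, IFEF_RXPOLL-capable ifnet.  The watermark
 * numbers are made up for illustration.
 */
static errno_t
rxpoll_params_sketch(struct ifnet *ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;	/* fall back to interrupts below this */
	p.packets_hiwat = 64;	/* switch to polling above this */
	/* bytes/limit/interval left at 0: keep auto-tuned values */
	return (dlil_rxpoll_set_params(ifp, &p, FALSE));
}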

/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}

static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	struct thread *tp = current_thread();
	struct mbuf *last;
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = 0, m_size = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	if (s != NULL) {
		dlil_input_stats_add(s, inp, poll);
		/*
		 * If we're using the main input thread, synchronize the
		 * stats now since we have the interface context.  All
		 * other cases involving dedicated input threads will
		 * have their stats synchronized there.
		 */
		if (inp == dlil_main_input_thread)
			dlil_input_stats_sync(ifp, inp);
	}

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (0);
}
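
/*
 * Illustrative sketch (not part of the original source): how a driver
 * hands a batch of received packets to ifnet_input_extended(), which
 * lands in ifnet_input_common() above.  Packets are chained via
 * m_nextpkt, and packets_in must match the chain length exactly (it
 * is asserted above); bytes_in may be approximate.
 */
static void
driver_rx_sketch(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = cnt;	/* must equal the chain length */
	s.bytes_in = bytes;	/* may include link-layer headers */
	(void) ifnet_input_extended(ifp, head, tail, &s);
}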

static void
ifnet_start_common(struct ifnet *ifp, int resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it.
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, 0);
}

static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	snprintf(ifname, sizeof (ifname), "%s_starter",
	    if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		(void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
		    (PZERO - 1) | PSPIN, ifname, ts);

		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;
		for (;;) {
			u_int32_t req = ifp->if_start_req;

			lck_mtx_unlock(&ifp->if_start_lock);
			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));
			lck_mtx_lock_spin(&ifp->if_start_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_start_req)
				break;
		}
		ifp->if_start_req = 0;
		ifp->if_start_active = 0;
		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
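
/*
 * Illustrative sketch (not part of the original source): the shape of
 * a driver's if_start callback, invoked by the starter thread above
 * (registered through ifnet_init_eparams.start).  It drains the send
 * queue with the ifnet_dequeue() KPI, defined later in this file,
 * until the scheduler runs dry.  hw_transmit() is a hypothetical
 * stand-in for the device-specific send routine.
 */
extern void hw_transmit(ifnet_t, mbuf_t);	/* hypothetical */

static void
driver_start_sketch(ifnet_t ifp)
{
	mbuf_t m;

	/* IFNET_SCHED_MODEL_NORMAL: the classq picks the next packet */
	while (ifnet_dequeue(ifp, &m) == 0)
		hw_transmit(ifp, m);
}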

void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}

static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller",
	    if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req)
				break;
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}

void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}

errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
	    model != IFNET_SCHED_MODEL_NORMAL))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}

errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	int error;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL)
			m_freem_list(m);
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		/* flag tested without lock for performance */
		m_freem(m);
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* enqueue the packet */
	error = ifclassq_enqueue(&ifp->if_snd, m);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (error == 0 || error == EQFULL || error == EQSUSPENDED)
		ifnet_start(ifp);

	return (error);
}
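
/*
 * Illustrative sketch (not part of the original source): how a caller
 * of ifnet_enqueue() might interpret the queueing result.  EQFULL and
 * EQSUSPENDED are advisory, returned after the scheduler has accepted
 * the packet; dlil_output() later in this file maps them onto struct
 * flowadv codes the same way.
 */
static errno_t
enqueue_sketch(struct ifnet *ifp, struct mbuf *m, struct flowadv *adv)
{
	errno_t err = ifnet_enqueue(ifp, m);

	if (err == EQFULL || err == EQSUSPENDED) {
		if (adv != NULL)
			adv->code = (err == EQFULL ?
			    FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
		err = 0;	/* accepted; sender should back off */
	}
	return (err);
}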

errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
	    tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
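
/*
 * Illustrative sketch (not part of the original source): a driver
 * using the DRIVER_MANAGED scheduling model, draining one service
 * class per hardware ring via ifnet_dequeue_service_class_multi()
 * above.  hw_transmit_chain() is a hypothetical stand-in for the
 * device send routine, and the batch size is made up.
 */
extern void hw_transmit_chain(ifnet_t, mbuf_t);	/* hypothetical */

static void
driver_managed_start_sketch(ifnet_t ifp)
{
	mbuf_t head = NULL, tail = NULL;
	u_int32_t cnt = 0, len = 0;

	/* pull up to 32 voice-class packets in one go */
	if (ifnet_dequeue_service_class_multi(ifp, MBUF_SC_VO, 32,
	    &head, &tail, &cnt, &len) == 0 && head != NULL)
		hw_transmit_chain(ifp, head);
}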

static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}

static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}

static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
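
/*
 * Illustrative sketch (not part of the original source): an iff_output
 * callback as dispatched by dlil_interface_filters_output() above.
 * Returning 0 passes the packet along; EJUSTRETURN tells DLIL the
 * filter consumed the mbuf; any other errno makes DLIL free it.  The
 * drop condition here is contrived for illustration.
 */
static errno_t
filt_output_sketch(void *cookie, ifnet_t ifp,
    protocol_family_t protocol, mbuf_t *data)
{
	if (protocol != PF_INET)
		return (0);		/* not ours; continue down the stack */
	if (mbuf_pkthdr_len(*data) == 0) {
		mbuf_freem(*data);	/* we own it now... */
		*data = NULL;
		return (EJUSTRETURN);	/* ...so tell DLIL not to touch it */
	}
	return (0);
}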

static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *	frame_header;
			mbuf_t	next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}

static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}

static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 */
	if (ifp->if_data_threshold &&
	    (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
	    ifp->if_data_threshold) {
		ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
		nstat_ifnet_threshold_reached(ifp->if_index);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}

__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}

static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	protocol_family_t protocol_family;
	ifnet_t ifp = ifp_param;
	char * frame_header;
	struct if_proto * last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t * pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int error = 0, iorefcnt = 0;
		mbuf_t next_packet;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			pktf_mask = 0;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

static int
if_mcasts_update(struct ifnet *ifp)
{
	int err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}

#define TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;
	int i;

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_proto_count(ifp);
	if (if_proto_count) {
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_WAITOK | M_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);

done:
	return (kev_post_msg(event));
}

errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg);

	return (result);
}

#include <netinet/ip6.h>
#include <netinet/ip.h>

static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof (struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof (struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}

/*
 * This is mostly called from the context of the DLIL input thread;
 * because of that there is no need for atomic operations.
 */
static __inline void
ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_ibepackets++;
		ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_ibkpackets++;
		ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ivipackets++;
		ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ivopackets++;
		ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_ipvpackets++;
		ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
	}
}

/*
 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics.  We trade off accuracy for performance
 * by not using atomic operations here.
 */
static __inline void
ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_obepackets++;
		ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_obkpackets++;
		ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ovipackets++;
		ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ovopackets++;
		ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_opvpackets++;
		ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
	}
}

/*
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking.  In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both.  This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null.  This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto	*proto = NULL;
	mbuf_t	m;
	mbuf_t	send_head = NULL;
	mbuf_t	*send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent
	 * ifnet_detach from happening while this operation is in progress.
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp.  This will allow
			 * us to determine that it is a legitimate packet
			 * for the system.  Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context.  If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary.  If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertises TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) &&
		    (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB)
			ifp->if_fg_sendts = net_uptime();

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & IFEF_SENDLIST) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		VERIFY(ifp->if_eflags & IFEF_SENDLIST);
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		retval = (*ifp->if_output)(ifp, send_head);
		if (retval == EQFULL || retval == EQSUSPENDED) {
			if (adv != NULL) {
				adv->code = (retval == EQFULL ?
				    FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
			}
			retval = 0;
		}
		if (retval == 0 && flen > 0) {
			fbytes += flen;
			fpkts++;
		}
		if (retval != 0 && dlil_verbose) {
			printf("%s: output error on %s retval = %d\n",
			    __func__, if_name(ifp), retval);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist)	/* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}

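/*
 * Illustrative sketch (not part of the original source): an interface
 * filter ioctl handler that follows the convention enforced above.
 * Returning EOPNOTSUPP (or ENOTSUP) passes the ioctl along to the
 * protocol, family and interface; returning 0 or a real error claims it;
 * EJUSTRETURN claims it with a zero result.  SIOCMYFILTER and
 * my_filt_ioctl are hypothetical names; the callback signature is the
 * iff_ioctl shape from <net/kpi_interfacefilter.h>.
 */
#define	SIOCMYFILTER	0xc0086900	/* hypothetical private command */

static errno_t
my_filt_ioctl(void *cookie, ifnet_t ifp, protocol_family_t proto,
    unsigned long cmd, void *arg)
{
#pragma unused(cookie, ifp, proto, arg)
	if (cmd != SIOCMYFILTER)
		return (EOPNOTSUPP);	/* not ours; keep retval unchanged */
	/* ... handle the command here ... */
	return (0);			/* handled; becomes the final retval */
}
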
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}

errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);

	return (result);
}

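/*
 * Worked example (not part of the original source): resolving the mDNS
 * group 224.0.0.251 through the path above.  For AF_INET on Ethernet the
 * protocol's resolve_multi handler maps the low 23 bits of the group
 * address into the 01:00:5e prefix, so `ll' comes back holding
 * 01:00:5e:00:00:fb.  The function name below is hypothetical.
 */
static errno_t
example_resolve_mdns_group(struct ifnet *ifp)
{
	struct sockaddr_in grp;
	struct sockaddr_storage ll;

	bzero(&grp, sizeof (grp));
	grp.sin_len = sizeof (grp);
	grp.sin_family = AF_INET;
	grp.sin_addr.s_addr = htonl(0xe00000fbU);	/* 224.0.0.251 */

	return (dlil_resolve_multi(ifp, (struct sockaddr *)&grp,
	    (struct sockaddr *)&ll, sizeof (ll)));
}
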
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl * sender_hw, const struct sockaddr * sender_proto,
    const struct sockaddr_dl * target_hw, const struct sockaddr * target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}

struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
	&net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t pop = (caddr_t)popx - (caddr_t)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

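/*
 * Illustrative sketch (not part of the original source): the push/pop pair
 * above brackets a region so re-entry can be detected on the same thread;
 * popping clears only the bits that the matching push newly set.
 * NET_THREAD_HELD_PF is assumed here to be one of the mark bits used by
 * the networking stack; treat the constant as an assumption.
 */
static void
example_marked_region(void)
{
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
	/* ... work that must not recurse into pf ... */
	VERIFY(net_thread_is_marked(NET_THREAD_HELD_PF) != 0);
	net_thread_marks_pop(marks);	/* clears only bits we newly set */
}
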
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}

__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl * sender_hw,
    const struct sockaddr * sender_proto, const struct sockaddr_dl * target_hw,
    const struct sockaddr * target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}

/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}

/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

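/*
 * Illustrative sketch (not part of the original source): the canonical use
 * of the two routines above.  Taking an io reference with
 * ifnet_is_attached(ifp, 1) keeps ifnet_detach_final() from completing
 * until the matching ifnet_decr_iorefcnt(); every successful grab must be
 * paired with exactly one release.
 */
static errno_t
example_with_ioref(struct ifnet *ifp)
{
	errno_t err = ENXIO;

	if (!ifnet_is_attached(ifp, 1))
		return (err);		/* detached/detaching; no ref taken */

	/* ... safely use driver-owned state hanging off ifp ... */
	err = 0;

	ifnet_decr_iorefcnt(ifp);	/* may wake the detacher thread */
	return (err);
}
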
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	return (retval);
}

errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v1 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}

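/*
 * Illustrative sketch (not part of the original source): a client filling
 * in the v1 KPI structure consumed above.  Only the input and pre_output
 * handlers are populated; the example_attach_v1 name is hypothetical,
 * while the structure fields match those copied into ifproto->kpi.v1.
 */
static errno_t
example_attach_v1(ifnet_t ifp, protocol_family_t pf,
    proto_media_input input_fn, proto_media_preout preout_fn)
{
	struct ifnet_attach_proto_param pr;

	bzero(&pr, sizeof (pr));
	pr.input = input_fn;		/* packets demuxed to this protocol */
	pr.pre_output = preout_fn;	/* frame type/dest resolution on output */
	/* event, ioctl, detached, resolve, send_arp may stay NULL */

	return (ifnet_attach_protocol(ifp, pf, &pr));
}
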
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v2 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}

errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}

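/*
 * Usage note (not part of the original source): detaching is symmetric
 * with the attach calls above and is keyed purely by family; e.g. an IPv4
 * module would undo its attach with:
 *
 *	(void) ifnet_detach_protocol(ifp, PF_INET);
 *
 * after which the v1/v2 handlers are redirected to the ifproto_media_*
 * stubs below until the last proto reference drops.
 */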
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (EOPNOTSUPP);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}

extern int if_next_index(void);

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
	TAILQ_INIT(&ifp->if_prefixhead);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
	    IFNET_LQM_THRESH_UNKNOWN;
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_idle_flags & IFRF_IDLE_NOTIFY) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}

/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link address
 * gets removed from if_addrhead and ifnet_addrs[] at detach time, its
 * location in memory must never change as it may still be referred to
 * by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	bcopy(workbuf, asdl->sdl_data, namelen);
	asdl->sdl_nlen = namelen;
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen != 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}

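/*
 * Worked example (not part of the original source) of the sizing math in
 * dlil_alloc_lladdr() for "en0" with a 6-byte Ethernet address:
 *
 *	namelen  = strlen("en0")                          = 3
 *	masklen  = offsetof(sockaddr_dl, sdl_data[0]) + 3 = 11
 *	socksize = 11 + 6                                 = 17
 *	17 < sizeof (struct sockaddr_dl) (20), so socksize = 20
 *	ROUNDUP(20) to a 4-byte boundary                  = 20
 *
 * 20 <= DLIL_SDLMAXLEN, so the common in-structure storage is used and no
 * separate allocation is needed.
 */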
void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}

errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;

	if (ifp == NULL)
		return (EINVAL);

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	ifindex2ifnet[ifp->if_index] = NULL;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifp->if_data_threshold = 0;
	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}

static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}

static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
	/* NOTREACHED */
}

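/*
 * Design note with a minimal sketch (not part of the original source):
 * msleep0() takes a continuation function and does not return to its
 * caller when the thread blocks; the kernel stack is given up and the
 * continuation runs on wakeup.  That is why ifnet_detacher_thread_cont()
 * re-checks its invariants from the top each time, and why the VERIFY(0)
 * above is unreachable in normal operation.  A caller-side pattern,
 * with hypothetical names:
 *
 *	lck_mtx_lock(&my_lock);
 *	while (my_work_count == 0)
 *		(void) msleep0(&my_chan, &my_lock, (PZERO - 1),
 *		    "my_wait", 0, my_continuation);	// no return on block
 */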
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	/* Prefix list should be empty by now */
	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	if (if_free != NULL)
		if_free(ifp);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}

static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
{
#pragma unused(ifp, m, sa, ll, t)
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, dlifp1);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}

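/*
 * Illustrative, self-contained sketch (not part of the original source) of
 * the alignment trick used by dlil_if_acquire() above: over-allocate,
 * round up past one stash slot to a 64-bit boundary, and keep the original
 * pointer one word below the aligned base so the buffer can be freed
 * later.  M_TEMP is used here only as a generic malloc type.
 */
static void *
example_aligned_alloc(size_t objsize)
{
	size_t bufsize = objsize + sizeof (void *) + sizeof (u_int64_t);
	void *buf, *base, **pbuf;

	buf = _MALLOC(bufsize, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return (NULL);

	/* leave room for the stashed pointer, then align to 8 bytes */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;			/* original address for later free */
	return (base);
}

static void
example_aligned_free(void *base)
{
	void **pbuf = (void **)((intptr_t)base - sizeof (void *));

	_FREE(*pbuf, M_TEMP);
}
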
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}

static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
			    0, 0, ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
void
if_lqm_update(struct ifnet *ifp, int lqm)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD)
		lqm = IFNET_LQM_THRESH_BAD;
	else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
		lqm = IFNET_LQM_THRESH_POOR;
	else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
		lqm = IFNET_LQM_THRESH_GOOD;

	ifnet_lock_exclusive(ifp);
	if (lqm == ifp->if_lqm) {
		ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_lqm = lqm;
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
}
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}
static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}
static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}
static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}
static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}
static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}
static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}
static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			switch (sdl->sdl_type) {
			case IFT_ETHER:
				VERIFY(size == ETHER_ADDR_LEN);
				bytes = unspec;
				break;
			case IFT_IEEE1394:
				VERIFY(size == FIREWIRE_EUI64_LEN);
				bytes = unspec;
				break;
			default:
				VERIFY(FALSE);
				break;
			};
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
int
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	int result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
errno_t
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}
errno_t
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
#if PF_ALTQ
		/* Throttling works only for IFCQ, not ALTQ instances */
		if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
			return (ENXIO);
#endif /* PF_ALTQ */
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}
int
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	int result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}
errno_t
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	errno_t err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && !(ifp->if_log.flags & IFNET_LOGF_DLIL))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}
errno_t
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}
errno_t
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}
errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware
		 * implementations that perform the 16-bit 1's complement
		 * sum beginning at various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}
static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}
static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	int		len;
	uint16_t	sum;
} sumtbl[] = {
	/* ... { len, sum } entries elided in this copy of the file ... */
};

#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d offset=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken b_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	printf("DLIL: SUM16 self-tests PASSED\n");
}
#define	CASE_STRINGIFY(x) case x: return #x
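/*
 * For example, CASE_STRINGIFY(KEV_DL_LINK_ON) expands to:
 *
 *	case KEV_DL_LINK_ON: return "KEV_DL_LINK_ON";
 */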
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);