/*
 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>

#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>

#include <net/pfvar.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_netem.h>
#include <net/necp.h>
#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE     4 /* LONGWORDS */
#define MAX_LINKADDR            4 /* LONGWORDS */
#define M_NKE                   M_IFADDR

#if 1
#define DLIL_PRINTF     printf
#else
#define DLIL_PRINTF     kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f)   \
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
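/*
 * The two macros above are compile-time checks: _CASSERT() fails the build
 * when its condition is false, so any if_data counter whose offset is not a
 * multiple of 8 bytes is caught at compile time instead of faulting later in
 * the 64-bit atomic update paths (see the dlil_init() assertions below).
 */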
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)   next_hash;
	u_int32_t               refcount;
	u_int32_t               detached;
	struct ifnet            *ifp;
	protocol_family_t       protocol_family;
	int                     proto_kpi;
	union {
		struct {
			proto_media_input               input;
			proto_media_preout              pre_output;
			proto_media_event               event;
			proto_media_ioctl               ioctl;
			proto_media_detached            detached;
			proto_media_resolve_multi       resolve_multi;
			proto_media_send_arp            send_arp;
		} v1;
		struct {
			proto_media_input_v2            input;
			proto_media_preout              pre_output;
			proto_media_event               event;
			proto_media_ioctl               ioctl;
			proto_media_detached            detached;
			proto_media_resolve_multi       resolve_multi;
			proto_media_send_arp            send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
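/*
 * The v1 and v2 KPI callback sets differ only in the input handler: a v2
 * protocol's input (proto_media_input_v2) is handed a chain of packets per
 * call, whereas a v1 protocol's input is invoked once per packet.  proto_kpi
 * records which union member is valid for a given attachment.
 */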
#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet    dl_if;                  /* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
	u_int32_t dl_if_flags;                  /* flags (below) */
	u_int32_t dl_if_refcnt;                 /* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void    *dl_if_uniqueid;                /* unique interface id */
	size_t  dl_if_uniqueid_len;             /* length of the unique id */
	char    dl_if_namestorage[IFNAMSIZ];    /* interface name storage */
	char    dl_if_xnamestorage[IFXNAMSIZ];  /* external name storage */
	struct {
		struct ifaddr   ifa;            /* lladdr ifa */
		u_int8_t        asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t        msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t        dl_if_attach;           /* attach PC stacktrace */
	ctrace_t        dl_if_detach;           /* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG      0x4     /* has debugging info */

#define IF_REF_TRACE_HIST_SIZE  8       /* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet       dldbg_dlif;             /* dlil_ifnet */
	u_int16_t               dldbg_if_refhold_cnt;   /* # ifnet references */
	u_int16_t               dldbg_if_refrele_cnt;   /* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t                dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t                dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)  (&s->dl_if)
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)
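/*
 * DLIL_TO_IFP()/IFP_TO_DLIL() rely on dl_if being the first member of
 * struct dlil_ifnet: the public ifnet and its enclosing DLIL wrapper share
 * the same starting address, so converting between them is a simple cast.
 */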
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)       filt_next;
	u_int32_t                       filt_flags;
	const char                      *filt_name;
	protocol_family_t               filt_protocol;
	iff_input_func                  filt_input;
	iff_output_func                 filt_output;
	iff_event_func                  filt_event;
	iff_ioctl_func                  filt_ioctl;
	iff_detached_func               filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
#else
static unsigned int ifnet_debug;        /* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
static struct zone *dlif_zone;          /* zone for dlil_ifnet */

#define DLIF_ZONE_MAX           IFNETS_MAX      /* maximum elements in zone */
#define DLIF_ZONE_NAME          "ifnet"         /* zone name */
static unsigned int dlif_filt_size;     /* size of ifnet_filter */
static struct zone *dlif_filt_zone;     /* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX      8       /* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME     "ifnet_filter"  /* zone name */

static unsigned int dlif_phash_size;    /* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;    /* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX     DLIF_ZONE_MAX   /* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME    "ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;    /* size of if_proto */
static struct zone *dlif_proto_zone;    /* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX     (DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME    "ifnet_proto"   /* zone name */

static unsigned int dlif_tcpstat_size;      /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize;   /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;      /* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX   1       /* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */

static unsigned int dlif_udpstat_size;      /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize;   /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;      /* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX   1       /* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);
static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_main_input_thread_cont(void *, wait_result_t);

static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_cont(void *, wait_result_t);

static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);

static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, struct ifnet *, boolean_t);
static boolean_t dlil_input_stats_sync(struct ifnet *,
    struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void dlil_incr_pending_thread_count(void);
static void dlil_decr_pending_thread_count(void);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_func(void *, wait_result_t);
static void ifnet_start_thread_cont(void *, wait_result_t);

static void ifnet_poll_thread_func(void *, wait_result_t);
static void ifnet_poll_thread_cont(void *, wait_result_t);

static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
    boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run; /* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];
	uint32_t        ifk_capabilities;
	uint32_t        ifk_capenable;
	uint32_t        ifk_output_sched_model;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t       ifce_flowhash;
	struct ifnet    *ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;         /* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;              /* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME      "ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX       32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };

SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define IF_SNDQ_MINLEN  32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN  32
#define IF_RCVQ_MAXLEN  256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define IF_RXPOLL_DECAY 2       /* ilog2 of EWMA decay rate (4) */
u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS 0       /* 0 (disabled) */
u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT        10
static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT        100
static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;     /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");
#define HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |  \
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");
static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;           /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;         /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
void
ifnet_filter_update_tso(boolean_t filter_enable)
{
	/*
	 * update filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
	routegenid_update();
}
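/*
 * A nonzero dlil_filter_disable_tso_count means at least one interface
 * filter is attached that needs to observe complete frames; bumping the
 * route generation here prompts TCP to re-examine that counter and back
 * off from TSO while such filters are present.
 */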
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||      \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
		panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
		/* NOTREACHED */                                        \
	}                                                               \
}

#define DLIL_EWMA(old, new, decay) do {                                 \
	u_int32_t _avg;                                                 \
	if ((_avg = (old)) > 0)                                         \
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else                                                            \
		_avg = (new);                                           \
	(old) = _avg;                                                   \
} while (0)

#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)
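/*
 * DLIL_EWMA computes an exponentially weighted moving average without
 * division: with decay = 2 (IF_RXPOLL_DECAY) the update is effectively
 * avg = (3 * avg + new) / 4.  For example, an average of 100 and a new
 * sample of 20 yields ((100 << 2) - 100 + 20) >> 2 = 80.
 */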
struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat =  2, .phiwat =  8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
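/*
 * The zero-speed entry terminates rxpoll_tbl; the polling code scans the
 * table and applies the packet/byte watermarks of the row matching the
 * interface's downlink rate, so faster links tolerate more outstanding
 * inbound work before a mode transition is considered.
 */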
decl_lck_mtx_data(static, dlil_thread_sync_lock);
static uint32_t dlil_pending_thread_cnt = 0;

static void
dlil_incr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt++;
	lck_mtx_unlock(&dlil_thread_sync_lock);
}

static void
dlil_decr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	dlil_pending_thread_cnt--;
	if (dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}
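/*
 * The wakeup() above pairs with a waiter sleeping on
 * &dlil_pending_thread_cnt: code that spawns DLIL helper threads bumps the
 * count, each thread drops it once its setup completes, and the waiter is
 * released when the count reaches zero.
 */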
static inline u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
	}

	while (proto != NULL && proto->protocol_family != protocol_family) {
		proto = SLIST_NEXT(proto, next_hash);
	}

	if (proto != NULL) {
		if_proto_ref(proto);
	}

	return proto;
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		return;
	}

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}

	LCK_RW_ASSERT(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t count = 0;
	int i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}
__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
	ev_msg.kev_class        = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass     = event_subclass;
	ev_msg.event_code       = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	bool update_generation = true;
	if (event_subclass == KEV_DL_SUBCLASS) {
		/* Don't update interface generation for frequent link quality and state changes */
		switch (event_code) {
		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
		case KEV_DL_RRC_STATE_CHANGED:
		case KEV_DL_NODE_PRESENCE:
		case KEV_DL_NODE_ABSENCE:
		case KEV_DL_MASTER_ELECTED:
			update_generation = false;
			break;
		default:
			break;
		}
	}

	return dlil_event_internal(ifp, &ev_msg, update_generation);
}
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return ret;
}
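/*
 * Layout of the zone elements allocated above (and unwound in the error
 * path): each buffer is the structure size plus sizeof (void *) +
 * sizeof (u_int64_t) of headroom, so an 8-byte aligned base always fits and
 * the pointer-sized slot just below it can hold the original zalloc()
 * address:
 *
 *   buf                           base (8-byte aligned)
 *    |<--- headroom --->|<-void*->|<-- tcpstat_local / udpstat_local -->|
 *                            ^ original buf, read back before zfree()
 */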
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	boolean_t dlil_rxpoll_input;
	thread_continue_t func;
	u_int32_t limit;
	int error;

	dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
	    (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (dlil_rxpoll_input) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->ifp = ifp;         /* NULL for main input thread */

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		if (ifp->if_xflags & IFXF_LEGACY) {
			(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
		}
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return error;
}
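/*
 * Summary of the three input-thread flavors selected above:
 *  - main input thread: shared by loopback and by interfaces without a
 *    dedicated thread; created once from dlil_init() with a NULL ifp.
 *  - rxpoll thread (dlil_rxpoll_input_thread_func): for legacy drivers that
 *    advertise IFEF_RXPOLL, allowing DLIL to switch them between interrupt
 *    and polling mode based on observed load.
 *  - plain per-interface thread (dlil_input_thread_func): everything else.
 */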
#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_input_thread_termination_spin;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_input_thread_termination_spin = i;
	return err;
}
#endif /* TEST_INPUT_THREAD_TERMINATION */
static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	bzero(inp->input_name, sizeof(inp->input_name));
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof(inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);
#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->ifp;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		DLIL_PRINTF("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->input_lck);
	_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
	VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
	inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->input_waiting);
	lck_mtx_unlock(&inp->input_lck);

	/* free up pending packets */
	if (pkt.cp_mbuf != NULL) {
		mbuf_freem_list(pkt.cp_mbuf);
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	DLIL_PRINTF("%s: input thread terminated\n",
	    inp->input_name);

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof(policy));
	policy.affinity_tag = tag;
	return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	           (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
}
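/*
 * Threads that share an affinity tag are steered to the same processor set.
 * dlil_create_input_thread() generates a random tag for each input thread;
 * the same tag is later handed to the matching workloop or starter thread so
 * that producer and consumer run close to each other.
 */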
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	VERIFY(dlil_pending_thread_cnt == 0);
	dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
	    sizeof(struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);
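	/*
	 * zinit() arguments follow the same pattern for every zone created in
	 * this function: element size, maximum zone size in bytes (hence the
	 * count * size products), an allocation chunk size of 0 to accept the
	 * default, and the zone name.  Z_EXPAND lets the zone grow on demand
	 * and Z_CALLERACCT disables caller accounting for these allocations.
	 */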
1668 dlif_filt_size
= sizeof(struct ifnet_filter
);
1669 dlif_filt_zone
= zinit(dlif_filt_size
,
1670 DLIF_FILT_ZONE_MAX
* dlif_filt_size
, 0, DLIF_FILT_ZONE_NAME
);
1671 if (dlif_filt_zone
== NULL
) {
1672 panic_plain("%s: failed allocating %s", __func__
,
1673 DLIF_FILT_ZONE_NAME
);
1676 zone_change(dlif_filt_zone
, Z_EXPAND
, TRUE
);
1677 zone_change(dlif_filt_zone
, Z_CALLERACCT
, FALSE
);
1679 dlif_phash_size
= sizeof(struct proto_hash_entry
) * PROTO_HASH_SLOTS
;
1680 dlif_phash_zone
= zinit(dlif_phash_size
,
1681 DLIF_PHASH_ZONE_MAX
* dlif_phash_size
, 0, DLIF_PHASH_ZONE_NAME
);
1682 if (dlif_phash_zone
== NULL
) {
1683 panic_plain("%s: failed allocating %s", __func__
,
1684 DLIF_PHASH_ZONE_NAME
);
1687 zone_change(dlif_phash_zone
, Z_EXPAND
, TRUE
);
1688 zone_change(dlif_phash_zone
, Z_CALLERACCT
, FALSE
);
1690 dlif_proto_size
= sizeof(struct if_proto
);
1691 dlif_proto_zone
= zinit(dlif_proto_size
,
1692 DLIF_PROTO_ZONE_MAX
* dlif_proto_size
, 0, DLIF_PROTO_ZONE_NAME
);
1693 if (dlif_proto_zone
== NULL
) {
1694 panic_plain("%s: failed allocating %s", __func__
,
1695 DLIF_PROTO_ZONE_NAME
);
1698 zone_change(dlif_proto_zone
, Z_EXPAND
, TRUE
);
1699 zone_change(dlif_proto_zone
, Z_CALLERACCT
, FALSE
);
1701 dlif_tcpstat_size
= sizeof(struct tcpstat_local
);
1702 /* Enforce 64-bit alignment for tcpstat_local structure */
1703 dlif_tcpstat_bufsize
=
1704 dlif_tcpstat_size
+ sizeof(void *) + sizeof(u_int64_t
);
1705 dlif_tcpstat_bufsize
=
1706 P2ROUNDUP(dlif_tcpstat_bufsize
, sizeof(u_int64_t
));
1707 dlif_tcpstat_zone
= zinit(dlif_tcpstat_bufsize
,
1708 DLIF_TCPSTAT_ZONE_MAX
* dlif_tcpstat_bufsize
, 0,
1709 DLIF_TCPSTAT_ZONE_NAME
);
1710 if (dlif_tcpstat_zone
== NULL
) {
1711 panic_plain("%s: failed allocating %s", __func__
,
1712 DLIF_TCPSTAT_ZONE_NAME
);
1715 zone_change(dlif_tcpstat_zone
, Z_EXPAND
, TRUE
);
1716 zone_change(dlif_tcpstat_zone
, Z_CALLERACCT
, FALSE
);
1718 dlif_udpstat_size
= sizeof(struct udpstat_local
);
1719 /* Enforce 64-bit alignment for udpstat_local structure */
1720 dlif_udpstat_bufsize
=
1721 dlif_udpstat_size
+ sizeof(void *) + sizeof(u_int64_t
);
1722 dlif_udpstat_bufsize
=
1723 P2ROUNDUP(dlif_udpstat_bufsize
, sizeof(u_int64_t
));
1724 dlif_udpstat_zone
= zinit(dlif_udpstat_bufsize
,
1725 DLIF_TCPSTAT_ZONE_MAX
* dlif_udpstat_bufsize
, 0,
1726 DLIF_UDPSTAT_ZONE_NAME
);
1727 if (dlif_udpstat_zone
== NULL
) {
1728 panic_plain("%s: failed allocating %s", __func__
,
1729 DLIF_UDPSTAT_ZONE_NAME
);
1732 zone_change(dlif_udpstat_zone
, Z_EXPAND
, TRUE
);
1733 zone_change(dlif_udpstat_zone
, Z_CALLERACCT
, FALSE
);
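	/*
	 * Sketch of the alignment math used above (illustrative numbers
	 * only, not the real structure sizes): for a hypothetical 300-byte
	 * element, the buffer size becomes 300 + sizeof(void *) +
	 * sizeof(u_int64_t) = 316 on LP64, and P2ROUNDUP(316, 8) = 320.
	 * The extra slack lets the payload be placed on an 8-byte boundary
	 * regardless of where the zone allocator starts the buffer.
	 */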
	ifnet_llreach_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);
	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
	lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group,
	    dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
	/* Initialize interface address subsystem */

	/* Initialize the packet filter */

	/* Initialize queue algorithms */

	/* Initialize packet schedulers */

	/* Initialize flow advisory subsystem */

	/* Initialize the pktap virtual interface */

	/* Initialize the service class to dscp map */

	/* Initialize the interface port list */
	if_ports_used_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();
	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed.  The interface is added to the delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed.
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
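/*
 * The helpers above implement a simple monitor around an interface's
 * filter list: if_flt_monitor_enter() sleeps on if_flt_head until the list
 * is no longer busy and then marks it busy; if_flt_monitor_leave() drops
 * the busy count and wakes any waiters once it reaches zero.  Both run
 * with if_flt_lock held, and msleep() releases and reacquires that mutex
 * around the wait, which is what makes the hand-off safe.
 */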
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection.
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(TRUE);
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		zfree(dlif_filt_zone, filter);
	}

	return retval;
}
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not.
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(FALSE);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);

done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL) {
		return;
	}
	dlil_detach_filter_internal(filter, 0);
}
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(current_thread() == inp->input_thr);

	dlil_decr_pending_thread_count();
	lck_mtx_lock(&inp->input_lck);
	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	/* main input thread is uninterruptible */
	VERIFY(wres != THREAD_INTERRUPTED);
	lck_mtx_lock_spin(&inp->input_lck);
	VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE |
	    DLIL_INPUT_RUNNING)));
	inp->input_waiting |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t proto_req;

		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		_getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
		m_loop = pkt.cp_mbuf;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (proto_req) {
			proto_input_run();
		}

		lck_mtx_lock_spin(&inp->input_lck);
		VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
		/* main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
		if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			break;
		}
	}

	inp->input_waiting &= ~DLIL_INPUT_RUNNING;
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Input thread for interfaces with legacy input model.
 */
__attribute__((noreturn))
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->input_thr);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);
	ifnet_decr_pending_thread_count(ifp);

	lck_mtx_lock(&inp->input_lck);
	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	lck_mtx_lock_spin(&inp->input_lck);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	inp->input_waiting |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify = FALSE;
		u_int32_t m_cnt;

		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble).
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		if (notify) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, ifp->if_poll_mode);
		}

		lck_mtx_lock_spin(&inp->input_lck);
		VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
		if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->input_waiting &= ~DLIL_INPUT_RUNNING;

terminate:
	if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
		lck_mtx_unlock(&inp->input_lck);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
		lck_mtx_unlock(&inp->input_lck);
		(void) thread_block_parameter(dlil_input_thread_cont, inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->input_thr);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_poll_%s", ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);
	ifnet_decr_pending_thread_count(ifp);

	lck_mtx_lock(&inp->input_lck);
	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
	lck_mtx_unlock(&inp->input_lck);
	(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	lck_mtx_lock_spin(&inp->input_lck);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
	inp->input_waiting |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		u_int32_t mode;
		u_int64_t ival;
		struct timespec now, delta;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify;

		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
			ival = IF_RXPOLL_INTERVALTIME_MIN;
		}

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = ifp->if_poll_mode;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble).
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		_getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
			*(&ifp->if_poll_sample_lasttime) = *(&now);
		}

		net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
				goto skip;
			}

			*(&ifp->if_poll_sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)ifp->if_poll_sstats.bytes;
			if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
				ifp->if_rxpoll_bmin = btot;
			}
			if (btot > ifp->if_rxpoll_bmax) {
				ifp->if_rxpoll_bmax = btot;
			}

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)ifp->if_poll_sstats.packets;
			if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
				ifp->if_rxpoll_pmin = ptot;
			}
			if (ptot > ifp->if_rxpoll_pmax) {
				ifp->if_rxpoll_pmax = ptot;
			}

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&ifp->if_poll_sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(ifp->if_rxpoll_wavg, inp->wtot, if_rxpoll_decay);

			if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
			}
			net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
			if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
				DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
				    "limits [%d/%d], wreq avg %d "
				    "limits [%d/%d], bytes avg %d "
				    "limits [%d/%d]\n", if_name(ifp),
				    (ifp->if_poll_mode ==
				    IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat,
				    ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_wlowat,
				    ifp->if_rxpoll_whiwat,
				    ifp->if_rxpoll_bavg,
				    ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}
			/* Perform mode transition, if necessary */
			if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
				*(&ifp->if_poll_mode_lasttime) = *(&now);
			}

			net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
			if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
				goto skip;
			}

			if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
			    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
			    (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
			    ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != ifp->if_poll_mode) {
				ifp->if_poll_mode = mode;
				*(&ifp->if_poll_mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		if (notify) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = {
				.model = mode, .reserved = { 0 }
			};
			errno_t err;

			DLIL_PRINTF("%s: polling is now %s, "
			    "pkts avg %d max %d limits [%d/%d], "
			    "wreq avg %d limits [%d/%d], "
			    "bytes avg %d limits [%d/%d]\n",
			    if_name(ifp),
			    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
			    "ON" : "OFF", ifp->if_rxpoll_pavg,
			    ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
			    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
			    ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
			    ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
			    ifp->if_rxpoll_bhiwat);

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
				DLIL_PRINTF("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				ifp->if_rxpoll_offreq++;
				if (err != 0) {
					ifp->if_rxpoll_offerr++;
				}
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifp->if_rxpoll_onreq++;
				if (err != 0) {
					ifp->if_rxpoll_onerr++;
				}
				break;
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
		}

		lck_mtx_lock_spin(&inp->input_lck);
		VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
		if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->input_waiting &= ~DLIL_INPUT_RUNNING;

terminate:
	if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
		lck_mtx_unlock(&inp->input_lck);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->input_waiting, THREAD_UNINT);
		lck_mtx_unlock(&inp->input_lck);
		(void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
		    inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
static errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
{
	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
			return EINVAL;
		}
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat) {
			return EINVAL;
		}
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
			return EINVAL;
		}
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat) {
			return EINVAL;
		}
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
		}
	}
	return 0;
}
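/*
 * Usage sketch for the poll-parameter routines (values below are
 * illustrative only, not recommendations): a driver that wants at most
 * 48 packets per poll and a 1 ms minimum poll interval could hand in
 *
 *	struct ifnet_poll_params p = {
 *		.packets_limit = 48,
 *		.interval_time = 1000 * 1000,	// nanoseconds, cf. net_nsectimer()
 *	};
 *	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);
 *
 * Zeroed fields keep their auto-tuned values, as handled in
 * dlil_rxpoll_update_params() below.
 */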
static void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->input_lck);
	}
	LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->input_lck);
	}

	return 0;
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}

	bzero(p, sizeof(*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = ifp->if_rxpoll_plim;
	p->packets_lowat = ifp->if_rxpoll_plowat;
	p->packets_hiwat = ifp->if_rxpoll_phiwat;
	p->bytes_lowat = ifp->if_rxpoll_blowat;
	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
	p->interval_time = ifp->if_rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}

errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	    (m_head != NULL), TRUE);
}
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last = NULL;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
		/* NOTREACHED */
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
		/* NOTREACHED */
	}

	bzero(&_s, sizeof(_s));
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return ifp->if_output(ifp, m);
}
static errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;

	if ((inp = ifp->if_inp) == NULL) {
		inp = dlil_main_input_thread;
	}

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		classq_pkt_t head, tail;
		CLASSQ_PKT_INIT_MBUF(&head, m_head);
		CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		notify = dlil_input_stats_sync(ifp, inp);
	}

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	return 0;
}
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
	struct ifnet *ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	ifnet_decr_pending_thread_count(ifp);

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct ifclassq *ifq = &ifp->if_snd;

	lck_mtx_lock(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_start_thread == THREAD_NULL)) {
		goto terminate;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;

		if (!IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		} else {
			ifp->if_start_delayed = 0;
		}
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
			break;
		}
	}

	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

	if (__probable(ifp->if_start_thread != THREAD_NULL)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
	} else {
		*(&ifp->if_start_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
	    ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);
	ifnet_decr_pending_thread_count(ifp);

	lck_mtx_lock(&ifp->if_poll_lock);
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_poll_thread == THREAD_NULL)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		struct mbuf *m_head, *m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->rcvq_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    ifp->if_poll_thread == THREAD_NULL) {
			break;
		}
	}

	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (ifp->if_poll_thread != THREAD_NULL) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
		if_qflush(ifp, 0);
	}
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
			ifp->if_poll_update++;
		}
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return 0;
}
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return 0;
}
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);
	}

	return err;
}
errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL)) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
	}

	return err;
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	if (maxqlen == 0) {
		maxqlen = if_rcvq_maxlen;
	} else if (maxqlen < IF_RCVQ_MINLEN) {
		maxqlen = IF_RCVQ_MINLEN;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
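/*
 * Note on the clamping above: delay_qlen is capped at 100 packets and
 * delay_timeout at 20000 before being multiplied by 1000, which implies
 * the caller passes the timeout in microseconds while the field is kept
 * in nanoseconds (an effective ceiling of 20 ms).  As an illustrative,
 * hypothetical call, ifnet_enqueue_multi_setup(ifp, 16, 1000) would ask
 * for the start callback to be coalesced over up to 16 packets or roughly
 * 1 ms, subject to the heuristics in ifnet_enqueue_ifclassq() below.
 */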
/*
 * This function clears the DSCP bits in the IPv4/v6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;

		ASSERT(ip_ver == IPV6_VERSION);

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
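/*
 * The IPv4 branch above fixes up ip_sum incrementally rather than
 * recomputing it: the header checksum is the one's-complement sum of the
 * header, so adding back the old TOS byte, subtracting the new (ECN-only)
 * value, and folding the carry keeps the checksum consistent with the
 * rewritten header.  This is the usual incremental-update technique in
 * the spirit of RFC 1624.  The IPv6 header carries no checksum, so only
 * the flow-label word needs to be rewritten there.
 */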
3729 static inline errno_t
3730 ifnet_enqueue_ifclassq(struct ifnet
*ifp
, classq_pkt_t
*p
, boolean_t flush
,
3733 volatile uint64_t *fg_ts
= NULL
;
3734 volatile uint64_t *rt_ts
= NULL
;
3735 struct timespec now
;
3736 u_int64_t now_nsec
= 0;
3738 uint8_t *mcast_buf
= NULL
;
3741 ASSERT(ifp
->if_eflags
& IFEF_TXSTART
);
3744 * If packet already carries a timestamp, either from dlil_output()
3745 * or from flowswitch, use it here. Otherwise, record timestamp.
3746 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3747 * the timestamp value is used internally there.
3749 switch (p
->cp_ptype
) {
3751 ASSERT(p
->cp_mbuf
->m_flags
& M_PKTHDR
);
3752 ASSERT(p
->cp_mbuf
->m_nextpkt
== NULL
);
3754 if (!(p
->cp_mbuf
->m_pkthdr
.pkt_flags
& PKTF_TS_VALID
) ||
3755 p
->cp_mbuf
->m_pkthdr
.pkt_timestamp
== 0) {
3757 net_timernsec(&now
, &now_nsec
);
3758 p
->cp_mbuf
->m_pkthdr
.pkt_timestamp
= now_nsec
;
3760 p
->cp_mbuf
->m_pkthdr
.pkt_flags
&= ~PKTF_TS_VALID
;
3762 * If the packet service class is not background,
3763 * update the timestamp to indicate recent activity
3764 * on a foreground socket.
3766 if ((p
->cp_mbuf
->m_pkthdr
.pkt_flags
& PKTF_FLOW_ID
) &&
3767 p
->cp_mbuf
->m_pkthdr
.pkt_flowsrc
== FLOWSRC_INPCB
) {
3768 if (!(p
->cp_mbuf
->m_pkthdr
.pkt_flags
&
3769 PKTF_SO_BACKGROUND
)) {
3770 ifp
->if_fg_sendts
= _net_uptime
;
3771 if (fg_ts
!= NULL
) {
3772 *fg_ts
= _net_uptime
;
3775 if (p
->cp_mbuf
->m_pkthdr
.pkt_flags
& PKTF_SO_REALTIME
) {
3776 ifp
->if_rt_sendts
= _net_uptime
;
3777 if (rt_ts
!= NULL
) {
3778 *rt_ts
= _net_uptime
;
3784 * Some Wi-Fi AP implementations do not correctly handle
3785 * multicast IP packets with DSCP bits set (radr://9331522).
3786 * As a workaround we clear the DSCP bits and set the service
3789 if ((p
->cp_mbuf
->m_flags
& M_MCAST
) != 0 &&
3790 IFNET_IS_WIFI_INFRA(ifp
)) {
3791 size_t len
= mbuf_len(p
->cp_mbuf
), hlen
;
3792 struct ether_header
*eh
;
3793 boolean_t pullup
= FALSE
;
3796 if (__improbable(len
< sizeof(struct ether_header
))) {
3797 DTRACE_IP1(small__ether
, size_t, len
);
3798 if ((p
->cp_mbuf
= m_pullup(p
->cp_mbuf
,
3799 sizeof(struct ether_header
))) == NULL
) {
3803 eh
= (struct ether_header
*)mbuf_data(p
->cp_mbuf
);
3804 etype
= ntohs(eh
->ether_type
);
3805 if (etype
== ETHERTYPE_IP
) {
3806 hlen
= sizeof(struct ether_header
) +
3809 DTRACE_IP1(small__v4
, size_t, len
);
3813 } else if (etype
== ETHERTYPE_IPV6
) {
3814 hlen
= sizeof(struct ether_header
) +
3815 sizeof(struct ip6_hdr
);
3817 DTRACE_IP1(small__v6
, size_t, len
);
3820 ip_ver
= IPV6_VERSION
;
3822 DTRACE_IP1(invalid__etype
, uint16_t, etype
);
3826 if ((p
->cp_mbuf
= m_pullup(p
->cp_mbuf
, hlen
)) ==
3831 eh
= (struct ether_header
*)mbuf_data(
3834 mbuf_set_service_class(p
->cp_mbuf
, MBUF_SC_BE
);
3835 mcast_buf
= (uint8_t *)(eh
+ 1);
3837 * ifnet_mcast_clear_dscp() will finish the work below.
3838 * Note that the pullups above ensure that mcast_buf
3839 * points to a full IP header.
3848 __builtin_unreachable();
3851 if (mcast_buf
!= NULL
) {
3852 ifnet_mcast_clear_dscp(mcast_buf
, ip_ver
);
    if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
        if (now_nsec == 0) {
            nanouptime(&now);
            net_timernsec(&now, &now_nsec);
        }
        /*
         * If the driver chose to delay start callback for
         * coalescing multiple packets, then use the following
         * heuristics to make sure that start callback will
         * be delayed only when bulk data transfer is detected.
         * 1. number of packets enqueued in (delay_win * 2) is
         * greater than or equal to the delay qlen.
         * 2. If delay_start is enabled it will stay enabled for
         * another 10 idle windows. This is to take into account
         * variable RTT and burst traffic.
         * 3. If the time elapsed since last enqueue is more
         * than 200ms we disable delaying start callback. This
         * is to take idle time into account.
         */
        u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
        if (ifp->if_start_delay_swin > 0) {
            if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
                ifp->if_start_delay_cnt++;
            } else if ((now_nsec - ifp->if_start_delay_swin)
                >= (200 * 1000 * 1000)) {
                ifp->if_start_delay_swin = now_nsec;
                ifp->if_start_delay_cnt = 1;
                ifp->if_start_delay_idle = 0;
                if (ifp->if_eflags & IFEF_DELAY_START) {
                    ifp->if_eflags &=
                        ~(IFEF_DELAY_START);
                    ifnet_delay_start_disabled++;
                }
            } else {
                if (ifp->if_start_delay_cnt >=
                    ifp->if_start_delay_qlen) {
                    ifp->if_eflags |= IFEF_DELAY_START;
                    ifp->if_start_delay_idle = 0;
                } else {
                    if (ifp->if_start_delay_idle >= 10) {
                        ifp->if_eflags &=
                            ~(IFEF_DELAY_START);
                        ifnet_delay_start_disabled++;
                    } else {
                        ifp->if_start_delay_idle++;
                    }
                }
                ifp->if_start_delay_swin = now_nsec;
                ifp->if_start_delay_cnt = 1;
            }
        } else {
            ifp->if_start_delay_swin = now_nsec;
            ifp->if_start_delay_cnt = 1;
            ifp->if_start_delay_idle = 0;
            ifp->if_eflags &= ~(IFEF_DELAY_START);
        }
    } else {
        ifp->if_eflags &= ~(IFEF_DELAY_START);
    }
    /* enqueue the packet (caller consumes object) */
    error = ifclassq_enqueue(&ifp->if_snd, p, pdrop);

    /*
     * Tell the driver to start dequeueing; do this even when the queue
     * for the packet is suspended (EQSUSPENDED), as the driver could still
     * be dequeueing from other unsuspended queues.
     */
    if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
        ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
        ifnet_start(ifp);
    }

    return error;
}
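
#if 0
/*
 * Illustrative model of the delay-start sliding-window heuristic used by
 * ifnet_enqueue_ifclassq() above, pulled out into an isolated sketch so the
 * three rules in the comment are easier to follow. The struct and function
 * below are hypothetical (they do not exist in DLIL); the field names mirror
 * the if_start_delay_* ifnet fields and the 200ms idle cutoff used above.
 */
struct delay_state {
    uint64_t swin;      /* start of the current window, nanoseconds */
    uint32_t cnt;       /* packets enqueued in the current window */
    uint32_t idle;      /* consecutive windows without bulk traffic */
    uint32_t qlen;      /* threshold: packets per window to call it "bulk" */
    uint64_t timeout;   /* configured delay timeout, nanoseconds */
    bool delay_on;      /* models IFEF_DELAY_START */
};

static void
delay_state_update(struct delay_state *ds, uint64_t now_nsec)
{
    uint64_t dwin = ds->timeout << 1;

    if (ds->swin == 0) {
        /* first enqueue ever: open a window, delaying stays off */
        ds->swin = now_nsec;
        ds->cnt = 1;
        ds->idle = 0;
        ds->delay_on = false;
    } else if (ds->swin + dwin > now_nsec) {
        /* still inside the window: just count the packet */
        ds->cnt++;
    } else if (now_nsec - ds->swin >= 200ULL * 1000 * 1000) {
        /* idle for at least 200ms: reset and stop delaying */
        ds->swin = now_nsec;
        ds->cnt = 1;
        ds->idle = 0;
        ds->delay_on = false;
    } else {
        /* window expired: decide based on the packet count seen */
        if (ds->cnt >= ds->qlen) {
            ds->delay_on = true;    /* bulk transfer detected */
            ds->idle = 0;
        } else if (ds->idle >= 10) {
            ds->delay_on = false;   /* too many quiet windows in a row */
        } else {
            ds->idle++;
        }
        ds->swin = now_nsec;
        ds->cnt = 1;
    }
}
#endif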
static int
ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
{
    struct ifnet *ifp = handle;
    boolean_t pdrop;        /* dummy */
    uint32_t i;

    ASSERT(n_pkts >= 1);
    for (i = 0; i < n_pkts - 1; i++) {
        (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
            FALSE, &pdrop);
    }
    /* flush with the last packet */
    (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);

    return 0;
}
static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
    boolean_t *pdrop)
{
    if (ifp->if_output_netem != NULL) {
        return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
    }

    return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
    boolean_t pdrop;

    return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
    classq_pkt_t pkt;

    if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
        m->m_nextpkt != NULL) {
        if (m != NULL) {
            m_freem_list(m);
            *pdrop = TRUE;
        }
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        !IF_FULLY_ATTACHED(ifp)) {
        /* flag tested without lock for performance */
        m_freem(m);
        *pdrop = TRUE;
        return ENXIO;
    } else if (!(ifp->if_flags & IFF_UP)) {
        m_freem(m);
        *pdrop = TRUE;
        return ENETDOWN;
    }

    CLASSQ_PKT_INIT_MBUF(&pkt, m);
    return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
}
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
    errno_t rc;
    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

    if (ifp == NULL || mp == NULL) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
        return ENXIO;
    }
    if (!ifnet_is_attached(ifp, 1)) {
        return ENXIO;
    }

    rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
        &pkt, NULL, NULL, NULL);
    VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *mp = pkt.cp_mbuf;

    return rc;
}
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
    errno_t rc;
    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

    if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
        return ENXIO;
    }
    if (!ifnet_is_attached(ifp, 1)) {
        return ENXIO;
    }

    rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
    VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *mp = pkt.cp_mbuf;

    return rc;
}
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
    errno_t rc;
    classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
    classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

    if (ifp == NULL || head == NULL || pkt_limit < 1) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
        return ENXIO;
    }
    if (!ifnet_is_attached(ifp, 1)) {
        return ENXIO;
    }

    rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
    VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *head = pkt_head.cp_mbuf;
    if (tail != NULL) {
        *tail = pkt_tail.cp_mbuf;
    }

    return rc;
}
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
    errno_t rc;
    classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
    classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

    if (ifp == NULL || head == NULL || byte_limit < 1) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
        return ENXIO;
    }
    if (!ifnet_is_attached(ifp, 1)) {
        return ENXIO;
    }

    rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
        byte_limit, &pkt_head, &pkt_tail, cnt, len);
    VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *head = pkt_head.cp_mbuf;
    if (tail != NULL) {
        *tail = pkt_tail.cp_mbuf;
    }

    return rc;
}
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
    errno_t rc;
    classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
    classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

    if (ifp == NULL || head == NULL || pkt_limit < 1 ||
        !MBUF_VALID_SC(sc)) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
        return ENXIO;
    }
    if (!ifnet_is_attached(ifp, 1)) {
        return ENXIO;
    }

    rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
        cnt, len);
    VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *head = pkt_head.cp_mbuf;
    if (tail != NULL) {
        *tail = pkt_tail.cp_mbuf;
    }

    return rc;
}
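
#if 0
/*
 * Illustrative sketch of how a driver start callback typically consumes the
 * dequeue KPIs above: DLIL wakes the callback via ifnet_start(), and the
 * driver drains if_snd one packet at a time until the classq runs dry.
 * example_if_start() and hw_tx_one() are hypothetical; a real driver also
 * handles ring-full conditions and transmit completions.
 */
static void
example_if_start(ifnet_t ifp)
{
    struct mbuf *m;

    for (;;) {
        if (ifnet_dequeue(ifp, &m) != 0) {
            break;              /* queue empty or interface not ready */
        }
        hw_tx_one(ifp, m);      /* hypothetical: hand the mbuf to hardware */
    }
}
#endif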
#if !CONFIG_EMBEDDED
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
    if (pre != NULL) {
        *pre = 0;
    }
    if (post != NULL) {
        *post = 0;
    }

    return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* !CONFIG_EMBEDDED */
static boolean_t
packet_has_vlan_tag(struct mbuf * m)
{
    u_int16_t tag = 0;

    if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
        tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
        if (tag == 0) {
            /* the packet is just priority-tagged, clear the bit */
            m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
        }
    }
    return tag != 0;
}
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
    boolean_t is_vlan_packet = FALSE;
    struct ifnet_filter *filter;
    struct mbuf *m = *m_p;
    int result = 0;

    is_vlan_packet = packet_has_vlan_tag(m);

    /*
     * Pass the inbound packet to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        /* exclude VLAN packets from external filters PR-3586856 */
        if (is_vlan_packet &&
            (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
            continue;
        }

        if (!filter->filt_skip && filter->filt_input != NULL &&
            (filter->filt_protocol == 0 ||
            filter->filt_protocol == protocol_family)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = (*filter->filt_input)(filter->filt_cookie,
                ifp, protocol_family, m_p, frame_header_p);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
            if (result != 0) {
                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);
                return result;
            }
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /*
     * Strip away M_PROTO1 bit prior to sending packet up the stack as
     * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
     */
    if (*m_p != NULL) {
        (*m_p)->m_flags &= ~M_PROTO1;
    }

    return 0;
}
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
    boolean_t is_vlan_packet;
    struct ifnet_filter *filter;
    struct mbuf *m = *m_p;
    int result = 0;

    is_vlan_packet = packet_has_vlan_tag(m);

    /*
     * Pass the outbound packet to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        /* exclude VLAN packets from external filters PR-3586856 */
        if (is_vlan_packet &&
            (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
            continue;
        }

        if (!filter->filt_skip && filter->filt_output != NULL &&
            (filter->filt_protocol == 0 ||
            filter->filt_protocol == protocol_family)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_output(filter->filt_cookie, ifp,
                protocol_family, m_p);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
            if (result != 0) {
                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);
                return result;
            }
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    return 0;
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
    int error;

    if (ifproto->proto_kpi == kProtoKPI_v1) {
        /* Version 1 protocols get one packet at a time */
        while (m != NULL) {
            char * frame_header;
            mbuf_t next_packet;

            next_packet = m->m_nextpkt;
            m->m_nextpkt = NULL;
            frame_header = m->m_pkthdr.pkt_hdr;
            m->m_pkthdr.pkt_hdr = NULL;
            error = (*ifproto->kpi.v1.input)(ifproto->ifp,
                ifproto->protocol_family, m, frame_header);
            if (error != 0 && error != EJUSTRETURN) {
                m_freem(m);
            }
            m = next_packet;
        }
    } else if (ifproto->proto_kpi == kProtoKPI_v2) {
        /* Version 2 protocols support packet lists */
        error = (*ifproto->kpi.v2.input)(ifproto->ifp,
            ifproto->protocol_family, m);
        if (error != 0 && error != EJUSTRETURN) {
            m_freem_list(m);
        }
    }
}
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
{
    struct ifnet_stat_increment_param *d = &inp->stats;

    if (s->packets_in != 0) {
        d->packets_in += s->packets_in;
    }
    if (s->bytes_in != 0) {
        d->bytes_in += s->bytes_in;
    }
    if (s->errors_in != 0) {
        d->errors_in += s->errors_in;
    }

    if (s->packets_out != 0) {
        d->packets_out += s->packets_out;
    }
    if (s->bytes_out != 0) {
        d->bytes_out += s->bytes_out;
    }
    if (s->errors_out != 0) {
        d->errors_out += s->errors_out;
    }

    if (s->collisions != 0) {
        d->collisions += s->collisions;
    }
    if (s->dropped != 0) {
        d->dropped += s->dropped;
    }

    if (poll) {
        PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
    }
}
static boolean_t
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
    struct ifnet_stat_increment_param *s = &inp->stats;

    /*
     * Use of atomic operations is unavoidable here because
     * these stats may also be incremented elsewhere via KPIs.
     */
    if (s->packets_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
        s->packets_in = 0;
    }
    if (s->bytes_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
        s->bytes_in = 0;
    }
    if (s->errors_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
        s->errors_in = 0;
    }

    if (s->packets_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
        s->packets_out = 0;
    }
    if (s->bytes_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
        s->bytes_out = 0;
    }
    if (s->errors_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
        s->errors_out = 0;
    }

    if (s->collisions != 0) {
        atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
        s->collisions = 0;
    }
    if (s->dropped != 0) {
        atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
        s->dropped = 0;
    }

    /*
     * No need for atomic operations as they are modified here
     * only from within the DLIL input thread context.
     */
    if (ifp->if_poll_tstats.packets != 0) {
        ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
        ifp->if_poll_tstats.packets = 0;
    }
    if (ifp->if_poll_tstats.bytes != 0) {
        ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
        ifp->if_poll_tstats.bytes = 0;
    }

    return ifp->if_data_threshold != 0;
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
    return dlil_input_packet_list_common(ifp, m, 0,
        IFNET_MODEL_INPUT_POLL_OFF, FALSE);
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
    return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
}
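
#if 0
/*
 * Illustrative sketch of the producer side of this input path: a driver
 * collects received mbufs into an m_nextpkt chain, fills in rcvif, and hands
 * the chain to the stack with the ifnet_input() KPI, which eventually lands
 * in dlil_input_packet_list_common() below. example_rx_deliver() and
 * rx_ring_harvest() are hypothetical names used only for this sketch.
 */
static void
example_rx_deliver(ifnet_t ifp)
{
    struct mbuf *head = NULL, **tailp = &head;
    struct ifnet_stat_increment_param stats;
    struct mbuf *m;

    bzero(&stats, sizeof(stats));
    while ((m = rx_ring_harvest(ifp)) != NULL) {    /* hypothetical */
        m->m_pkthdr.rcvif = ifp;
        stats.packets_in++;
        stats.bytes_in += m_pktlen(m);
        *tailp = m;
        tailp = &m->m_nextpkt;
    }
    if (head != NULL) {
        (void) ifnet_input(ifp, head, &stats);
    }
}
#endif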
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
    int error = 0;
    protocol_family_t protocol_family;
    mbuf_t next_packet;
    ifnet_t ifp = ifp_param;
    char *frame_header = NULL;
    struct if_proto *last_ifproto = NULL;
    mbuf_t pkt_first = NULL;
    mbuf_t *pkt_next = NULL;
    u_int32_t poll_thresh = 0, poll_ival = 0;
    int iorefcnt = 0;

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
        (poll_ival = if_rxpoll_interval_pkts) > 0) {
        poll_thresh = cnt;
    }

    while (m != NULL) {
        struct if_proto *ifproto = NULL;
        uint32_t pktf_mask;     /* pkt flags to preserve */

        if (ifp_param == NULL) {
            ifp = m->m_pkthdr.rcvif;
        }

        if ((ifp->if_eflags & IFEF_RXPOLL) &&
            (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
            poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
            ifnet_poll(ifp);
        }

        /* Check if this mbuf looks valid */
        MBUF_INPUT_CHECK(m, ifp);

        next_packet = m->m_nextpkt;
        m->m_nextpkt = NULL;
        frame_header = m->m_pkthdr.pkt_hdr;
        m->m_pkthdr.pkt_hdr = NULL;

        /*
         * Get an IO reference count if the interface is not
         * loopback (lo0) and it is attached; lo0 never goes
         * away, so optimize for that.
         */
        if (ifp != lo_ifp) {
            if (!ifnet_datamov_begin(ifp)) {
                m_freem(m);
                goto next;
            }
            iorefcnt = 1;
            /*
             * Preserve the time stamp if it was set.
             */
            pktf_mask = PKTF_TS_VALID;
        } else {
            /*
             * If this arrived on lo0, preserve interface addr
             * info to allow for connectivity between loopback
             * and local interface addresses.
             */
            pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
        }

        /* make sure packet comes in clean */
        m_classifier_init(m, pktf_mask);

        ifp_inc_traffic_class_in(ifp, m);

        /* find which protocol family this packet is for */
        ifnet_lock_shared(ifp);
        error = (*ifp->if_demux)(ifp, m, frame_header,
            &protocol_family);
        ifnet_lock_done(ifp);
        if (error != 0) {
            if (error == EJUSTRETURN) {
                goto next;
            }
            protocol_family = 0;
        }

        pktap_input(ifp, protocol_family, m, frame_header);
        /* Drop v4 packets received on CLAT46 enabled interface */
        if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
            m_freem(m);
            ip6stat.ip6s_clat464_in_v4_drop++;
            goto next;
        }

        /* Translate the packet if it is received on CLAT interface */
        if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
            && dlil_is_clat_needed(protocol_family, m)) {
            char *data = NULL;
            struct ether_header eh;
            struct ether_header *ehp = NULL;

            if (ifp->if_type == IFT_ETHER) {
                ehp = (struct ether_header *)(void *)frame_header;
                /* Skip RX Ethernet packets if they are not IPV6 */
                if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
                    goto skip_clat;
                }

                /* Keep a copy of frame_header for Ethernet packets */
                bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
            }
            error = dlil_clat64(ifp, &protocol_family, &m);
            data = (char *) mbuf_data(m);
            if (error != 0) {
                m_freem(m);
                ip6stat.ip6s_clat464_in_drop++;
                goto next;
            }
            /* Native v6 should be No-op */
            if (protocol_family != PF_INET) {
                goto skip_clat;
            }

            /* Do this only for translated v4 packets. */
            switch (ifp->if_type) {
            case IFT_CELLULAR:
                frame_header = data;
                break;
            case IFT_ETHER:
                /*
                 * Drop if the mbuf doesn't have enough
                 * space for Ethernet header
                 */
                if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
                    m_free(m);
                    ip6stat.ip6s_clat464_in_drop++;
                    goto next;
                }
                /*
                 * Set the frame_header ETHER_HDR_LEN bytes
                 * preceding the data pointer. Change
                 * the ether_type too.
                 */
                frame_header = data - ETHER_HDR_LEN;
                eh.ether_type = htons(ETHERTYPE_IP);
                bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
                break;
            }
        }
skip_clat:
        if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
            !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
            dlil_input_cksum_dbg(ifp, m, frame_header,
                protocol_family);
        }
        /*
         * For partial checksum offload, we expect the driver to
         * set the start offset indicating the start of the span
         * that is covered by the hardware-computed checksum;
         * adjust this start offset accordingly because the data
         * pointer has been advanced beyond the link-layer header.
         *
         * Virtual lan types (bridge, vlan, bond) can call
         * dlil_input_packet_list() with the same packet with the
         * checksum flags set. Set a flag indicating that the
         * adjustment has already been done.
         */
        if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
            /* adjustment has already been done */
        } else if ((m->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {
            int adj;
            if (frame_header == NULL ||
                frame_header < (char *)mbuf_datastart(m) ||
                frame_header > (char *)m->m_data ||
                (adj = (m->m_data - frame_header)) >
                m->m_pkthdr.csum_rx_start) {
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
                hwcksum_in_invalidated++;
            } else {
                m->m_pkthdr.csum_rx_start -= adj;
            }
            /* make sure we don't adjust more than once */
            m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
        }

        pktap_input(ifp, protocol_family, m, frame_header);

        if (m->m_flags & (M_BCAST | M_MCAST)) {
            atomic_add_64(&ifp->if_imcasts, 1);
        }
        /* run interface filters */
        error = dlil_interface_filters_input(ifp, &m,
            &frame_header, protocol_family);
        if (error != 0) {
            if (error != EJUSTRETURN) {
                m_freem(m);
            }
            goto next;
        }
        if ((m->m_flags & M_PROMISC) != 0) {
            m_freem(m);
            goto next;
        }

        /* Lookup the protocol attachment to this interface */
        if (protocol_family == 0) {
            ifproto = NULL;
        } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
            (last_ifproto->protocol_family == protocol_family)) {
            VERIFY(ifproto == NULL);
            ifproto = last_ifproto;
            if_proto_ref(last_ifproto);
        } else {
            VERIFY(ifproto == NULL);
            ifnet_lock_shared(ifp);
            /* callee holds a proto refcnt upon success */
            ifproto = find_attached_proto(ifp, protocol_family);
            ifnet_lock_done(ifp);
        }
        if (ifproto == NULL) {
            /* no protocol for this packet, discard */
            m_freem(m);
            goto next;
        }
        if (ifproto != last_ifproto) {
            if (last_ifproto != NULL) {
                /* pass up the list for the previous protocol */
                dlil_ifproto_input(last_ifproto, pkt_first);
                pkt_first = NULL;
                if_proto_free(last_ifproto);
            }
            last_ifproto = ifproto;
            if_proto_ref(ifproto);
        }
        /* extend the list */
        m->m_pkthdr.pkt_hdr = frame_header;
        if (pkt_first == NULL) {
            pkt_first = m;
        } else {
            *pkt_next = m;
        }
        pkt_next = &m->m_nextpkt;

next:
        if (next_packet == NULL && last_ifproto != NULL) {
            /* pass up the last list of packets */
            dlil_ifproto_input(last_ifproto, pkt_first);
            if_proto_free(last_ifproto);
            last_ifproto = NULL;
        }
        if (ifproto != NULL) {
            if_proto_free(ifproto);
            ifproto = NULL;
        }

        m = next_packet;

        /* update the driver's multicast filter, if needed */
        if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
            ifp->if_updatemcasts = 0;
        }
        if (iorefcnt == 1) {
            ifnet_datamov_end(ifp);
            iorefcnt = 0;
        }
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
static int
if_mcasts_update(struct ifnet *ifp)
{
    int err;

    err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
    if (err == EAFNOSUPPORT) {
        err = 0;
    }
    DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
        "(err=%d)\n", if_name(ifp),
        (err == 0 ? "successfully restored" : "failed to restore"),
        ifp->if_updatemcasts, err);

    /* just return success */
    return 0;
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
    if (ifp != NULL) {
        ifnet_increment_generation(ifp);
    }

#if NECP
    necp_update_all_clients();
#endif /* NECP */

    return kev_post_msg(event);
}
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)
{
    struct kev_msg ev_msg;
    struct net_event_data ev_data;

    bzero(&ev_data, sizeof(ev_data));
    bzero(&ev_msg, sizeof(ev_msg));
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_DL_SUBCLASS;
    ev_msg.event_code = KEV_DL_SIFFLAGS;
    strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
    ev_data.if_family = ifp->if_family;
    ev_data.if_unit = (u_int32_t) ifp->if_unit;
    ev_msg.dv[0].data_length = sizeof(struct net_event_data);
    ev_msg.dv[0].data_ptr = &ev_data;
    ev_msg.dv[1].data_length = 0;
    dlil_post_complete_msg(ifp, &ev_msg);
}
#define TMP_IF_PROTO_ARR_SIZE   10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
    struct ifnet_filter *filter = NULL;
    struct if_proto *proto = NULL;
    int if_proto_count = 0;
    struct if_proto **tmp_ifproto_arr = NULL;
    struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
    int tmp_ifproto_arr_idx = 0;
    bool tmp_malloc = false;

    /*
     * Pass the event to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_event != NULL) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            filter->filt_event(filter->filt_cookie, ifp,
                filter->filt_protocol, event);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1)) {
        goto done;
    }

    /*
     * An embedded tmp_list_entry in if_proto may still get
     * over-written by another thread after giving up ifnet lock,
     * therefore we are avoiding embedded pointers here.
     */
    ifnet_lock_shared(ifp);
    if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
    if (if_proto_count) {
        int i;
        VERIFY(ifp->if_proto_hash != NULL);
        if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
            tmp_ifproto_arr = tmp_ifproto_stack_arr;
        } else {
            MALLOC(tmp_ifproto_arr, struct if_proto **,
                sizeof(*tmp_ifproto_arr) * if_proto_count,
                M_TEMP, M_WAITOK);
            if (tmp_ifproto_arr == NULL) {
                ifnet_lock_done(ifp);
                goto cleanup;
            }
            tmp_malloc = true;
        }

        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
            SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
                next_hash) {
                if_proto_ref(proto);
                tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
                tmp_ifproto_arr_idx++;
            }
        }
        VERIFY(if_proto_count == tmp_ifproto_arr_idx);
    }
    ifnet_lock_done(ifp);

    for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
        tmp_ifproto_arr_idx++) {
        proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
        VERIFY(proto != NULL);
        proto_media_event eventp =
            (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.event :
            proto->kpi.v2.event);

        if (eventp != NULL) {
            eventp(ifp, proto->protocol_family,
                event);
        }
        if_proto_free(proto);
    }

cleanup:
    if (tmp_malloc) {
        FREE(tmp_ifproto_arr, M_TEMP);
    }

    /* Pass the event to the interface */
    if (ifp->if_event != NULL) {
        ifp->if_event(ifp, event);
    }

    /* Release the io ref count */
    ifnet_decr_iorefcnt(ifp);
done:
    return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
    struct kev_msg kev_msg;
    int result = 0;

    if (ifp == NULL || event == NULL) {
        return EINVAL;
    }

    bzero(&kev_msg, sizeof(kev_msg));
    kev_msg.vendor_code = event->vendor_code;
    kev_msg.kev_class = event->kev_class;
    kev_msg.kev_subclass = event->kev_subclass;
    kev_msg.event_code = event->event_code;
    kev_msg.dv[0].data_ptr = &event->event_data[0];
    kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
    kev_msg.dv[1].data_length = 0;

    result = dlil_event_internal(ifp, &kev_msg, TRUE);

    return result;
}
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
    struct mbuf *m;
    struct ip *ip;
    struct ip6_hdr *ip6;
    int type = SOCK_RAW;

    if (!raw) {
        switch (family) {
        case PF_INET:
            m = m_pullup(*mp, sizeof(struct ip));
            if (m == NULL) {
                break;
            }
            *mp = m;
            ip = mtod(m, struct ip *);
            if (ip->ip_p == IPPROTO_TCP) {
                type = SOCK_STREAM;
            } else if (ip->ip_p == IPPROTO_UDP) {
                type = SOCK_DGRAM;
            }
            break;
        case PF_INET6:
            m = m_pullup(*mp, sizeof(struct ip6_hdr));
            if (m == NULL) {
                break;
            }
            *mp = m;
            ip6 = mtod(m, struct ip6_hdr *);
            if (ip6->ip6_nxt == IPPROTO_TCP) {
                type = SOCK_STREAM;
            } else if (ip6->ip6_nxt == IPPROTO_UDP) {
                type = SOCK_DGRAM;
            }
            break;
        }
    }

    return type;
}
static inline void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
    mbuf_t n = m;
    int chainlen = 0;

    while (n != NULL) {
        chainlen++;
        n = n->m_next;
    }
    switch (chainlen) {
    case 0:
        break;
    case 1:
        atomic_add_64(&cls->cls_one, 1);
        break;
    case 2:
        atomic_add_64(&cls->cls_two, 1);
        break;
    case 3:
        atomic_add_64(&cls->cls_three, 1);
        break;
    case 4:
        atomic_add_64(&cls->cls_four, 1);
        break;
    case 5:
    default:
        atomic_add_64(&cls->cls_five_or_more, 1);
        break;
    }
}
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
    char *frame_type = NULL;
    char *dst_linkaddr = NULL;
    int retval = 0;
    char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
    char dst_linkaddr_buffer[MAX_LINKADDR * 4];
    struct if_proto *proto = NULL;
    mbuf_t m = NULL;
    mbuf_t send_head = NULL;
    mbuf_t *send_tail = &send_head;
    int iorefcnt = 0;
    u_int32_t pre = 0, post = 0;
    u_int32_t fpkts = 0, fbytes = 0;
    int32_t flen = 0;
    struct timespec now;
    u_int64_t now_nsec;
    boolean_t did_clat46 = FALSE;
    protocol_family_t old_proto_family = proto_family;
    struct sockaddr_in6 dest6;
    struct rtentry *rt = NULL;
    u_int32_t m_loop_set = 0;

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    /*
     * Get an io refcnt if the interface is attached to prevent ifnet_detach
     * from happening while this operation is in progress
     */
    if (!ifnet_datamov_begin(ifp)) {
        retval = ENXIO;
        goto cleanup;
    }
    iorefcnt = 1;

    VERIFY(ifp->if_output_dlil != NULL);

    /* update the driver's multicast filter, if needed */
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
        ifp->if_updatemcasts = 0;
    }

    frame_type = frame_type_buffer;
    dst_linkaddr = dst_linkaddr_buffer;

    if (raw == 0) {
        ifnet_lock_shared(ifp);
        /* callee holds a proto refcnt upon success */
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
            ifnet_lock_done(ifp);
            retval = ENXIO;
            goto cleanup;
        }
        ifnet_lock_done(ifp);
    }
preout_again:
    if (packetlist == NULL) {
        goto cleanup;
    }

    m = packetlist;
    packetlist = packetlist->m_nextpkt;
    m->m_nextpkt = NULL;

    /*
     * Perform address family translation for the first
     * packet outside the loop in order to perform address
     * lookup for the translated proto family.
     */
    if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
        (ifp->if_type == IFT_CELLULAR ||
        dlil_is_clat_needed(proto_family, m))) {
        retval = dlil_clat46(ifp, &proto_family, &m);
        /*
         * Go to the next packet if translation fails
         */
        if (retval != 0) {
            m_freem(m);
            m = NULL;
            ip6stat.ip6s_clat464_out_drop++;
            /* Make sure that the proto family is PF_INET */
            ASSERT(proto_family == PF_INET);
            goto preout_again;
        }
    }

    /*
     * Free the old one and make it point to the IPv6 proto structure.
     *
     * Change proto for the first time we have successfully
     * performed address family translation.
     */
    if (!did_clat46 && proto_family == PF_INET6) {
        did_clat46 = TRUE;

        if (proto != NULL) {
            if_proto_free(proto);
        }
        ifnet_lock_shared(ifp);
        /* callee holds a proto refcnt upon success */
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
            ifnet_lock_done(ifp);
            retval = ENXIO;
            m_freem(m);
            m = NULL;
            goto cleanup;
        }
        ifnet_lock_done(ifp);
        if (ifp->if_type == IFT_ETHER) {
            /* Update the dest to translated v6 address */
            dest6.sin6_len = sizeof(struct sockaddr_in6);
            dest6.sin6_family = AF_INET6;
            dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
            dest = (const struct sockaddr *)&dest6;

            /*
             * Lookup route to the translated destination
             * Free this route ref during cleanup
             */
            rt = rtalloc1_scoped((struct sockaddr *)&dest6,
                0, 0, ifp->if_index);

            route = rt;
        }
    }
    /*
     * This path gets packet chain going to the same destination.
     * The pre output routine is used to either trigger resolution of
     * the next hop or retrieve the next hop's link layer addressing.
     * For ex: ether_inet(6)_pre_output routine.
     *
     * If the routine returns EJUSTRETURN, it implies that packet has
     * been queued, and therefore we have to call preout_again for the
     * following packet in the chain.
     *
     * For errors other than EJUSTRETURN, the current packet is freed
     * and the rest of the chain (pointed by packetlist) is freed as
     * part of clean up.
     *
     * Else if there is no error the retrieved information is used for
     * all the packets in the chain.
     */
    if (raw == 0) {
        proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
        retval = 0;
        if (preoutp != NULL) {
            retval = preoutp(ifp, proto_family, &m, dest, route,
                frame_type, dst_linkaddr);

            if (retval != 0) {
                if (retval == EJUSTRETURN) {
                    goto preout_again;
                }
                m_freem(m);
                m = NULL;
                goto cleanup;
            }
        }
    }

#if CONFIG_MACF_NET
    retval = mac_ifnet_check_transmit(ifp, m, proto_family,
        dlil_get_socket_type(&m, proto_family, raw));
    if (retval != 0) {
        m_freem(m);
        goto cleanup;
    }
#endif

    do {
        /*
         * Perform address family translation if needed.
         * For now we only support stateless 4 to 6 translation
         * on the out path.
         *
         * The routine below translates IP header, updates protocol
         * checksum and also translates ICMP.
         *
         * We skip the first packet as it is already translated and
         * the proto family is set to PF_INET6.
         */
        if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
            (ifp->if_type == IFT_CELLULAR ||
            dlil_is_clat_needed(proto_family, m))) {
            retval = dlil_clat46(ifp, &proto_family, &m);
            /* Goto the next packet if the translation fails */
            if (retval != 0) {
                m_freem(m);
                m = NULL;
                ip6stat.ip6s_clat464_out_drop++;
                goto next;
            }
        }

#if CONFIG_DTRACE
        if (!raw && proto_family == PF_INET) {
            struct ip *ip = mtod(m, struct ip *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip *, ip, struct ifnet *, ifp,
                struct ip *, ip, struct ip6_hdr *, NULL);
        } else if (!raw && proto_family == PF_INET6) {
            struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip6_hdr *, ip6, struct ifnet *, ifp,
                struct ip *, NULL, struct ip6_hdr *, ip6);
        }
#endif /* CONFIG_DTRACE */
        if (raw == 0 && ifp->if_framer != NULL) {
            int rcvif_set = 0;

            /*
             * If this is a broadcast packet that needs to be
             * looped back into the system, set the inbound ifp
             * to that of the outbound ifp. This will allow
             * us to determine that it is a legitimate packet
             * for the system. Only set the ifp if it's not
             * already set, just to be safe.
             */
            if ((m->m_flags & (M_BCAST | M_LOOP)) &&
                m->m_pkthdr.rcvif == NULL) {
                m->m_pkthdr.rcvif = ifp;
                rcvif_set = 1;
            }
            m_loop_set = m->m_flags & M_LOOP;
            retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
                frame_type, &pre, &post);
            if (retval != 0) {
                if (retval != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }

            /*
             * For partial checksum offload, adjust the start
             * and stuff offsets based on the prepended header.
             */
            if ((m->m_pkthdr.csum_flags &
                (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
                (CSUM_DATA_VALID | CSUM_PARTIAL)) {
                m->m_pkthdr.csum_tx_stuff += pre;
                m->m_pkthdr.csum_tx_start += pre;
            }

            if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
                dlil_output_cksum_dbg(ifp, m, pre,
                    proto_family);
            }

            /*
             * Clear the ifp if it was set above, and to be
             * safe, only if it is still the same as the
             * outbound ifp we have in context. If it was
             * looped back, then a copy of it was sent to the
             * loopback interface with the rcvif set, and we
             * are clearing the one that will go down to the
             * layer below.
             */
            if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
                m->m_pkthdr.rcvif = NULL;
            }
        }

        /*
         * Let interface filters (if any) do their thing ...
         */
        retval = dlil_interface_filters_output(ifp, &m, proto_family);
        if (retval != 0) {
            if (retval != EJUSTRETURN) {
                m_freem(m);
            }
            goto next;
        }

        /*
         * Strip away M_PROTO1 bit prior to sending packet
         * to the driver as this field may be used by the driver
         */
        m->m_flags &= ~M_PROTO1;

        /*
         * If the underlying interface is not capable of handling a
         * packet whose data portion spans across physically disjoint
         * pages, we need to "normalize" the packet so that we pass
         * down a chain of mbufs where each mbuf points to a span that
         * resides in the system page boundary. If the packet does
         * not cross page(s), the following is a no-op.
         */
        if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
            if ((m = m_normalize(m)) == NULL) {
                goto next;
            }
        }

        /*
         * If this is a TSO packet, make sure the interface still
         * advertise TSO capability.
         */
        if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
            retval = EMSGSIZE;
            m_freem(m);
            goto cleanup;
        }

        ifp_inc_traffic_class_out(ifp, m);
        pktap_output(ifp, proto_family, m, pre, post);

        /*
         * Count the number of elements in the mbuf chain
         */
        if (tx_chain_len_count) {
            dlil_count_chain_len(m, &tx_chain_len_stats);
        }

        /*
         * Record timestamp; ifnet_enqueue() will use this info
         * rather than redoing the work. An optimization could
         * involve doing this just once at the top, if there are
         * no interface filters attached, but that's probably
         * not a big deal.
         */
        nanouptime(&now);
        net_timernsec(&now, &now_nsec);
        (void) mbuf_set_timestamp(m, now_nsec, TRUE);

        /*
         * Discard partial sum information if this packet originated
         * from another interface; the packet would already have the
         * final checksum and we shouldn't recompute it.
         */
        if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {
            m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
            m->m_pkthdr.csum_data = 0;
        }
        /*
         * Finally, call the driver.
         */
        if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen += (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            }
            *send_tail = m;
            send_tail = &m->m_nextpkt;
        } else {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen = (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            } else {
                flen = 0;
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
                0, 0, 0, 0, 0);
            retval = (*ifp->if_output_dlil)(ifp, m);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL && adv->code == FADV_SUCCESS) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                        FADV_SUSPENDED);
                }
                retval = 0;
            }
            if (retval == 0 && flen > 0) {
                fbytes += flen;
                fpkts++;
            }
            if (retval != 0 && dlil_verbose) {
                DLIL_PRINTF("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp),
                    retval);
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
                0, 0, 0, 0, 0);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
        m = packetlist;
        if (m != NULL) {
            m->m_flags |= m_loop_set;
            packetlist = packetlist->m_nextpkt;
            m->m_nextpkt = NULL;
        }
        /* Reset the proto family to old proto family for CLAT */
        if (did_clat46) {
            proto_family = old_proto_family;
        }
    } while (m != NULL);

    if (send_head != NULL) {
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
            0, 0, 0, 0, 0);
        if (ifp->if_eflags & IFEF_SENDLIST) {
            retval = (*ifp->if_output_dlil)(ifp, send_head);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                        FADV_SUSPENDED);
                }
                retval = 0;
            }
            if (retval == 0 && flen > 0) {
                fbytes += flen;
                fpkts++;
            }
            if (retval != 0 && dlil_verbose) {
                DLIL_PRINTF("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp), retval);
            }
        } else {
            struct mbuf *send_m;
            int enq_cnt = 0;

            VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
            while (send_head != NULL) {
                send_m = send_head;
                send_head = send_m->m_nextpkt;
                send_m->m_nextpkt = NULL;
                retval = (*ifp->if_output_dlil)(ifp, send_m);
                if (retval == EQFULL || retval == EQSUSPENDED) {
                    if (adv != NULL) {
                        adv->code = (retval == EQFULL ?
                            FADV_FLOW_CONTROLLED :
                            FADV_SUSPENDED);
                    }
                    retval = 0;
                }
                if (retval == 0) {
                    enq_cnt++;
                    if (flen > 0) {
                        fpkts++;
                    }
                }
                if (retval != 0 && dlil_verbose) {
                    DLIL_PRINTF("%s: output error on %s "
                        "retval = %d\n",
                        __func__, if_name(ifp), retval);
                }
            }
            if (enq_cnt > 0) {
                fbytes += flen;
                ifnet_start(ifp);
            }
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
    if (fbytes > 0) {
        ifp->if_fbytes += fbytes;
    }
    if (fpkts > 0) {
        ifp->if_fpackets += fpkts;
    }
    if (proto != NULL) {
        if_proto_free(proto);
    }
    if (packetlist) { /* if any packets are left, clean up */
        mbuf_freem_list(packetlist);
    }
    if (retval == EJUSTRETURN) {
        retval = 0;
    }
    if (iorefcnt == 1) {
        ifnet_datamov_end(ifp);
    }
    if (rt != NULL) {
        rtfree(rt);
        rt = NULL;
    }

    return retval;
}
/*
 * This routine checks if the destination address is not a loopback, link-local,
 * multicast or broadcast address.
 */
static boolean_t
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
    boolean_t ret = FALSE;

    switch (proto_family) {
    case PF_INET: {
        struct ip *iph = mtod(m, struct ip *);
        if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
            ret = TRUE;
        }
        break;
    }
    case PF_INET6: {
        struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
        if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
            CLAT64_NEEDED(&ip6h->ip6_dst)) {
            ret = TRUE;
        }
        break;
    }
    }

    return ret;
}
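
#if 0
/*
 * Illustrative sketch of the address mapping that dlil_clat46() below
 * delegates to nat464_synthesize_ipv6(): for the common 96-bit NAT64 prefix
 * the IPv4 address is simply appended to the prefix (RFC 6052 style
 * embedding). This is a simplified model for reading the code; the real
 * helper supports the other RFC 6052 prefix lengths and uses the prefix
 * learned through PLAT discovery on the interface.
 */
static void
example_synthesize_v6(const struct in6_addr *prefix96,
    const struct in_addr *v4, struct in6_addr *out)
{
    *out = *prefix96;                       /* bits   0..95: NAT64 prefix */
    bcopy(v4, &out->s6_addr[12], 4);        /* bits 96..127: IPv4 address */
}
#endif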
/*
 * @brief This routine translates an IPv4 packet to an IPv6 packet,
 *     updates the protocol checksum and also translates ICMP for code
 *     along with inner header translation.
 *
 * @param ifp Pointer to the interface
 * @param proto_family pointer to protocol family. It is updated if function
 *     performs the translation successfully.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 *     routine can end up changing the mbuf to a different one.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
    VERIFY(*proto_family == PF_INET);
    VERIFY(IS_INTF_CLAT46(ifp));

    pbuf_t pbuf_store, *pbuf = NULL;
    struct ip *iph = NULL;
    struct in_addr osrc, odst;
    uint8_t proto = 0;
    struct in6_ifaddr *ia6_clat_src = NULL;
    struct in6_addr *src = NULL;
    struct in6_addr dst;
    int error = 0;
    uint32_t off = 0;
    uint64_t tot_len = 0;
    uint16_t ip_id_val = 0;
    uint16_t ip_frag_off = 0;

    boolean_t is_frag = FALSE;
    boolean_t is_first_frag = TRUE;
    boolean_t is_last_frag = TRUE;

    pbuf_init_mbuf(&pbuf_store, *m, ifp);
    pbuf = &pbuf_store;
    iph = pbuf->pb_data;

    osrc = iph->ip_src;
    odst = iph->ip_dst;
    proto = iph->ip_p;
    off = iph->ip_hl << 2;
    ip_id_val = iph->ip_id;
    ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

    tot_len = ntohs(iph->ip_len);

    /*
     * For packets that are not first frags
     * we only need to adjust CSUM.
     * For 4 to 6, Fragmentation header gets appended
     * after proto translation.
     */
    if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
        is_frag = TRUE;

        /* If the offset is not zero, it is not first frag */
        if (ip_frag_off != 0) {
            is_first_frag = FALSE;
        }

        /* If IP_MF is set, then it is not last frag */
        if (ntohs(iph->ip_off) & IP_MF) {
            is_last_frag = FALSE;
        }
    }

    /*
     * Retrieve the local IPv6 CLAT46 address reserved for stateless
     * translation.
     */
    ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
    if (ia6_clat_src == NULL) {
        ip6stat.ip6s_clat464_out_nov6addr_drop++;
        error = -1;
        goto cleanup;
    }

    src = &ia6_clat_src->ia_addr.sin6_addr;

    /*
     * Translate IPv4 destination to IPv6 destination by using the
     * prefixes learned through prior PLAT discovery.
     */
    if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
        ip6stat.ip6s_clat464_out_v6synthfail_drop++;
        goto cleanup;
    }

    /* Translate the IP header part first */
    error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
        iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;

    iph = NULL;     /* Invalidate iph as pbuf has been modified */

    if (error != 0) {
        ip6stat.ip6s_clat464_out_46transfail_drop++;
        goto cleanup;
    }

    /*
     * Translate protocol header, update checksum, checksum flags
     * and related fields.
     */
    error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
        proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

    if (error != 0) {
        ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
        goto cleanup;
    }

    /* Now insert the IPv6 fragment header */
    if (is_frag) {
        error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);

        if (error != 0) {
            ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
            goto cleanup;
        }
    }

cleanup:
    if (ia6_clat_src != NULL) {
        IFA_REMREF(&ia6_clat_src->ia_ifa);
    }

    if (pbuf_is_valid(pbuf)) {
        *m = pbuf->pb_mbuf;
        pbuf->pb_mbuf = NULL;
        pbuf_destroy(pbuf);
    } else {
        error = -1;
        ip6stat.ip6s_clat464_out_invalpbuf_drop++;
    }

    if (error == 0) {
        *proto_family = PF_INET6;
        ip6stat.ip6s_clat464_out_success++;
    }

    return error;
}
/*
 * @brief This routine translates an incoming IPv6 packet to an IPv4 packet,
 *     updates the protocol checksum and also translates the ICMPv6 outer
 *     and inner headers.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
    VERIFY(*proto_family == PF_INET6);
    VERIFY(IS_INTF_CLAT46(ifp));

    struct ip6_hdr *ip6h = NULL;
    struct in6_addr osrc, odst;
    uint8_t proto = 0;
    struct in6_ifaddr *ia6_clat_dst = NULL;
    struct in_ifaddr *ia4_clat_dst = NULL;
    struct in_addr *dst = NULL;
    struct in_addr src;
    int error = 0;
    uint32_t off = 0;
    u_int64_t tot_len = 0;
    uint8_t tos = 0;
    boolean_t is_first_frag = TRUE;

    /* Incoming mbuf does not contain valid IP6 header */
    if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
        ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
        (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
        ip6stat.ip6s_clat464_in_tooshort_drop++;
        return -1;
    }

    ip6h = mtod(*m, struct ip6_hdr *);
    /* Validate that mbuf contains IP payload equal to ip6_plen */
    if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
        ip6stat.ip6s_clat464_in_tooshort_drop++;
        return -1;
    }

    osrc = ip6h->ip6_src;
    odst = ip6h->ip6_dst;

    /*
     * Retrieve the local CLAT46 reserved IPv6 address.
     * Let the packet pass if we don't find one, as the flag
     * may get set before IPv6 configuration has taken place.
     */
    ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
    if (ia6_clat_dst == NULL) {
        goto done;
    }

    /*
     * Check if the original dest in the packet is same as the reserved
     * CLAT46 IPv6 address
     */
    if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
        pbuf_t pbuf_store, *pbuf = NULL;
        pbuf_init_mbuf(&pbuf_store, *m, ifp);
        pbuf = &pbuf_store;

        /*
         * Retrieve the local CLAT46 IPv4 address reserved for stateless
         * translation.
         */
        ia4_clat_dst = inifa_ifpclatv4(ifp);
        if (ia4_clat_dst == NULL) {
            IFA_REMREF(&ia6_clat_dst->ia_ifa);
            ip6stat.ip6s_clat464_in_nov4addr_drop++;
            error = -1;
            goto cleanup;
        }
        IFA_REMREF(&ia6_clat_dst->ia_ifa);

        /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
        dst = &ia4_clat_dst->ia_addr.sin_addr;
        if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
            ip6stat.ip6s_clat464_in_v4synthfail_drop++;
            error = -1;
            goto cleanup;
        }

        ip6h = pbuf->pb_data;
        off = sizeof(struct ip6_hdr);
        proto = ip6h->ip6_nxt;
        tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
        tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

        /*
         * Translate the IP header and update the fragmentation
         * header if needed.
         */
        error = (nat464_translate_64(pbuf, off, tos, &proto,
            ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
            0 : -1;

        ip6h = NULL;    /* Invalidate ip6h as pbuf has been changed */

        if (error != 0) {
            ip6stat.ip6s_clat464_in_64transfail_drop++;
            goto cleanup;
        }

        /*
         * Translate protocol header, update checksum, checksum flags
         * and related fields.
         */
        error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
            (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
            NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

        if (error != 0) {
            ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
            goto cleanup;
        }

cleanup:
        if (ia4_clat_dst != NULL) {
            IFA_REMREF(&ia4_clat_dst->ia_ifa);
        }

        if (pbuf_is_valid(pbuf)) {
            *m = pbuf->pb_mbuf;
            pbuf->pb_mbuf = NULL;
            pbuf_destroy(pbuf);
        } else {
            error = -1;
            ip6stat.ip6s_clat464_in_invalpbuf_drop++;
        }

        if (error == 0) {
            *proto_family = PF_INET;
            ip6stat.ip6s_clat464_in_success++;
        }
    } /* CLAT traffic */

done:
    return error;
}
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
    struct ifnet_filter *filter;
    int retval = EOPNOTSUPP;
    int result = 0;

    if (ifp == NULL || ioctl_code == 0) {
        return EINVAL;
    }

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1)) {
        return EOPNOTSUPP;
    }

    /*
     * Run the interface filters first.
     * We want to run all filters before calling the protocol,
     * interface family, or interface.
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
            filter->filt_protocol == proto_fam)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_ioctl(filter->filt_cookie, ifp,
                proto_fam, ioctl_code, ioctl_arg);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP) {
                    result = EOPNOTSUPP;
                }
                retval = result;
                if (retval != 0 && retval != EOPNOTSUPP) {
                    /* we're done with the filter list */
                    if_flt_monitor_unbusy(ifp);
                    lck_mtx_unlock(&ifp->if_flt_lock);
                    goto cleanup;
                }
            }
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Allow the protocol to handle the ioctl */
    if (proto_fam != 0) {
        struct if_proto *proto;

        /* callee holds a proto refcnt upon success */
        ifnet_lock_shared(ifp);
        proto = find_attached_proto(ifp, proto_fam);
        ifnet_lock_done(ifp);
        if (proto != NULL) {
            proto_media_ioctl ioctlp =
                (proto->proto_kpi == kProtoKPI_v1 ?
                proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
            result = EOPNOTSUPP;
            if (ioctlp != NULL) {
                result = ioctlp(ifp, proto_fam, ioctl_code,
                    ioctl_arg);
            }
            if_proto_free(proto);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP) {
                    result = EOPNOTSUPP;
                }
                retval = result;
                if (retval && retval != EOPNOTSUPP) {
                    goto cleanup;
                }
            }
        }
    }

    /* retval is either 0 or EOPNOTSUPP */

    /*
     * Let the interface handle this ioctl.
     * If it returns EOPNOTSUPP, ignore that, we may have
     * already handled this in the protocol or family.
     */
    if (ifp->if_ioctl) {
        result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
    }

    /* Only update retval if no one has handled the ioctl */
    if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
        if (result == ENOTSUP) {
            result = EOPNOTSUPP;
        }
        retval = result;
        if (retval && retval != EOPNOTSUPP) {
            goto cleanup;
        }
    }

cleanup:
    if (retval == EJUSTRETURN) {
        retval = 0;
    }

    ifnet_decr_iorefcnt(ifp);

    return retval;
}
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
    errno_t error = 0;

    if (ifp->if_set_bpf_tap) {
        /* Get an io reference on the interface if it is attached */
        if (!ifnet_is_attached(ifp, 1)) {
            return ENXIO;
        }
        error = ifp->if_set_bpf_tap(ifp, mode, callback);
        ifnet_decr_iorefcnt(ifp);
    }
    return error;
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
    errno_t result = EOPNOTSUPP;
    struct if_proto *proto;
    const struct sockaddr *verify;
    proto_media_resolve_multi resolvep;

    if (!ifnet_is_attached(ifp, 1)) {
        return result;
    }

    bzero(ll_addr, ll_len);

    /* Call the protocol first; callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, proto_addr->sa_family);
    ifnet_lock_done(ifp);
    if (proto != NULL) {
        resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
        if (resolvep != NULL) {
            result = resolvep(ifp, proto_addr,
                (struct sockaddr_dl *)(void *)ll_addr, ll_len);
        }
        if_proto_free(proto);
    }

    /* Let the interface verify the multicast address */
    if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
        if (result == 0) {
            verify = ll_addr;
        } else {
            verify = proto_addr;
        }
        result = ifp->if_check_multi(ifp, verify);
    }

    ifnet_decr_iorefcnt(ifp);
    return result;
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
    struct if_proto *proto;
    errno_t result = 0;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, target_proto->sa_family);
    ifnet_lock_done(ifp);
    if (proto == NULL) {
        result = ENOTSUP;
    } else {
        proto_media_send_arp arpp;
        arpp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
        if (arpp == NULL) {
            result = ENOTSUP;
        } else {
            switch (arpop) {
            case ARPOP_REQUEST:
                arpstat.txrequests++;
                if (target_hw != NULL) {
                    arpstat.txurequests++;
                }
                break;
            case ARPOP_REPLY:
                arpstat.txreplies++;
                break;
            }
            result = arpp(ifp, arpop, sender_hw, sender_proto,
                target_hw, target_proto);
        }
        if_proto_free(proto);
    }

    return result;
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t pop = 0;

    if (push != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());

        pop = push & ~uth->uu_network_marks;
        if (pop != 0) {
            uth->uu_network_marks |= pop;
        }
    }

    return (net_thread_marks_t)&base[pop];
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t unpop = 0;

    if (unpush != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());

        unpop = unpush & uth->uu_network_marks;
        if (unpop != 0) {
            uth->uu_network_marks &= ~unpop;
        }
    }

    return (net_thread_marks_t)&base[unpop];
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    const ptrdiff_t pop = (const char *)popx - (const char *)base;

    if (pop != 0) {
        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((pop & ones) == pop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
        uth->uu_network_marks &= ~pop;
    }
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

    if (unpop != 0) {
        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((unpop & ones) == unpop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
        uth->uu_network_marks |= unpop;
    }
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
    if (check != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());
        return uth->uu_network_marks & check;
    } else {
        return 0;
    }
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
    if (check != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());
        return ~uth->uu_network_marks & check;
    } else {
        return 0;
    }
}
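
#if 0
/*
 * Illustrative usage of the mark push/pop pairing above. A push returns an
 * opaque token encoding only the bits that were not already set on the
 * thread, so a nested push of the same bit pops back to the outer state
 * correctly. NET_THREAD_HELD_PF is just one example mark bit; any u_int32_t
 * mark value works the same way. example_marked_section() is hypothetical.
 */
static void
example_marked_section(void)
{
    net_thread_marks_t marks;

    marks = net_thread_marks_push(NET_THREAD_HELD_PF);
    /* ... code that observes the mark via net_thread_is_marked() ... */
    net_thread_marks_pop(marks);
}
#endif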
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
    if (target_sin == NULL || sender_sin == NULL) {
        return FALSE;
    }

    return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
    errno_t result = 0;
    const struct sockaddr_in * sender_sin;
    const struct sockaddr_in * target_sin;
    struct sockaddr_inarp target_proto_sinarp;
    struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

    if (target_proto == NULL || sender_proto == NULL) {
        return EINVAL;
    }

    if (sender_proto->sa_family != target_proto->sa_family) {
        return EINVAL;
    }

    /*
     * If the target is a (default) router, provide that
     * information to the send_arp callback routine.
     */
    if (rtflags & RTF_ROUTER) {
        bcopy(target_proto, &target_proto_sinarp,
            sizeof(struct sockaddr_in));
        target_proto_sinarp.sin_other |= SIN_ROUTER;
        target_proto = (struct sockaddr *)&target_proto_sinarp;
    }

    /*
     * If this is an ARP request and the target IP is IPv4LL,
     * send the request on all interfaces. The exception is
     * an announcement, which must only appear on the specific
     * interface.
     */
    sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
    target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
    if (target_proto->sa_family == AF_INET &&
        IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
        ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
        !_is_announcement(sender_sin, target_sin)) {
        ifnet_t *ifp_list;
        u_int32_t count;
        u_int32_t ifp_on;

        result = ENOTSUP;

        if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
            for (ifp_on = 0; ifp_on < count; ifp_on++) {
                errno_t new_result;
                ifaddr_t source_hw = NULL;
                ifaddr_t source_ip = NULL;
                struct sockaddr_in source_ip_copy;
                struct ifnet *cur_ifp = ifp_list[ifp_on];

                /*
                 * Only arp on interfaces marked for IPv4LL
                 * ARPing. This may mean that we don't ARP on
                 * the interface the subnet route points to.
                 */
                if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
                    continue;
                }

                /* Find the source IP address */
                ifnet_lock_shared(cur_ifp);
                source_hw = cur_ifp->if_lladdr;
                TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
                    ifa_link) {
                    IFA_LOCK(source_ip);
                    if (source_ip->ifa_addr != NULL &&
                        source_ip->ifa_addr->sa_family ==
                        AF_INET) {
                        /* Copy the source IP address */
                        source_ip_copy =
                            *(struct sockaddr_in *)
                            (void *)source_ip->ifa_addr;
                        IFA_UNLOCK(source_ip);
                        break;
                    }
                    IFA_UNLOCK(source_ip);
                }

                /* No IP Source, don't arp */
                if (source_ip == NULL) {
                    ifnet_lock_done(cur_ifp);
                    continue;
                }

                IFA_ADDREF(source_hw);
                ifnet_lock_done(cur_ifp);

                /* Send the ARP */
                new_result = dlil_send_arp_internal(cur_ifp,
                    arpop, (struct sockaddr_dl *)(void *)
                    source_hw->ifa_addr,
                    (struct sockaddr *)&source_ip_copy, NULL,
                    target_proto);

                IFA_REMREF(source_hw);
                if (result == ENOTSUP) {
                    result = new_result;
                }
            }
            ifnet_list_free(ifp_list);
        }
    } else {
        result = dlil_send_arp_internal(ifp, arpop, sender_hw,
            sender_proto, target_hw, target_proto);
    }

    return result;
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
    struct ifnet *_ifp;

    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
    TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
        if (_ifp == ifp) {
            break;
        }
    }
    return _ifp != NULL;
}

/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
    int ret;

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if ((ret = IF_FULLY_ATTACHED(ifp))) {
        if (refio > 0) {
            ifp->if_refio++;
        }
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    return ret;
}
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    ifp->if_threads_pending++;
    lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(ifp->if_threads_pending > 0);
    ifp->if_threads_pending--;
    if (ifp->if_threads_pending == 0) {
        wakeup(&ifp->if_threads_pending);
    }
    lck_mtx_unlock(&ifp->if_ref_lock);
}
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(IF_FULLY_ATTACHED(ifp));
    VERIFY(ifp->if_refio > 0);
    ifp->if_refio++;
    lck_mtx_unlock(&ifp->if_ref_lock);
}

__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
    LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

    VERIFY(ifp->if_refio > 0);
    VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

    ifp->if_refio--;
    VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

    /*
     * if there are no more outstanding io references, wakeup the
     * ifnet_detach thread if detaching flag is set.
     */
    if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
        wakeup(&(ifp->if_refio));
    }
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    ifnet_decr_iorefcnt_locked(ifp);
    lck_mtx_unlock(&ifp->if_ref_lock);
}
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}

void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
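/*
 * Sketch of how the suspend/drain/resume trio above is meant to be used
 * (added commentary, hedged reconstruction rather than text from this
 * file): a control-path thread that needs the data path quiesced does
 * roughly
 *
 *	ifnet_datamov_suspend(ifp);	// clears IFRF_READY; new begins fail
 *	ifnet_datamov_drain(ifp);	// waits until if_datamov reaches zero
 *	// ... reconfigure the interface ...
 *	ifnet_datamov_resume(ifp);	// restores IFRF_READY, drops IO ref
 *
 * while data-path threads bracket each packet-moving critical section with
 * ifnet_datamov_begin()/ifnet_datamov_end().
 */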
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
	}
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return 0;
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return 0;
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return EEXIST;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return retval;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return retval;
}
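/*
 * Added note: the insertion loop above walks to the tail of the per-family
 * hash chain so protocols demux in the order they were attached.  Per the
 * comment in dlil_proto_unplumb_all() later in this file, if_proto_hash[0-2]
 * are dedicated to PF_INET, PF_INET6 and PF_VLAN, while the remaining bucket
 * is shared by all other protocol families.
 */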
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
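/*
 * Illustrative KPI usage sketch (added commentary; my_proto_* names are
 * hypothetical and not from this file): attaching a v2 protocol handler to
 * an interface looks roughly like
 *
 *	struct ifnet_attach_proto_param_v2 p;
 *	bzero(&p, sizeof(p));
 *	p.input = my_proto_input;		// hypothetical callbacks
 *	p.pre_output = my_proto_preout;
 *	errno_t err = ifnet_attach_protocol_v2(ifp, PF_INET, &p);
 *
 * EEXIST from dlil_attach_protocol_internal() means the family was already
 * attached; both the v1 and v2 paths above treat that as non-fatal.
 */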
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
	    (proto->proto_kpi == kProtoKPI_v1) ?
	    "v1" : "v2", proto_family);

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
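/*
 * Added note: the double if_proto_free() above is intentional.  One release
 * balances the reference taken by if_proto_ref() at attach time, the other
 * balances the reference returned by find_attached_proto() at the top of
 * this function; as the comment says, the remaining detach steps run only
 * when the last proto reference goes away.
 */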
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return EOPNOTSUPP;
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return ENXIO;
}

extern int if_next_index(void);
extern int tcp_ecn_outbound;
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	mac_ifnet_label_associate(ifp);

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv) {
		sflags |= PKTSCHEDF_QALG_FLOWCTL;
	}

	if (if_delaybased_queue) {
		sflags |= PKTSCHEDF_QALG_DELAYBASED;
	}

	if (ifp->if_output_sched_model ==
	    IFNET_SCHED_MODEL_DRIVER_MANAGED) {
		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
	}

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof(dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY)) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
	    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");

	return 0;
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link address
 * gets removed from if_addrhead and ifnet_addrs[] at detach time, its
 * location in memory must never change as it may still be referred to by
 * some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL) {
				return NULL;
			}
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof(asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0) {
		msdl->sdl_data[--namelen] = 0xff;
	}
	IFA_UNLOCK(ifa);

	if (oifa != NULL) {
		IFA_REMREF(oifa);
	}

	return ifa;
}
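/*
 * Worked example of the sizing math above (added commentary, assuming a
 * hypothetical interface named "en0" with a 6-byte Ethernet address):
 * namelen = 3, masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + 3,
 * socksize = masklen + 6, then raised to sizeof(struct sockaddr_dl) if
 * smaller and rounded up to a multiple of 4 by ROUNDUP().  The netmask is
 * just namelen bytes of 0xff written by the loop at the end, so AF_LINK
 * route matching keys on the interface name portion only.
 */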
void
if_purgeaddrs(struct ifnet *ifp)
{
	in_purgeaddrs(ifp);
	in6_purgeaddrs(ifp);
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	DLIL_PRINTF("%s: detaching\n", if_name(ifp));

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/* Reset CLAT46 flag */
	ifp->if_eflags &= ~IFEF_CLAT46;

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connected after the interface
	 * is detached.
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
	igmp_domifdetach(ifp);
	mld_domifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return ifp;
}

static void
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_decr_pending_thread_count();
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	/* NOTREACHED */
}
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		DLIL_PRINTF("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}

	VERIFY(ifp->if_datamov == 0);
	VERIFY(ifp->if_drainers == 0);
	VERIFY(ifp->if_suspend == 0);
	ifp->if_refflags &= ~IFRF_READY;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Uplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				VERIFY(ifp->if_xflags & IFXF_LEGACY);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_done(ifp);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_exclusive(ifp);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
		/* clean-up poll parameters */
		VERIFY(ifp->if_poll_thread == THREAD_NULL);
		dlil_reset_rxpoll_params(ifp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	/* QoS marking get cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL) {
		if_free(ifp);
	}

	DLIL_PRINTF("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}
	return EJUSTRETURN;
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return ENODEV;
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
__private_extern__ errno_t
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	struct dlil_ifnet *dlifp1_saved = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name
	 * in use.
	 * To make sure of that list has to be traversed completely
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family) {
			continue;
		}

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
		}

		if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
			/* Cache the first interface that can be recycled */
			if (*ifp == NULL) {
				*ifp = ifp1;
				dlifp1_saved = dlifp1;
			}
			/*
			 * XXX Do not break or jump to end as we have to traverse
			 * the whole list to ensure there are no name collisions
			 */
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (*ifp != NULL) {
		if (dlifp1_saved != NULL) {
			lck_mtx_lock(&dlifp1_saved->dl_if_lock);
			dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
			lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
			dlifp1_saved = NULL;
		}
		goto end;
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
	    sizeof(u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof(void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid != NULL) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

	mac_ifnet_label_init(ifp1);

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));

	return ret;
}
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);

	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);

	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}

struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				(struct sockaddr *)&src_rt.ro_dst, 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}

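
/*
 * Usage note (illustrative sketch, not part of the original source): the
 * two lookup routines above implement a copyout/validate/copyin pattern
 * around the per-interface cached source route.  A hypothetical caller in
 * the forwarding path might do something like:
 *
 *	struct rtentry *rt;
 *
 *	rt = ifnet_cached_rtlookup_inet(ifp, ip_src);
 *	if (rt != NULL) {
 *		... use the interface-scoped route for this source ...
 *		rtfree(rt);	// drop the reference the lookup returned
 *	}
 *
 * On a cache miss the lookup falls back to a scoped rtalloc1_scoped()
 * call, and the fresh route is copied back into the cache only when
 * if_fwd_cacheok is set.
 */
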
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}

static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;
	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}

void
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return;
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}
}

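
/*
 * Illustrative sketch (not from the original source): a cellular driver
 * reporting both link quality and RRC state through if_state_update()
 * would fill in the validity bitmask before calling, e.g.:
 *
 *	struct if_interface_state ifs = {};
 *
 *	ifs.valid_bitmask = IF_INTERFACE_STATE_LQM_STATE_VALID |
 *	    IF_INTERFACE_STATE_RRC_STATE_VALID;
 *	ifs.lqm_state = IFNET_LQM_THRESH_GOOD;
 *	ifs.rrc_state = IF_INTERFACE_STATE_RRC_STATE_CONNECTED;
 *	if_state_update(ifp, &ifs);
 *
 * Per the checks above, RRC state is accepted only for IFT_CELLULAR
 * interfaces and only with the IDLE or CONNECTED values; out-of-range
 * LQM values are rejected.
 */
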
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}

void
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return;
	}
	if (conn_probe == 0) {
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	} else {
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	}
	ifnet_lock_done(ifp);

	necp_update_all_clients();

	tcp_probe_connectivity(ifp, conn_probe);
}

static int
get_ether_index(int * ret_other_index)
{
	struct ifnet *ifp;
	int en0_index = 0;
	int other_en_index = 0;
	int any_ether_index = 0;
	short best_unit = 0;

	*ret_other_index = 0;
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		/*
		 * find en0, or if not en0, the lowest unit en*, and if not
		 * that, any ethernet
		 */
		ifnet_lock_shared(ifp);
		if (strcmp(ifp->if_name, "en") == 0) {
			if (ifp->if_unit == 0) {
				/* found en0, we're done */
				en0_index = ifp->if_index;
				ifnet_lock_done(ifp);
				break;
			}
			if (other_en_index == 0 || ifp->if_unit < best_unit) {
				other_en_index = ifp->if_index;
				best_unit = ifp->if_unit;
			}
		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
			any_ether_index = ifp->if_index;
		}
		ifnet_lock_done(ifp);
	}
	if (en0_index == 0) {
		if (other_en_index != 0) {
			*ret_other_index = other_en_index;
		} else if (any_ether_index != 0) {
			*ret_other_index = any_ether_index;
		}
	}
	return en0_index;
}

int
uuid_get_ethernet(u_int8_t *node)
{
	static int en0_index;
	struct ifnet *ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();

	return ret;
}

static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_rxpoll = i;
	return err;
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;
	}

	if_rxpoll_mode_holdtime = q;

	return err;
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_SAMPLETIME_MIN) {
		q = IF_RXPOLL_SAMPLETIME_MIN;
	}

	if_rxpoll_sample_holdtime = q;

	return err;
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_INTERVALTIME_MIN) {
		q = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if_rxpoll_interval_time = q;

	return err;
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_wlowat = i;
	return err;
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i <= if_sysctl_rxpoll_wlowat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_whiwat = i;
	return err;
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < IF_SNDQ_MINLEN) {
		i = IF_SNDQ_MINLEN;
	}

	if_sndq_maxlen = i;
	return err;
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < IF_RCVQ_MINLEN) {
		i = IF_RCVQ_MINLEN;
	}

	if_rcvq_maxlen = i;
	return err;
}

errno_t
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof(kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	if (ret == 0) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev));
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
			    "error %d\n", __func__, err);
		}
	}
	return ret;
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		bcopy(sa, kev_sin6, sa->sa_len);
		nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof(kev));
}

errno_t
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	bcopy(sdl, kev_sdl, sdl->sdl_len);
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	bcopy(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev));
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
			    "error %d\n", __func__, err);
		}
	}
	return ret;
}

static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size = 0;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) {
		*sizep = size;
	}
	return bytes;
}

void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof(kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL) {
		bcopy(info, &kev.info, DLIL_MODARGLEN);
	}

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof(kev));
}

errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;

		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY) {
		result = 0;
	}

	return result;
}

int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq)) {
		IFCQ_GET_THROTTLE(ifq, *level, err);
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		IFCQ_SET_THROTTLE(ifq, level, err);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		necp_update_all_clients();
		if (level == IFNET_THROTTLE_OFF) {
			ifnet_start(ifp);
		}
	}

	return err;
}

errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return err;
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL) {
		*level = ifp->if_log.level;
	}
	if (flags != NULL) {
		*flags = ifp->if_log.flags;
	}
	if (category != NULL) {
		*category = ifp->if_log.category;
	}
	if (subcategory != NULL) {
		*subcategory = ifp->if_log.subcategory;
	}

	return 0;
}

int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

	(void) pf_ifaddr_hook(ifp);

	if (ifp->if_output_ctl == NULL) {
		return EOPNOTSUPP;
	}

	bzero(&na, sizeof(na));
	na.address_family = af;

	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	           sizeof(na), &na);
}

errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	*flowid = ifp->if_flowhash;

	return 0;
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return err;
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	ifnet_start_common(ifp, TRUE);
	return 0;
}

void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}

	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return fc1->ifce_flowhash - fc2->ifce_flowhash;
}

static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return ENOMEM;
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}

uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;
}

int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}

int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}

int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));
			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}

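
/*
 * Note (illustrative, not from the original source): the prefix lengths
 * accepted above are the six NAT64/RFC 6052 prefix sizes (32, 40, 48,
 * 56, 64 and 96 bits).  A small validity helper equivalent to the
 * open-coded check could look like:
 *
 *	static boolean_t
 *	nat64_prefix_len_ok(uint32_t len)
 *	{
 *		return (len == NAT64_PREFIX_LEN_32 ||
 *		    len == NAT64_PREFIX_LEN_40 ||
 *		    len == NAT64_PREFIX_LEN_48 ||
 *		    len == NAT64_PREFIX_LEN_56 ||
 *		    len == NAT64_PREFIX_LEN_64 ||
 *		    len == NAT64_PREFIX_LEN_96);
 *	}
 *
 * nat64_prefix_len_ok is a hypothetical name; the setter above keeps the
 * comparison inline.
 */
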
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}

static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	default:
		return;
	}
}

static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len) {
			return;
		}

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				DLIL_PRINTF("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					DLIL_PRINTF("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
				return;
			}

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}

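
/*
 * Background (illustrative, not from the original source): a partial
 * checksum is a plain 16-bit 1's complement sum over the payload starting
 * at csum_rx_start.  Moving that start offset forward only requires
 * folding out (in 1's complement arithmetic) the bytes no longer covered,
 * which is what m_adj_sum16() does for the emulation above.  A
 * contiguous-buffer sketch of the idea, assuming an even, in-bounds
 * adjustment:
 *
 *	static uint16_t
 *	sum16_advance(const uint8_t *buf, uint32_t old_off, uint32_t new_off,
 *	    uint16_t sum)
 *	{
 *		uint32_t s = sum;
 *		uint32_t i;
 *
 *		// subtract each skipped 16-bit word by adding its complement
 *		for (i = old_off; i + 1 < new_off; i += 2) {
 *			s += (uint16_t)~(uint16_t)(buf[i] | (buf[i + 1] << 8));
 *			s = (s & 0xffff) + (s >> 16);	// end-around carry
 *		}
 *		return (uint16_t)s;
 *	}
 *
 * sum16_advance is a hypothetical helper shown only to make the
 * arithmetic concrete; odd offsets and byte order are handled more
 * carefully by m_sum16()/m_adj_sum16().
 */
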
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (hwcksum_dbg == 0) {
		return ENODEV;
	}

	if ((i & ~HWCKSUM_DBG_MASK) != 0) {
		return EINVAL;
	}

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_forced = i;

	return err;
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_adj = i;

	return err;
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {
	}
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return err;
}

#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;          /* reference */
	uint16_t sumrp;         /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);               /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */

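
/*
 * Reference note (illustrative, not from the original source): the
 * precomputed sumrp values in sumtbl[] are plain 16-bit 1's complement
 * sums over the first `len` bytes of sumdata[], with 16-bit words taken
 * in memory order (shown here for a little-endian layout).  A
 * straightforward reference of that sum, against which m_sum16() and
 * b_sum16() are effectively checked, could be sketched as:
 *
 *	static uint16_t
 *	ref_sum16(const uint8_t *buf, uint32_t len)
 *	{
 *		uint32_t s = 0;
 *		uint32_t i;
 *
 *		for (i = 0; i + 1 < len; i += 2) {
 *			s += (uint32_t)(buf[i] | (buf[i + 1] << 8));
 *		}
 *		if (len & 1) {
 *			s += buf[len - 1];	// trailing odd byte
 *		}
 *		while (s > 0xffff) {
 *			s = (s & 0xffff) + (s >> 16);	// fold carries
 *		}
 *		return (uint16_t)s;
 *	}
 *
 * ref_sum16 is a hypothetical name; the kernel's authoritative reference
 * is in_cksum_mbuf_ref(), used by dlil_verify_sum16() above.
 */
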
#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
		CASE_STRINGIFY(KEV_DL_SIFMTU);
		CASE_STRINGIFY(KEV_DL_SIFPHYS);
		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
		CASE_STRINGIFY(KEV_DL_ADDMULTI);
		CASE_STRINGIFY(KEV_DL_DELMULTI);
		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_OFF);
		CASE_STRINGIFY(KEV_DL_LINK_ON);
		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
		CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
		CASE_STRINGIFY(KEV_DL_ISSUES);
		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}

static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}

*ifp
)
10327 uint64_t bytes
= (ifp
->if_ibytes
+ ifp
->if_obytes
);
10328 uint64_t oldbytes
= ifp
->if_dt_bytes
;
10330 ASSERT(ifp
->if_dt_tcall
!= NULL
);
10333 * If we went over the threshold, notify NetworkStatistics.
10334 * We rate-limit it based on the threshold interval value.
10336 if (threshold_notify
&& (bytes
- oldbytes
) > ifp
->if_data_threshold
&&
10337 OSCompareAndSwap64(oldbytes
, bytes
, &ifp
->if_dt_bytes
) &&
10338 !thread_call_isactive(ifp
->if_dt_tcall
)) {
10339 uint64_t tival
= (threshold_interval
* NSEC_PER_SEC
);
10340 uint64_t now
= mach_absolute_time(), deadline
= now
;
10344 nanoseconds_to_absolutetime(tival
, &ival
);
10345 clock_deadline_for_periodic_event(ival
, now
, &deadline
);
10346 (void) thread_call_enter_delayed(ifp
->if_dt_tcall
,
10349 (void) thread_call_enter(ifp
->if_dt_tcall
);
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames():
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */

void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);