 * Copyright (c) 1999-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>
#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>
#include <mach/thread_act.h>
#include <mach/sdt.h>
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#include <net/pfvar.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_netem.h>
#include <net/necp.h>
#define DBG_LAYER_BEG           DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END           DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE     4 /* LONGWORDS */
#define MAX_LINKADDR            4 /* LONGWORDS */
#define M_NKE                   M_IFADDR

#if 1
#define DLIL_PRINTF     printf
#else
#define DLIL_PRINTF     kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f)   \
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)   next_hash;
	u_int32_t               refcount;
	u_int32_t               detached;
	struct ifnet            *ifp;
	protocol_family_t       protocol_family;
	int                     proto_kpi;
	union {
		struct {
			proto_media_input               input;
			proto_media_preout              pre_output;
			proto_media_event               event;
			proto_media_ioctl               ioctl;
			proto_media_detached            detached;
			proto_media_resolve_multi       resolve_multi;
			proto_media_send_arp            send_arp;
		} v1;
		struct {
			proto_media_input_v2            input;
			proto_media_preout              pre_output;
			proto_media_event               event;
			proto_media_ioctl               ioctl;
			proto_media_detached            detached;
			proto_media_resolve_multi       resolve_multi;
			proto_media_send_arp            send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet    dl_if;                  /* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
	u_int32_t dl_if_flags;                  /* flags (below) */
	u_int32_t dl_if_refcnt;                 /* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void    *dl_if_uniqueid;                /* unique interface id */
	size_t  dl_if_uniqueid_len;             /* length of the unique id */
	char    dl_if_namestorage[IFNAMSIZ];    /* interface name storage */
	char    dl_if_xnamestorage[IFXNAMSIZ];  /* external name storage */
	struct {
		struct ifaddr   ifa;                    /* lladdr ifa */
		u_int8_t        asdl[DLIL_SDLMAXLEN];   /* addr storage */
		u_int8_t        msdl[DLIL_SDLMAXLEN];   /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
	u_int8_t dl_if_permanent_ether_is_set;
	u_int8_t dl_if_unused;
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t        dl_if_attach;           /* attach PC stacktrace */
	ctrace_t        dl_if_detach;           /* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE      0x1     /* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE      0x2     /* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG      0x4     /* has debugging info */

#define IF_REF_TRACE_HIST_SIZE  8       /* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet       dldbg_dlif;             /* dlil_ifnet */
	u_int16_t               dldbg_if_refhold_cnt;   /* # ifnet references */
	u_int16_t               dldbg_if_refrele_cnt;   /* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t                dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t                dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)  (&s->dl_if)
#define IFP_TO_DLIL(s)  ((struct dlil_ifnet *)s)
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)       filt_next;
	u_int32_t                       filt_flags;
	const char                      *filt_name;
	protocol_family_t               filt_protocol;
	iff_input_func                  filt_input;
	iff_output_func                 filt_output;
	iff_event_func                  filt_event;
	iff_ioctl_func                  filt_ioctl;
	iff_detached_func               filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;

#if DEBUG
static unsigned int ifnet_debug = 1;    /* debugging (enabled) */
#else
static unsigned int ifnet_debug;        /* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;          /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;       /* size of dlif_size + headroom */
static struct zone *dlif_zone;          /* zone for dlil_ifnet */
#define DLIF_ZONE_NAME          "ifnet"         /* zone name */

static ZONE_DECLARE(dlif_filt_zone, "ifnet_filter",
    sizeof(struct ifnet_filter), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(dlif_phash_zone, "ifnet_proto_hash",
    sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS, ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(dlif_proto_zone, "ifnet_proto",
    sizeof(struct if_proto), ZC_ZFREE_CLEARMEM);

static unsigned int dlif_tcpstat_size;    /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;    /* zone for tcpstat_local */
#define DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */

static unsigned int dlif_udpstat_size;    /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;    /* zone for udpstat_local */
#define DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */

static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if !XNU_TARGET_OS_OSX
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else /* XNU_TARGET_OS_OSX */
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* XNU_TARGET_OS_OSX */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
    struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);
static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
    struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_main_input_thread_cont(void *, wait_result_t);

static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_cont(void *, wait_result_t);

static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);

static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
    thread_continue_t *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, struct ifnet *, boolean_t);
static boolean_t dlil_input_stats_sync(struct ifnet *,
    struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void dlil_incr_pending_thread_count(void);
static void dlil_decr_pending_thread_count(void);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static void ifnet_detacher_thread_cont(void *, wait_result_t);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_func(void *, wait_result_t);
static void ifnet_start_thread_cont(void *, wait_result_t);

static void ifnet_poll_thread_func(void *, wait_result_t);
static void ifnet_poll_thread_cont(void *, wait_result_t);

static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
    boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static boolean_t ifnet_detaching_embryonic;
static void *ifnet_delayed_run; /* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];
	uint32_t        ifk_capabilities;
	uint32_t        ifk_capenable;
	uint32_t        ifk_output_sched_model;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t       ifce_flowhash;
	struct ifnet    *ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static ZONE_DECLARE(ifnet_fc_zone, "ifnet_fc_zone",
    sizeof(struct ifnet_fc_entry), ZC_ZFREE_CLEARMEM);

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#if CONFIG_MACF
#if !XNU_TARGET_OS_OSX
int dlil_lladdr_ckreq = 1;
#else /* XNU_TARGET_OS_OSX */
int dlil_lladdr_ckreq = 0;
#endif /* XNU_TARGET_OS_OSX */
#endif /* CONFIG_MACF */
#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };

SYSCTL_DECL(_net_link_generic_system);
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN  32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN  32
#define IF_RCVQ_MAXLEN  256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY 2       /* ilog2 of EWMA decay rate (4) */
u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN     (10ULL * 1000 * 1000)   /* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME         (1000ULL * 1000 * 1000) /* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN        (1ULL * 1000 * 1000)    /* 1 ms */
#define IF_RXPOLL_SAMPLETIME            (10ULL * 1000 * 1000)   /* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT        10
static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT        100
static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");
static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}

#define HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |   \
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;           /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;         /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */

static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t        inject_buckets;

static  lck_grp_attr_t  *dlil_grp_attributes = NULL;
static  lck_attr_t      *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
void
ifnet_filter_update_tso(boolean_t filter_enable)
{
	/*
	 * update filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
	routegenid_update();
}

#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}

#define DLIL_EWMA(old, new, decay) do {                                 \
	u_int32_t _avg;                                                 \
	if ((_avg = (old)) > 0)                                         \
	        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else                                                            \
	        _avg = (new);                                           \
	(old) = _avg;                                                   \
} while (0)
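
/*
 * Worked example of DLIL_EWMA (illustrative note, not part of the original
 * source): with decay = IF_RXPOLL_DECAY = 2 the update computes
 * avg = ((avg << 2) - avg + new) >> 2, i.e. avg = (3*avg + new) / 4 in
 * integer math. Starting from avg = 40 packets, a new sample of 80 gives
 * (3*40 + 80) / 4 = 50; a second sample of 80 gives (3*50 + 80) / 4 = 57,
 * so the average moves roughly a quarter of the way toward each new sample.
 * When the previous average is 0, the new sample is taken directly.
 */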
#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat = 2,  .phiwat = 8,  .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
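
/*
 * Illustrative sketch (not part of the original DLIL code): the table above
 * is keyed by downlink speed and terminated by a zero-speed entry.  A lookup
 * that picks the highest row whose speed does not exceed the measured link
 * rate could look like the helper below; the kernel's actual parameter
 * selection is performed elsewhere in this subsystem (dlil_rxpoll_set_params).
 */
static inline struct rxpoll_time_tbl *
rxpoll_tbl_lookup_example(u_int64_t link_speed)
{
	int i, best = 0;

	for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
		if (link_speed >= rxpoll_tbl[i].speed) {
			best = i;       /* remember the best match so far */
		}
	}
	return &rxpoll_tbl[best];
}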
decl_lck_mtx_data(static, dlil_thread_sync_lock);
static uint32_t dlil_pending_thread_cnt = 0;

static void
dlil_incr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt++;
	lck_mtx_unlock(&dlil_thread_sync_lock);
}

static void
dlil_decr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	dlil_pending_thread_cnt--;
	if (dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}
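
/*
 * Illustrative sketch (assumption, not part of the original source): a
 * startup path that needs every pending DLIL thread to finish initializing
 * could block on the same wait channel that dlil_decr_pending_thread_count()
 * wakes up, e.g.:
 *
 *	lck_mtx_lock(&dlil_thread_sync_lock);
 *	while (dlil_pending_thread_cnt != 0) {
 *		(void) msleep(&dlil_pending_thread_cnt,
 *		    &dlil_thread_sync_lock, (PZERO - 1),
 *		    "dlil_pending_thread", NULL);
 *	}
 *	lck_mtx_unlock(&dlil_thread_sync_lock);
 */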
static u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
	}

	while (proto != NULL && proto->protocol_family != protocol_family) {
		proto = SLIST_NEXT(proto, next_hash);
	}

	return proto;
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		/* there are still outstanding references on this protocol */
		return;
	}

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	ifnet_lock_shared(ifp);

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}

	LCK_RW_ASSERT(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t       count = 0;
	int             i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}
__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
	ev_msg.kev_class        = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass     = event_subclass;
	ev_msg.event_code       = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	bool update_generation = true;
	if (event_subclass == KEV_DL_SUBCLASS) {
		/* Don't update interface generation for frequent link quality and state changes  */
		switch (event_code) {
		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
		case KEV_DL_RRC_STATE_CHANGED:
		case KEV_DL_NODE_PRESENCE:
		case KEV_DL_NODE_ABSENCE:
		case KEV_DL_MASTER_ELECTED:
			update_generation = false;
			break;
		default:
			break;
		}
	}

	return dlil_event_internal(ifp, &ev_msg, update_generation);
}
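
/*
 * Illustrative usage sketch (assumption, not part of the original source):
 * posting a simple link-state event with no payload beyond the implicit
 * net_event_data header is just:
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * Passing NULL/0 makes dlil_post_msg() fill in a stack net_event_data with
 * the interface name, family and unit before dispatching the event.
 */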
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc_flags(dlif_tcpstat_zone, Z_WAITOK | Z_ZERO);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc_flags(dlif_udpstat_zone, Z_WAITOK | Z_ZERO);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return ret;
}
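
/*
 * Illustrative sketch of the alignment trick used above (not part of the
 * original source): each zone element is sized with one pointer plus one
 * u_int64_t of headroom, the returned base is rounded up to an 8-byte
 * boundary, and the raw allocation address is stashed in the pointer slot
 * immediately before the aligned base so the free path can recover it:
 *
 *	buf  = zalloc_flags(zone, Z_WAITOK | Z_ZERO);
 *	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
 *	    sizeof(u_int64_t));
 *	((void **)base)[-1] = buf;          // remember the original address
 *	...
 *	zfree(zone, ((void **)base)[-1]);   // free using the saved address
 */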
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
    thread_continue_t *thfunc)
{
	boolean_t dlil_rxpoll_input;
	thread_continue_t func = NULL;
	u_int32_t limit;
	int error = 0;

	dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
	    (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));

	/* default strategy utilizes the DLIL worker thread */
	inp->dlth_strategy = dlil_input_async;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		/*
		 * Main input thread only.
		 */
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->dlth_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (dlil_rxpoll_input) {
		/*
		 * Legacy (non-netif) hybrid polling.
		 */
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
		/*
		 * Asynchronous strategy.
		 */
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	} else {
		/*
		 * Synchronous strategy if there's a netif below and
		 * the device isn't capable of hybrid polling.
		 */
		ASSERT(func == NULL);
		ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
		VERIFY(inp != dlil_main_input_thread);
		ASSERT(!inp->dlth_affinity);
		inp->dlth_strategy = dlil_input_sync;
	}
	VERIFY(inp->dlth_thread == THREAD_NULL);

	/* let caller know */
	if (thfunc != NULL) {
		*thfunc = func;
	}

	inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name,
	    dlil_grp_attributes);
	lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, dlil_lck_attributes);

	inp->dlth_ifp = ifp;            /* NULL for main input thread */
	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		if (ifp->if_xflags & IFXF_LEGACY) {
			(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
		}
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	} else if (func == NULL) {
		/* synchronous strategy does not need its own thread */
		ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
		return error;
	}

	error = kernel_thread_start(func, inp, &inp->dlth_thread);
	if (error == KERN_SUCCESS) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		bzero(&info, sizeof(info));
		info.importance = 0;
		kret = thread_policy_set(inp->dlth_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->dlth_thread;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->dlth_affinity_tag = tag;
				inp->dlth_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return error;
}
#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_input_thread_termination_spin;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_input_thread_termination_spin = i;
	return err;
}
#endif /* TEST_INPUT_THREAD_TERMINATION */
void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
	lck_grp_free(inp->dlth_lock_grp);
	inp->dlth_lock_grp = NULL;

	inp->dlth_flags = 0;
	bzero(inp->dlth_name, sizeof(inp->dlth_name));
	inp->dlth_ifp = NULL;
	VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
	qlimit(&inp->dlth_pkts) = 0;
	bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));

	VERIFY(!inp->dlth_affinity);
	inp->dlth_thread = THREAD_NULL;
	inp->dlth_strategy = NULL;
	VERIFY(inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(inp->dlth_affinity_tag == 0);
#if IFNET_INPUT_SANITY_CHK
	inp->dlth_pkts_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->dlth_ifp;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	VERIFY(current_thread() == inp->dlth_thread);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t        v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		DLIL_PRINTF("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->dlth_lock);
	_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
	VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
	inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->dlth_flags);
	lck_mtx_unlock(&inp->dlth_lock);

	/* free up pending packets */
	if (pkt.cp_mbuf != NULL) {
		mbuf_freem_list(pkt.cp_mbuf);
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		DLIL_PRINTF("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof(policy));
	policy.affinity_tag = tag;
	return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	           (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
}
1568 thread_t thread
= THREAD_NULL
;
1571 * The following fields must be 64-bit aligned for atomic operations.
1573 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1574 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1575 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1576 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1577 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1578 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1579 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1580 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1581 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1582 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1583 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1584 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1585 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1586 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1587 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1589 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1590 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1591 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1592 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1593 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1594 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1595 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1596 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1597 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1598 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1599 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1600 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1601 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1602 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1603 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1606 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1608 _CASSERT(IF_HWASSIST_CSUM_IP
== IFNET_CSUM_IP
);
1609 _CASSERT(IF_HWASSIST_CSUM_TCP
== IFNET_CSUM_TCP
);
1610 _CASSERT(IF_HWASSIST_CSUM_UDP
== IFNET_CSUM_UDP
);
1611 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS
== IFNET_CSUM_FRAGMENT
);
1612 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT
== IFNET_IP_FRAGMENT
);
1613 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6
== IFNET_CSUM_TCPIPV6
);
1614 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6
== IFNET_CSUM_UDPIPV6
);
1615 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6
== IFNET_IPV6_FRAGMENT
);
1616 _CASSERT(IF_HWASSIST_CSUM_PARTIAL
== IFNET_CSUM_PARTIAL
);
1617 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT
== IFNET_CSUM_ZERO_INVERT
);
1618 _CASSERT(IF_HWASSIST_VLAN_TAGGING
== IFNET_VLAN_TAGGING
);
1619 _CASSERT(IF_HWASSIST_VLAN_MTU
== IFNET_VLAN_MTU
);
1620 _CASSERT(IF_HWASSIST_TSO_V4
== IFNET_TSO_IPV4
);
1621 _CASSERT(IF_HWASSIST_TSO_V6
== IFNET_TSO_IPV6
);
1624 * ... as well as the mbuf checksum flags counterparts.
1626 _CASSERT(CSUM_IP
== IF_HWASSIST_CSUM_IP
);
1627 _CASSERT(CSUM_TCP
== IF_HWASSIST_CSUM_TCP
);
1628 _CASSERT(CSUM_UDP
== IF_HWASSIST_CSUM_UDP
);
1629 _CASSERT(CSUM_IP_FRAGS
== IF_HWASSIST_CSUM_IP_FRAGS
);
1630 _CASSERT(CSUM_FRAGMENT
== IF_HWASSIST_CSUM_FRAGMENT
);
1631 _CASSERT(CSUM_TCPIPV6
== IF_HWASSIST_CSUM_TCPIPV6
);
1632 _CASSERT(CSUM_UDPIPV6
== IF_HWASSIST_CSUM_UDPIPV6
);
1633 _CASSERT(CSUM_FRAGMENT_IPV6
== IF_HWASSIST_CSUM_FRAGMENT_IPV6
);
1634 _CASSERT(CSUM_PARTIAL
== IF_HWASSIST_CSUM_PARTIAL
);
1635 _CASSERT(CSUM_ZERO_INVERT
== IF_HWASSIST_CSUM_ZERO_INVERT
);
1636 _CASSERT(CSUM_VLAN_TAG_VALID
== IF_HWASSIST_VLAN_TAGGING
);
1639 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1641 _CASSERT(IF_LLREACH_MAXLEN
<= IF_LLREACHINFO_ADDRLEN
);
1642 _CASSERT(IFNET_LLREACHINFO_ADDRLEN
== IF_LLREACHINFO_ADDRLEN
);
1644 _CASSERT(IFRLOGF_DLIL
== IFNET_LOGF_DLIL
);
1645 _CASSERT(IFRLOGF_FAMILY
== IFNET_LOGF_FAMILY
);
1646 _CASSERT(IFRLOGF_DRIVER
== IFNET_LOGF_DRIVER
);
1647 _CASSERT(IFRLOGF_FIRMWARE
== IFNET_LOGF_FIRMWARE
);
1649 _CASSERT(IFRLOGCAT_CONNECTIVITY
== IFNET_LOGCAT_CONNECTIVITY
);
1650 _CASSERT(IFRLOGCAT_QUALITY
== IFNET_LOGCAT_QUALITY
);
1651 _CASSERT(IFRLOGCAT_PERFORMANCE
== IFNET_LOGCAT_PERFORMANCE
);
1653 _CASSERT(IFRTYPE_FAMILY_ANY
== IFNET_FAMILY_ANY
);
1654 _CASSERT(IFRTYPE_FAMILY_LOOPBACK
== IFNET_FAMILY_LOOPBACK
);
1655 _CASSERT(IFRTYPE_FAMILY_ETHERNET
== IFNET_FAMILY_ETHERNET
);
1656 _CASSERT(IFRTYPE_FAMILY_SLIP
== IFNET_FAMILY_SLIP
);
1657 _CASSERT(IFRTYPE_FAMILY_TUN
== IFNET_FAMILY_TUN
);
1658 _CASSERT(IFRTYPE_FAMILY_VLAN
== IFNET_FAMILY_VLAN
);
1659 _CASSERT(IFRTYPE_FAMILY_PPP
== IFNET_FAMILY_PPP
);
1660 _CASSERT(IFRTYPE_FAMILY_PVC
== IFNET_FAMILY_PVC
);
1661 _CASSERT(IFRTYPE_FAMILY_DISC
== IFNET_FAMILY_DISC
);
1662 _CASSERT(IFRTYPE_FAMILY_MDECAP
== IFNET_FAMILY_MDECAP
);
1663 _CASSERT(IFRTYPE_FAMILY_GIF
== IFNET_FAMILY_GIF
);
1664 _CASSERT(IFRTYPE_FAMILY_FAITH
== IFNET_FAMILY_FAITH
);
1665 _CASSERT(IFRTYPE_FAMILY_STF
== IFNET_FAMILY_STF
);
1666 _CASSERT(IFRTYPE_FAMILY_FIREWIRE
== IFNET_FAMILY_FIREWIRE
);
1667 _CASSERT(IFRTYPE_FAMILY_BOND
== IFNET_FAMILY_BOND
);
1668 _CASSERT(IFRTYPE_FAMILY_CELLULAR
== IFNET_FAMILY_CELLULAR
);
1669 _CASSERT(IFRTYPE_FAMILY_6LOWPAN
== IFNET_FAMILY_6LOWPAN
);
1670 _CASSERT(IFRTYPE_FAMILY_UTUN
== IFNET_FAMILY_UTUN
);
1671 _CASSERT(IFRTYPE_FAMILY_IPSEC
== IFNET_FAMILY_IPSEC
);
1673 _CASSERT(IFRTYPE_SUBFAMILY_ANY
== IFNET_SUBFAMILY_ANY
);
1674 _CASSERT(IFRTYPE_SUBFAMILY_USB
== IFNET_SUBFAMILY_USB
);
1675 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH
== IFNET_SUBFAMILY_BLUETOOTH
);
1676 _CASSERT(IFRTYPE_SUBFAMILY_WIFI
== IFNET_SUBFAMILY_WIFI
);
1677 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT
== IFNET_SUBFAMILY_THUNDERBOLT
);
1678 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED
== IFNET_SUBFAMILY_RESERVED
);
1679 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC
== IFNET_SUBFAMILY_INTCOPROC
);
1680 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY
== IFNET_SUBFAMILY_QUICKRELAY
);
1681 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT
== IFNET_SUBFAMILY_DEFAULT
);
1683 _CASSERT(DLIL_MODIDLEN
== IFNET_MODIDLEN
);
1684 _CASSERT(DLIL_MODARGLEN
== IFNET_MODARGLEN
);
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	VERIFY(dlil_pending_thread_cnt == 0);
	dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
	    sizeof(struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
	dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);

	dlif_tcpstat_size = sizeof(struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_tcpstat_bufsize = (uint32_t)
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
	dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
	    dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);

	dlif_udpstat_size = sizeof(struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_udpstat_bufsize = (uint32_t)
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
	dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
	    dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);

	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
	lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	/* Initialize interface address subsystem */

	/* Initialize the packet filter */

	/* Initialize queue algorithms */

	/* Initialize packet schedulers */

	/* Initialize flow advisory subsystem */

	/* Initialize the pktap virtual interface */

	/* Initialize the service class to dscp map */

	/* Initialize the interface port list */
	if_ports_used_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
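
/*
 * Filter monitor helpers: the if_flt_busy/if_flt_waiters pair implements
 * a simple monitor around an interface's filter list.  Callers hold
 * if_flt_lock; if_flt_monitor_enter() sleeps until the list is no longer
 * busy and then marks it busy, while if_flt_monitor_leave() clears the
 * busy state and wakes any waiters.  This keeps filter attach/detach from
 * racing with threads walking if_flt_head.
 */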
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
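
/*
 * dlil_attach_filter() copies the caller-supplied iff_filter callbacks
 * into a zone-allocated ifnet_filter, links it on the interface's
 * if_flt_head under the filter monitor, and returns a reference through
 * filter_ref.  Attaching any filter without DLIL_IFF_TSO also bumps the
 * filter generation so TCP stops offloading segmentation on that path.
 *
 * Illustrative sketch only (not part of this file): a kext would normally
 * reach this code through the interface filter KPI, e.g.
 *
 *	struct iff_filter flt = {
 *		.iff_cookie   = my_cookie,                // hypothetical
 *		.iff_name     = "com.example.filter",     // hypothetical
 *		.iff_input    = my_input_fn,
 *		.iff_output   = my_output_fn,
 *		.iff_detached = my_detached_fn,
 *	};
 *	interface_filter_t ref;
 *	errno_t err = iflt_attach(ifp, &flt, &ref);
 */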
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(TRUE);
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		zfree(dlif_filt_zone, filter);
	}

	return retval;
}
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		struct ifnet *ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;   /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(FALSE);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL) {
		return;
	}
	dlil_detach_filter_internal(filter, 0);
}

__private_extern__ boolean_t
dlil_has_ip_filter(void)
{
	boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);

	DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
	return has_filter;
}

__private_extern__ boolean_t
dlil_has_if_filter(struct ifnet *ifp)
{
	boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);

	DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
	return has_filter;
}
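
/*
 * Input thread wakeup: a producer that has queued packets (or a protocol
 * registration request) sets DLIL_INPUT_WAITING and, if the thread is not
 * already in its RUNNING pass, issues a wakeup on dlth_flags.  The input
 * thread continuations below consume that flag word while holding
 * dlth_lock.
 */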
static void
dlil_input_wakeup(struct dlil_threading_info *inp)
{
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);

	inp->dlth_flags |= DLIL_INPUT_WAITING;
	if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
		inp->dlth_wtot++;
		wakeup_one((caddr_t)&inp->dlth_flags);
	}
}

__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->dlth_ifp == NULL);
	VERIFY(current_thread() == inp->dlth_thread);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *      input thread (e.g. anything but Ethernet/PDP or those that support
 *      opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	/* main input thread is uninterruptible */
	VERIFY(wres != THREAD_INTERRUPTED);
	lck_mtx_lock_spin(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
	    DLIL_INPUT_RUNNING)));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t proto_req;
		boolean_t embryonic;

		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		proto_req = (inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->dlth_pkts);
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		_getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
		m_loop = pkt.cp_mbuf;

		inp->dlth_wtot = 0;

		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			dlil_decr_pending_thread_count();
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m_loop != NULL)) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (__improbable(proto_req)) {
			proto_input_run();
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		/* main input thread cannot be terminated */
		VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
		if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Input thread for interfaces with legacy input model.
 */
__attribute__((noreturn))
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->dlth_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->dlth_thread, thread_name);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	lck_mtx_lock_spin(&inp->dlth_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify = FALSE;
		boolean_t embryonic;

		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not worth the trouble.)
		 */
		VERIFY(!(inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->dlth_pkts);
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		inp->dlth_wtot = 0;

		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			ifnet_decr_pending_thread_count(ifp);
		}

		if (__improbable(notify)) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, ifp->if_poll_mode);
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;

	if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
terminate:
		lck_mtx_unlock(&inp->dlth_lock);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
		lck_mtx_unlock(&inp->dlth_lock);
		(void) thread_block_parameter(dlil_input_thread_cont, inp);
		/* NOTREACHED */
	}

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->dlth_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_poll_%s", ifp->if_xname);
	thread_set_thread_name(inp->dlth_thread, thread_name);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	lck_mtx_lock_spin(&inp->dlth_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		uint32_t m_cnt, poll_req = 0;
		uint64_t m_size = 0;
		ifnet_model_t mode;
		uint64_t ival;
		struct timespec now, delta;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify;
		boolean_t embryonic;

		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
			goto skip;
		}

		if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
			ival = IF_RXPOLL_INTERVALTIME_MIN;
		}

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = ifp->if_poll_mode;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not worth the trouble.)
		 */
		VERIFY(!(inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Total count of all packets */
		m_cnt = qlen(&inp->dlth_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->dlth_pkts);

		/* Packets for this interface */
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
			*(&ifp->if_poll_sample_lasttime) = *(&now);
		}

		net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
				goto skip;
			}

			*(&ifp->if_poll_sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)ifp->if_poll_sstats.bytes;
			if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
				ifp->if_rxpoll_bmin = btot;
			}
			if (btot > ifp->if_rxpoll_bmax) {
				ifp->if_rxpoll_bmax = btot;
			}

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)ifp->if_poll_sstats.packets;
			if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
				ifp->if_rxpoll_pmin = ptot;
			}
			if (ptot > ifp->if_rxpoll_pmax) {
				ifp->if_rxpoll_pmax = ptot;
			}

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&ifp->if_poll_sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
			    if_rxpoll_decay);
			inp->dlth_wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
					*(&ifp->if_poll_dbg_lasttime) = *(&now);
				}
				net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&ifp->if_poll_dbg_lasttime) = *(&now);
					DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (ifp->if_poll_mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", ifp->if_rxpoll_pavg,
					    ifp->if_rxpoll_pmax,
					    ifp->if_rxpoll_plowat,
					    ifp->if_rxpoll_phiwat,
					    ifp->if_rxpoll_wavg,
					    ifp->if_rxpoll_wlowat,
					    ifp->if_rxpoll_whiwat,
					    ifp->if_rxpoll_bavg,
					    ifp->if_rxpoll_blowat,
					    ifp->if_rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
				*(&ifp->if_poll_mode_lasttime) = *(&now);
			}

			net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
			if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
				goto skip;
			}

			if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
			    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
			    (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
			    ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != ifp->if_poll_mode) {
				ifp->if_poll_mode = mode;
				*(&ifp->if_poll_mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		notify = dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			ifnet_decr_pending_thread_count(ifp);
		}

		if (__improbable(notify)) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode. Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = {
				.model = mode, .reserved = { 0 }
			};
			struct timespec ts;
			errno_t err;

			if (dlil_verbose) {
				DLIL_PRINTF("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
				    ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
				DLIL_PRINTF("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				ifp->if_rxpoll_offreq++;
				if (err != 0) {
					ifp->if_rxpoll_offerr++;
				}
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifp->if_rxpoll_onreq++;
				if (err != 0) {
					ifp->if_rxpoll_onerr++;
				}
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;

	if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
terminate:
		lck_mtx_unlock(&inp->dlth_lock);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
		lck_mtx_unlock(&inp->dlth_lock);
		(void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
		    inp);
		/* NOTREACHED */
	}

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
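
/*
 * Poll parameter validation: low/high watermarks for packets and bytes
 * must either both be zero (auto-tune) or both be non-zero with
 * lowat < hiwat, and any caller-supplied poll interval is clamped up to
 * IF_RXPOLL_INTERVALTIME_MIN.
 */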
static errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
{
	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
			return EINVAL;
		}
		if (p->packets_lowat != 0 && /* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat) {
			return EINVAL;
		}
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
			return EINVAL;
		}
		if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat) {
			return EINVAL;
		}
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
		}
	}
	return 0;
}
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate. If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values. Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}

	bzero(p, sizeof(*p));

	lck_mtx_lock(&inp->dlth_lock);
	p->packets_limit = ifp->if_rxpoll_plim;
	p->packets_lowat = ifp->if_rxpoll_plowat;
	p->packets_hiwat = ifp->if_rxpoll_phiwat;
	p->bytes_lowat = ifp->if_rxpoll_blowat;
	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
	p->interval_time = ifp->if_rxpoll_ival;
	lck_mtx_unlock(&inp->dlth_lock);

	return 0;
}
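
/*
 * Driver-facing input entry points.  ifnet_input() takes a chain with no
 * precomputed tail or stats; ifnet_input_extended() additionally trusts
 * the driver-supplied tail mbuf and ifnet_stat_increment_param; and
 * ifnet_input_poll() is used by the RX poller, where an empty chain is
 * legal.  All three funnel into ifnet_input_common().
 */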
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}

errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	    (m_head != NULL), TRUE);
}
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last = NULL;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
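
/*
 * Default DLIL output/input handlers.  Output simply calls the driver's
 * if_output.  Input dispatches through the threading info's dlth_strategy
 * (dlil_input_async or dlil_input_sync below), falling back to the main
 * input thread when the interface has no dedicated one.
 */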
errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return ifp->if_output(ifp, m);
}

errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp = ifp->if_inp;

	if (__improbable(inp == NULL)) {
		inp = dlil_main_input_thread;
	}
	return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
}
static errno_t
dlil_input_async(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set. We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->dlth_lock);
	if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
	    ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
	    (poll && inp->dlth_poller_thread == THREAD_NULL))) {
		u_int32_t tag = inp->dlth_affinity_tag;

		if (poll) {
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_poller_thread = tp;
		} else {
			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
		}
		lck_mtx_unlock(&inp->dlth_lock);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->dlth_lock);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack. Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		classq_pkt_t head, tail;
		CLASSQ_PKT_INIT_MBUF(&head, m_head);
		CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->dlth_pkts, &head, &tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context. All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		notify = dlil_input_stats_sync(ifp, inp);
	}

	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	return 0;
}
static errno_t
dlil_input_sync(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;
	classq_pkt_t head, tail;

	ASSERT(inp != dlil_main_input_thread);

	/* XXX: should we just assert instead? */
	if (__improbable(m_head == NULL)) {
		return 0;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);

	lck_mtx_lock_spin(&inp->dlth_lock);
	_addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);

#if IFNET_INPUT_SANITY_CHK
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);

	m_cnt = qlen(&inp->dlth_pkts);
	_getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);

	notify = dlil_input_stats_sync(ifp, inp);

	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * NOTE warning %%% attention !!!!
	 * We should think about putting some thread starvation
	 * safeguards if we deal with long chains of packets.
	 */
	if (head.cp_mbuf != NULL) {
		dlil_input_packet_list_extended(NULL, head.cp_mbuf,
		    m_cnt, ifp->if_poll_mode);
	}

	return 0;
}
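
/*
 * Transmit-side starter: ifnet_start_common() records a start request and
 * wakes the per-interface starter thread unless output is flow-controlled
 * or the request is being deliberately delayed for enqueue coalescing
 * (IFEF_ENQUEUE_MULTI).  The starter thread then calls the driver's
 * if_start routine until the request counter stops advancing.
 */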
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
	struct ifnet *ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag. This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct ifclassq *ifq = &ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_start_thread == THREAD_NULL)) {
		goto terminate;
	}

	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		if (!IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		} else {
			ifp->if_start_delayed = 0;
		}
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

	if (__probable(ifp->if_start_thread != THREAD_NULL)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
	} else {
		*(&ifp->if_start_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
static inline void
ifnet_poll_wakeup(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);

	ifp->if_poll_req++;
	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
	    ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
}

void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    ifp->if_poll_thread == THREAD_NULL)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		struct mbuf *m_head, *m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    ifp->if_poll_thread == THREAD_NULL) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (ifp->if_poll_thread != THREAD_NULL) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
			ifp->if_poll_update++;
		}
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return 0;
}
*ifp
, u_int32_t
*maxqlen
)
3721 if (ifp
== NULL
|| maxqlen
== NULL
) {
3723 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3727 *maxqlen
= ifclassq_get_maxlen(&ifp
->if_snd
);
3733 ifnet_get_sndq_len(struct ifnet
*ifp
, u_int32_t
*pkts
)
3737 if (ifp
== NULL
|| pkts
== NULL
) {
3739 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3742 err
= ifclassq_get_len(&ifp
->if_snd
, MBUF_SC_UNSPEC
,
3750 ifnet_get_service_class_sndq_len(struct ifnet
*ifp
, mbuf_svc_class_t sc
,
3751 u_int32_t
*pkts
, u_int32_t
*bytes
)
3755 if (ifp
== NULL
|| !MBUF_VALID_SC(sc
) ||
3756 (pkts
== NULL
&& bytes
== NULL
)) {
3758 } else if (!(ifp
->if_eflags
& IFEF_TXSTART
)) {
3761 err
= ifclassq_get_len(&ifp
->if_snd
, sc
, pkts
, bytes
);
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	if (maxqlen == 0) {
		maxqlen = if_rcvq_maxlen;
	} else if (maxqlen < IF_RCVQ_MINLEN) {
		maxqlen = IF_RCVQ_MINLEN;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->dlth_lock);
	qlimit(&inp->dlth_pkts) = maxqlen;
	lck_mtx_unlock(&inp->dlth_lock);

	return 0;
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->dlth_lock);
	*maxqlen = qlimit(&inp->dlth_pkts);
	lck_mtx_unlock(&inp->dlth_lock);
	return 0;
}
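
/*
 * Enqueue coalescing knobs: a driver that wants its if_start callback
 * batched passes a queue-length threshold and a timeout (in microseconds,
 * converted to nanoseconds below); both must be non-zero to turn
 * IFEF_ENQUEUE_MULTI on, and the values are capped at 100 packets and
 * 20 ms respectively.
 */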
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
	}
}
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;

		ASSERT(ip_ver == IPV6_VERSION);

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,

    volatile uint64_t *fg_ts = NULL;
    volatile uint64_t *rt_ts = NULL;
    struct timespec now;
    u_int64_t now_nsec = 0;

    uint8_t *mcast_buf = NULL;

    ASSERT(ifp->if_eflags & IFEF_TXSTART);

    /*
     * If packet already carries a timestamp, either from dlil_output()
     * or from flowswitch, use it here.  Otherwise, record timestamp.
     * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
     * the timestamp value is used internally there.
     */
    switch (p->cp_ptype) {

        ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
        ASSERT(p->cp_mbuf->m_nextpkt == NULL);

        if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
            p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {

            net_timernsec(&now, &now_nsec);
            p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;

        p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;

        /*
         * If the packet service class is not background,
         * update the timestamp to indicate recent activity
         * on a foreground socket.
         */
        if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
            p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
            if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
                PKTF_SO_BACKGROUND)) {
                ifp->if_fg_sendts = (uint32_t)_net_uptime;
                if (fg_ts != NULL) {
                    *fg_ts = (uint32_t)_net_uptime;

            if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
                ifp->if_rt_sendts = (uint32_t)_net_uptime;
                if (rt_ts != NULL) {
                    *rt_ts = (uint32_t)_net_uptime;

        pktlen = m_pktlen(p->cp_mbuf);
        /*
         * Some Wi-Fi AP implementations do not correctly handle
         * multicast IP packets with DSCP bits set (radr://9331522).
         * As a workaround we clear the DSCP bits but keep service
         * class (rdar://51507725).
         */
        if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
            IFNET_IS_WIFI_INFRA(ifp)) {
            size_t len = mbuf_len(p->cp_mbuf), hlen;
            struct ether_header *eh;
            boolean_t pullup = FALSE;

            if (__improbable(len < sizeof(struct ether_header))) {
                DTRACE_IP1(small__ether, size_t, len);
                if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
                    sizeof(struct ether_header))) == NULL) {

            eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
            etype = ntohs(eh->ether_type);
            if (etype == ETHERTYPE_IP) {
                hlen = sizeof(struct ether_header) +

                    DTRACE_IP1(small__v4, size_t, len);

            } else if (etype == ETHERTYPE_IPV6) {
                hlen = sizeof(struct ether_header) +
                    sizeof(struct ip6_hdr);

                    DTRACE_IP1(small__v6, size_t, len);

                ip_ver = IPV6_VERSION;

                DTRACE_IP1(invalid__etype, uint16_t, etype);

                if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==

                eh = (struct ether_header *)mbuf_data(

            mcast_buf = (uint8_t *)(eh + 1);
            /*
             * ifnet_mcast_clear_dscp() will finish the work below.
             * Note that the pullups above ensure that mcast_buf
             * points to a full IP header.
             */

        __builtin_unreachable();

    if (mcast_buf != NULL) {
        ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
    if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
        if (now_nsec == 0) {
            net_timernsec(&now, &now_nsec);

        /*
         * If the driver chose to delay start callback for
         * coalescing multiple packets, then use the following
         * heuristics to make sure that start callback will
         * be delayed only when bulk data transfer is detected.
         * 1. number of packets enqueued in (delay_win * 2) is
         * greater than or equal to the delay qlen.
         * 2. If delay_start is enabled it will stay enabled for
         * another 10 idle windows.  This is to take into account
         * variable RTT and burst traffic.
         * 3. If the time elapsed since last enqueue is more
         * than 200ms we disable delaying start callback.  This
         * is to take idle time into account.
         */
        u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
        if (ifp->if_start_delay_swin > 0) {
            if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
                ifp->if_start_delay_cnt++;
            } else if ((now_nsec - ifp->if_start_delay_swin)
                >= (200 * 1000 * 1000)) {
                ifp->if_start_delay_swin = now_nsec;
                ifp->if_start_delay_cnt = 1;
                ifp->if_start_delay_idle = 0;
                if (ifp->if_eflags & IFEF_DELAY_START) {
                    if_clear_eflags(ifp, IFEF_DELAY_START);
                    ifnet_delay_start_disabled_increment();

                if (ifp->if_start_delay_cnt >=
                    ifp->if_start_delay_qlen) {
                    if_set_eflags(ifp, IFEF_DELAY_START);
                    ifp->if_start_delay_idle = 0;

                    if (ifp->if_start_delay_idle >= 10) {
                        if_clear_eflags(ifp,

                        ifnet_delay_start_disabled_increment();

                    ifp->if_start_delay_idle++;

                ifp->if_start_delay_swin = now_nsec;
                ifp->if_start_delay_cnt = 1;

            ifp->if_start_delay_swin = now_nsec;
            ifp->if_start_delay_cnt = 1;
            ifp->if_start_delay_idle = 0;
            if_clear_eflags(ifp, IFEF_DELAY_START);

        if_clear_eflags(ifp, IFEF_DELAY_START);
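/*
 * Note (added): a rough sketch of the heuristic above, assuming an
 * if_start_delay_timeout of 2 ms (so dwin = 4 ms): enqueues that land
 * within dwin of the start of the current window bump
 * if_start_delay_cnt, and once the count reaches if_start_delay_qlen
 * the IFEF_DELAY_START flag is set so the driver's start callback can
 * be deferred for coalescing.  A gap of 200 ms or more since the window
 * started, or roughly ten consecutive idle windows, clears the flag and
 * restarts the accounting.  The figures here are illustrative only; the
 * authoritative values are the ones computed in the code above.
 */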
    /* enqueue the packet (caller consumes object) */
    error = ifclassq_enqueue(&ifp->if_snd, p, p, 1, pktlen, pdrop);

    /*
     * Tell the driver to start dequeueing; do this even when the queue
     * for the packet is suspended (EQSUSPENDED), as the driver could still
     * be dequeueing from other unsuspended queues.
     */
    if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
        ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,

    /* enqueue the packet (caller consumes object) */
    error = ifclassq_enqueue(&ifp->if_snd, head, tail, cnt, bytes, pdrop);

    /*
     * Tell the driver to start dequeueing; do this even when the queue
     * for the packet is suspended (EQSUSPENDED), as the driver could still
     * be dequeueing from other unsuspended queues.
     */
    if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {

ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)

    struct ifnet *ifp = handle;
    boolean_t pdrop; /* dummy */

    ASSERT(n_pkts >= 1);
    for (i = 0; i < n_pkts - 1; i++) {
        (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,

    /* flush with the last packet */
    (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,

    if (ifp->if_output_netem != NULL) {
        return netem_enqueue(ifp->if_output_netem, pkt, pdrop);

    return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);

ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)

    return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);

ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,

    if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
        m->m_nextpkt != NULL) {

    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        !IF_FULLY_ATTACHED(ifp)) {
        /* flag tested without lock for performance */

    } else if (!(ifp->if_flags & IFF_UP)) {

    CLASSQ_PKT_INIT_MBUF(&pkt, m);
    return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);

ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,

    classq_pkt_t head, tail;

    ASSERT(m_head != NULL);
    ASSERT((m_head->m_flags & M_PKTHDR) != 0);
    ASSERT(m_tail != NULL);
    ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
    ASSERT(ifp != NULL);
    ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);

    if (!IF_FULLY_ATTACHED(ifp)) {
        /* flag tested without lock for performance */
        m_freem_list(m_head);

    } else if (!(ifp->if_flags & IFF_UP)) {
        m_freem_list(m_head);

    CLASSQ_PKT_INIT_MBUF(&head, m_head);
    CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
    return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)

    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

    if (ifp == NULL || mp == NULL) {
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {

    if (!ifnet_is_attached(ifp, 1)) {

    rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
        &pkt, NULL, NULL, NULL);
    VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);

ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,

    classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

    if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {

    if (!ifnet_is_attached(ifp, 1)) {

    rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
    VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);

ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)

    classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
    classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

    if (ifp == NULL || head == NULL || pkt_limit < 1) {
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {

    if (!ifnet_is_attached(ifp, 1)) {

    rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
    VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *head = pkt_head.cp_mbuf;

    *tail = pkt_tail.cp_mbuf;

ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)

    classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
    classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

    if (ifp == NULL || head == NULL || byte_limit < 1) {
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {

    if (!ifnet_is_attached(ifp, 1)) {

    rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
        byte_limit, &pkt_head, &pkt_tail, cnt, len);
    VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *head = pkt_head.cp_mbuf;

    *tail = pkt_tail.cp_mbuf;

ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,

    classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
    classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

    if (ifp == NULL || head == NULL || pkt_limit < 1 ||
        !MBUF_VALID_SC(sc)) {
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {

    if (!ifnet_is_attached(ifp, 1)) {

    rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,

    VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
    ifnet_decr_iorefcnt(ifp);
    *head = pkt_head.cp_mbuf;

    *tail = pkt_tail.cp_mbuf;
#if XNU_TARGET_OS_OSX

ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)

    return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);

#endif /* XNU_TARGET_OS_OSX */

packet_has_vlan_tag(struct mbuf * m)

    if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
        tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);

            /* the packet is just priority-tagged, clear the bit */
            m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
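/*
 * Note (added): EVL_VLANOFTAG() extracts the 12-bit VLAN ID from the
 * 802.1Q TCI carried in m_pkthdr.vlan_tag.  A zero VLAN ID with
 * CSUM_VLAN_TAG_VALID set therefore indicates a priority-tagged frame
 * rather than a real VLAN member, which is why the flag is cleared in
 * that case instead of treating the packet as VLAN-tagged.
 */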
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)

    boolean_t is_vlan_packet = FALSE;
    struct ifnet_filter *filter;
    struct mbuf *m = *m_p;

    is_vlan_packet = packet_has_vlan_tag(m);

    if (TAILQ_EMPTY(&ifp->if_flt_head)) {

    /*
     * Pass the inbound packet to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {

        /* exclude VLAN packets from external filters PR-3586856 */
        if (is_vlan_packet &&
            (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {

        if (!filter->filt_skip && filter->filt_input != NULL &&
            (filter->filt_protocol == 0 ||
            filter->filt_protocol == protocol_family)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = (*filter->filt_input)(filter->filt_cookie,
                ifp, protocol_family, m_p, frame_header_p);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /*
     * Strip away M_PROTO1 bit prior to sending packet up the stack as
     * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
     */
    (*m_p)->m_flags &= ~M_PROTO1;
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)

    boolean_t is_vlan_packet;
    struct ifnet_filter *filter;
    struct mbuf *m = *m_p;

    is_vlan_packet = packet_has_vlan_tag(m);

    /*
     * Pass the outbound packet to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {

        /* exclude VLAN packets from external filters PR-3586856 */
        if (is_vlan_packet &&
            (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {

        if (!filter->filt_skip && filter->filt_output != NULL &&
            (filter->filt_protocol == 0 ||
            filter->filt_protocol == protocol_family)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_output(filter->filt_cookie, ifp,
                protocol_family, m_p);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

                /* we're done with the filter list */
                if_flt_monitor_unbusy(ifp);
                lck_mtx_unlock(&ifp->if_flt_lock);

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)

    if (ifproto->proto_kpi == kProtoKPI_v1) {
        /* Version 1 protocols get one packet at a time */

        char * frame_header;

            next_packet = m->m_nextpkt;
            m->m_nextpkt = NULL;
            frame_header = m->m_pkthdr.pkt_hdr;
            m->m_pkthdr.pkt_hdr = NULL;
            error = (*ifproto->kpi.v1.input)(ifproto->ifp,
                ifproto->protocol_family, m, frame_header);
            if (error != 0 && error != EJUSTRETURN) {

    } else if (ifproto->proto_kpi == kProtoKPI_v2) {
        /* Version 2 protocols support packet lists */
        error = (*ifproto->kpi.v2.input)(ifproto->ifp,
            ifproto->protocol_family, m);
        if (error != 0 && error != EJUSTRETURN) {
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)

    struct ifnet_stat_increment_param *d = &inp->dlth_stats;

    if (s->packets_in != 0) {
        d->packets_in += s->packets_in;

    if (s->bytes_in != 0) {
        d->bytes_in += s->bytes_in;

    if (s->errors_in != 0) {
        d->errors_in += s->errors_in;

    if (s->packets_out != 0) {
        d->packets_out += s->packets_out;

    if (s->bytes_out != 0) {
        d->bytes_out += s->bytes_out;

    if (s->errors_out != 0) {
        d->errors_out += s->errors_out;

    if (s->collisions != 0) {
        d->collisions += s->collisions;

    if (s->dropped != 0) {
        d->dropped += s->dropped;

        PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)

    struct ifnet_stat_increment_param *s = &inp->dlth_stats;

    /*
     * Use of atomic operations is unavoidable here because
     * these stats may also be incremented elsewhere via KPIs.
     */
    if (s->packets_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);

    if (s->bytes_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);

    if (s->errors_in != 0) {
        atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);

    if (s->packets_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);

    if (s->bytes_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);

    if (s->errors_out != 0) {
        atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);

    if (s->collisions != 0) {
        atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);

    if (s->dropped != 0) {
        atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);

    /*
     * No need for atomic operations as they are modified here
     * only from within the DLIL input thread context.
     */
    if (ifp->if_poll_tstats.packets != 0) {
        ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
        ifp->if_poll_tstats.packets = 0;

    if (ifp->if_poll_tstats.bytes != 0) {
        ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
        ifp->if_poll_tstats.bytes = 0;

    return ifp->if_data_threshold != 0;
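/*
 * Note (added): input statistics are accumulated in two stages.  The
 * per-input-thread counters in inp->dlth_stats are bumped without
 * atomics on the hot path (dlil_input_stats_add() above), and are then
 * folded into the interface-wide ifi_* counters with atomic_add_64()
 * here, since those counters can also be updated from other contexts
 * via KPIs.  The boolean result appears to let the caller decide
 * whether if_data_threshold handling is needed; that reading is
 * inferred from the return expression above.
 */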
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)

    return dlil_input_packet_list_common(ifp, m, 0,
        IFNET_MODEL_INPUT_POLL_OFF, FALSE);

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)

    return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)

    protocol_family_t protocol_family;

    ifnet_t ifp = ifp_param;
    char *frame_header = NULL;
    struct if_proto *last_ifproto = NULL;
    mbuf_t pkt_first = NULL;
    mbuf_t *pkt_next = NULL;
    u_int32_t poll_thresh = 0, poll_ival = 0;

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
        (poll_ival = if_rxpoll_interval_pkts) > 0) {

        struct if_proto *ifproto = NULL;
        uint32_t pktf_mask; /* pkt flags to preserve */

        if (ifp_param == NULL) {
            ifp = m->m_pkthdr.rcvif;

        if ((ifp->if_eflags & IFEF_RXPOLL) &&
            (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
            poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {

        /* Check if this mbuf looks valid */
        MBUF_INPUT_CHECK(m, ifp);

        next_packet = m->m_nextpkt;
        m->m_nextpkt = NULL;
        frame_header = m->m_pkthdr.pkt_hdr;
        m->m_pkthdr.pkt_hdr = NULL;

        /*
         * Get an IO reference count if the interface is not
         * loopback (lo0) and it is attached; lo0 never goes
         * away, so optimize for that.
         */
        if (ifp != lo_ifp) {
            /* iorefcnt is 0 if it hasn't been taken yet */
            if (iorefcnt == 0) {
                if (!ifnet_datamov_begin(ifp)) {

            /*
             * Preserve the time stamp and skip pktap flags.
             */
            pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;

            /*
             * If this arrived on lo0, preserve interface addr
             * info to allow for connectivity between loopback
             * and local interface addresses.
             */
            pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);

        /* make sure packet comes in clean */
        m_classifier_init(m, pktf_mask);

        ifp_inc_traffic_class_in(ifp, m);

        /* find which protocol family this packet is for */
        ifnet_lock_shared(ifp);
        error = (*ifp->if_demux)(ifp, m, frame_header,

        ifnet_lock_done(ifp);

            if (error == EJUSTRETURN) {

            protocol_family = 0;

            pktap_input(ifp, protocol_family, m, frame_header);

        /* Drop v4 packets received on CLAT46 enabled interface */
        if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {

            ip6stat.ip6s_clat464_in_v4_drop++;

        /* Translate the packet if it is received on CLAT interface */
        if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
            && dlil_is_clat_needed(protocol_family, m)) {

            struct ether_header eh;
            struct ether_header *ehp = NULL;

            if (ifp->if_type == IFT_ETHER) {
                ehp = (struct ether_header *)(void *)frame_header;
                /* Skip RX Ethernet packets if they are not IPV6 */
                if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {

                /* Keep a copy of frame_header for Ethernet packets */
                bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);

            error = dlil_clat64(ifp, &protocol_family, &m);
            data = (char *) mbuf_data(m);

                ip6stat.ip6s_clat464_in_drop++;

            /* Native v6 should be No-op */
            if (protocol_family != PF_INET) {

            /* Do this only for translated v4 packets. */
            switch (ifp->if_type) {

                frame_header = data;

                /*
                 * Drop if the mbuf doesn't have enough
                 * space for Ethernet header
                 */
                if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {

                    ip6stat.ip6s_clat464_in_drop++;

                /*
                 * Set the frame_header ETHER_HDR_LEN bytes
                 * preceding the data pointer.  Change
                 * the ether_type too.
                 */
                frame_header = data - ETHER_HDR_LEN;
                eh.ether_type = htons(ETHERTYPE_IP);
                bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
        if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
            !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
            dlil_input_cksum_dbg(ifp, m, frame_header,

        /*
         * For partial checksum offload, we expect the driver to
         * set the start offset indicating the start of the span
         * that is covered by the hardware-computed checksum;
         * adjust this start offset accordingly because the data
         * pointer has been advanced beyond the link-layer header.
         *
         * Virtual LAN types (bridge, vlan, bond) can call
         * dlil_input_packet_list() with the same packet with the
         * checksum flags set.  Set a flag indicating that the
         * adjustment has already been done.
         */
        if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
            /* adjustment has already been done */
        } else if ((m->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {

            if (frame_header == NULL ||
                frame_header < (char *)mbuf_datastart(m) ||
                frame_header > (char *)m->m_data ||
                (adj = (int)(m->m_data - frame_header)) >
                m->m_pkthdr.csum_rx_start) {
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
                hwcksum_in_invalidated++;

                m->m_pkthdr.csum_rx_start -= adj;

            /* make sure we don't adjust more than once */
            m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;

        pktap_input(ifp, protocol_family, m, frame_header);
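/*
 * Note (added): a concrete instance of the adjustment above, assuming a
 * plain Ethernet frame: frame_header points at the 14-byte link-layer
 * header and m->m_data has already been advanced past it, so
 * adj = m->m_data - frame_header = 14 and csum_rx_start is reduced by 14
 * so that it is relative to the new data pointer.  If the computed adj
 * would exceed csum_rx_start, or frame_header does not lie within the
 * mbuf, the partial checksum is invalidated instead of being adjusted.
 */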
        if (m->m_flags & (M_BCAST | M_MCAST)) {
            atomic_add_64(&ifp->if_imcasts, 1);

        /* run interface filters */
        error = dlil_interface_filters_input(ifp, &m,
            &frame_header, protocol_family);

            if (error != EJUSTRETURN) {

        /*
         * A VLAN interface receives VLAN-tagged packets by attaching
         * its PF_VLAN protocol to a parent interface.  When a VLAN
         * interface is a member of a bridge, the parent interface
         * receives VLAN-tagged M_PROMISC packets.  A VLAN-tagged
         * M_PROMISC packet must be processed by the VLAN protocol
         * so that it can be sent up the stack via
         * dlil_input_packet_list().  That allows the bridge interface's
         * input filter, attached to the VLAN interface, to process
         */
        if (protocol_family != PF_VLAN &&
            (m->m_flags & M_PROMISC) != 0) {

        /* Lookup the protocol attachment to this interface */
        if (protocol_family == 0) {

        } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
            (last_ifproto->protocol_family == protocol_family)) {
            VERIFY(ifproto == NULL);
            ifproto = last_ifproto;
            if_proto_ref(last_ifproto);

            VERIFY(ifproto == NULL);
            ifnet_lock_shared(ifp);
            /* callee holds a proto refcnt upon success */
            ifproto = find_attached_proto(ifp, protocol_family);
            ifnet_lock_done(ifp);

            if (ifproto == NULL) {
                /* no protocol for this packet, discard */

        if (ifproto != last_ifproto) {
            if (last_ifproto != NULL) {
                /* pass up the list for the previous protocol */
                dlil_ifproto_input(last_ifproto, pkt_first);

                if_proto_free(last_ifproto);

            last_ifproto = ifproto;
            if_proto_ref(ifproto);

        /* extend the list */
        m->m_pkthdr.pkt_hdr = frame_header;
        if (pkt_first == NULL) {

        pkt_next = &m->m_nextpkt;

        if (next_packet == NULL && last_ifproto != NULL) {
            /* pass up the last list of packets */
            dlil_ifproto_input(last_ifproto, pkt_first);
            if_proto_free(last_ifproto);
            last_ifproto = NULL;

        if (ifproto != NULL) {
            if_proto_free(ifproto);

    /* update the driver's multicast filter, if needed */
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
        ifp->if_updatemcasts = 0;

    if (iorefcnt == 1) {
        /* If the next mbuf is on a different interface, unlock data-mov */
        if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
            ifnet_datamov_end(ifp);

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
if_mcasts_update(struct ifnet *ifp)

    err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
    if (err == EAFNOSUPPORT) {

    DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
        "(err=%d)\n", if_name(ifp),
        (err == 0 ? "successfully restored" : "failed to restore"),
        ifp->if_updatemcasts, err);

    /* just return success */
/* If ifp is set, we will increment the generation for the interface */

dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)

        ifnet_increment_generation(ifp);

        necp_update_all_clients();

    return kev_post_msg(event);

__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)

    struct kev_msg ev_msg;
    struct net_event_data ev_data;

    bzero(&ev_data, sizeof(ev_data));
    bzero(&ev_msg, sizeof(ev_msg));
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_DL_SUBCLASS;
    ev_msg.event_code = KEV_DL_SIFFLAGS;
    strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
    ev_data.if_family = ifp->if_family;
    ev_data.if_unit = (u_int32_t) ifp->if_unit;
    ev_msg.dv[0].data_length = sizeof(struct net_event_data);
    ev_msg.dv[0].data_ptr = &ev_data;
    ev_msg.dv[1].data_length = 0;
    dlil_post_complete_msg(ifp, &ev_msg);
#define TMP_IF_PROTO_ARR_SIZE 10

dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)

    struct ifnet_filter *filter = NULL;
    struct if_proto *proto = NULL;
    int if_proto_count = 0;
    struct if_proto **tmp_ifproto_arr = NULL;
    struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
    int tmp_ifproto_arr_idx = 0;
    bool tmp_malloc = false;

    /*
     * Pass the event to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_event != NULL) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            filter->filt_event(filter->filt_cookie, ifp,
                filter->filt_protocol, event);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1)) {

    /*
     * An embedded tmp_list_entry in if_proto may still get
     * over-written by another thread after giving up ifnet lock,
     * therefore we are avoiding embedded pointers here.
     */
    ifnet_lock_shared(ifp);
    if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
    if (if_proto_count) {

        VERIFY(ifp->if_proto_hash != NULL);
        if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
            tmp_ifproto_arr = tmp_ifproto_stack_arr;

            MALLOC(tmp_ifproto_arr, struct if_proto **,
                sizeof(*tmp_ifproto_arr) * if_proto_count,

            if (tmp_ifproto_arr == NULL) {
                ifnet_lock_done(ifp);

        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
            SLIST_FOREACH(proto, &ifp->if_proto_hash[i],

                if_proto_ref(proto);
                tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
                tmp_ifproto_arr_idx++;

        VERIFY(if_proto_count == tmp_ifproto_arr_idx);

    ifnet_lock_done(ifp);

    for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
        tmp_ifproto_arr_idx++) {
        proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
        VERIFY(proto != NULL);
        proto_media_event eventp =
            (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.event :
            proto->kpi.v2.event);

        if (eventp != NULL) {
            eventp(ifp, proto->protocol_family,

        if_proto_free(proto);

        FREE(tmp_ifproto_arr, M_TEMP);

    /* Pass the event to the interface */
    if (ifp->if_event != NULL) {
        ifp->if_event(ifp, event);

    /* Release the io ref count */
    ifnet_decr_iorefcnt(ifp);

    return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)

    struct kev_msg kev_msg;

    if (ifp == NULL || event == NULL) {

    bzero(&kev_msg, sizeof(kev_msg));
    kev_msg.vendor_code = event->vendor_code;
    kev_msg.kev_class = event->kev_class;
    kev_msg.kev_subclass = event->kev_subclass;
    kev_msg.event_code = event->event_code;
    kev_msg.dv[0].data_ptr = &event->event_data[0];
    kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
    kev_msg.dv[1].data_length = 0;

    result = dlil_event_internal(ifp, &kev_msg, TRUE);
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)

        atomic_add_64(&cls->cls_one, 1);

        atomic_add_64(&cls->cls_two, 1);

        atomic_add_64(&cls->cls_three, 1);

        atomic_add_64(&cls->cls_four, 1);

        atomic_add_64(&cls->cls_five_or_more, 1);
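/*
 * Note (added): dlil_count_chain_len() maintains a small histogram of
 * the number of elements in an outbound mbuf chain; chains of one to
 * four elements land in cls_one..cls_four and anything longer is
 * counted in cls_five_or_more.  The bucketing is inferred from the
 * counter names used above.
 */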
5144 * Caller should have a lock on the protocol domain if the protocol
5145 * doesn't support finer grained locking. In most cases, the lock
5146 * will be held from the socket layer and won't be released until
5147 * we return back to the socket layer.
5149 * This does mean that we must take a protocol lock before we take
5150 * an interface lock if we're going to take both. This makes sense
5151 * because a protocol is likely to interact with an ifp while it
5152 * is under the protocol lock.
5154 * An advisory code will be returned if adv is not null. This
5155 * can be used to provide feedback about interface queues to the
5159 dlil_output(ifnet_t ifp
, protocol_family_t proto_family
, mbuf_t packetlist
,
5160 void *route
, const struct sockaddr
*dest
, int raw
, struct flowadv
*adv
)
5162 char *frame_type
= NULL
;
5163 char *dst_linkaddr
= NULL
;
5165 char frame_type_buffer
[MAX_FRAME_TYPE_SIZE
* 4];
5166 char dst_linkaddr_buffer
[MAX_LINKADDR
* 4];
5167 struct if_proto
*proto
= NULL
;
5169 mbuf_t send_head
= NULL
;
5170 mbuf_t
*send_tail
= &send_head
;
5172 u_int32_t pre
= 0, post
= 0;
5173 u_int32_t fpkts
= 0, fbytes
= 0;
5175 struct timespec now
;
5177 boolean_t did_clat46
= FALSE
;
5178 protocol_family_t old_proto_family
= proto_family
;
5179 struct sockaddr_in6 dest6
;
5180 struct rtentry
*rt
= NULL
;
5181 u_int32_t m_loop_set
= 0;
5183 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
5186 * Get an io refcnt if the interface is attached to prevent ifnet_detach
5187 * from happening while this operation is in progress
5189 if (!ifnet_datamov_begin(ifp
)) {
5195 VERIFY(ifp
->if_output_dlil
!= NULL
);
5197 /* update the driver's multicast filter, if needed */
5198 if (ifp
->if_updatemcasts
> 0 && if_mcasts_update(ifp
) == 0) {
5199 ifp
->if_updatemcasts
= 0;
5202 frame_type
= frame_type_buffer
;
5203 dst_linkaddr
= dst_linkaddr_buffer
;
5206 ifnet_lock_shared(ifp
);
5207 /* callee holds a proto refcnt upon success */
5208 proto
= find_attached_proto(ifp
, proto_family
);
5209 if (proto
== NULL
) {
5210 ifnet_lock_done(ifp
);
5214 ifnet_lock_done(ifp
);
5218 if (packetlist
== NULL
) {
5223 packetlist
= packetlist
->m_nextpkt
;
5224 m
->m_nextpkt
= NULL
;
5227 * Perform address family translation for the first
5228 * packet outside the loop in order to perform address
5229 * lookup for the translated proto family.
5231 if (proto_family
== PF_INET
&& IS_INTF_CLAT46(ifp
) &&
5232 (ifp
->if_type
== IFT_CELLULAR
||
5233 dlil_is_clat_needed(proto_family
, m
))) {
5234 retval
= dlil_clat46(ifp
, &proto_family
, &m
);
5236 * Go to the next packet if translation fails
5241 ip6stat
.ip6s_clat464_out_drop
++;
5242 /* Make sure that the proto family is PF_INET */
5243 ASSERT(proto_family
== PF_INET
);
5247 * Free the old one and make it point to the IPv6 proto structure.
5249 * Change proto for the first time we have successfully
5250 * performed address family translation.
5252 if (!did_clat46
&& proto_family
== PF_INET6
) {
5255 if (proto
!= NULL
) {
5256 if_proto_free(proto
);
5258 ifnet_lock_shared(ifp
);
5259 /* callee holds a proto refcnt upon success */
5260 proto
= find_attached_proto(ifp
, proto_family
);
5261 if (proto
== NULL
) {
5262 ifnet_lock_done(ifp
);
5268 ifnet_lock_done(ifp
);
5269 if (ifp
->if_type
== IFT_ETHER
) {
5270 /* Update the dest to translated v6 address */
5271 dest6
.sin6_len
= sizeof(struct sockaddr_in6
);
5272 dest6
.sin6_family
= AF_INET6
;
5273 dest6
.sin6_addr
= (mtod(m
, struct ip6_hdr
*))->ip6_dst
;
5274 dest
= (const struct sockaddr
*)&dest6
;
5277 * Lookup route to the translated destination
5278 * Free this route ref during cleanup
5280 rt
= rtalloc1_scoped((struct sockaddr
*)&dest6
,
5281 0, 0, ifp
->if_index
);
5289 * This path gets packet chain going to the same destination.
5290 * The pre output routine is used to either trigger resolution of
5291 * the next hop or retreive the next hop's link layer addressing.
5292 * For ex: ether_inet(6)_pre_output routine.
5294 * If the routine returns EJUSTRETURN, it implies that packet has
5295 * been queued, and therefore we have to call preout_again for the
5296 * following packet in the chain.
5298 * For errors other than EJUSTRETURN, the current packet is freed
5299 * and the rest of the chain (pointed by packetlist is freed as
5302 * Else if there is no error the retrieved information is used for
5303 * all the packets in the chain.
5306 proto_media_preout preoutp
= (proto
->proto_kpi
== kProtoKPI_v1
?
5307 proto
->kpi
.v1
.pre_output
: proto
->kpi
.v2
.pre_output
);
5309 if (preoutp
!= NULL
) {
5310 retval
= preoutp(ifp
, proto_family
, &m
, dest
, route
,
5311 frame_type
, dst_linkaddr
);
5314 if (retval
== EJUSTRETURN
) {
5326 * pkt_hdr is set here to point to m_data prior to
5327 * calling into the framer. This value of pkt_hdr is
5328 * used by the netif gso logic to retrieve the ip header
5329 * for the TCP packets, offloaded for TSO processing.
5331 if ((raw
!= 0) && (ifp
->if_family
== IFNET_FAMILY_ETHERNET
)) {
5332 uint8_t vlan_encap_len
= 0;
5334 if ((m
->m_pkthdr
.csum_flags
& CSUM_VLAN_ENCAP_PRESENT
) != 0) {
5335 vlan_encap_len
= ETHER_VLAN_ENCAP_LEN
;
5337 m
->m_pkthdr
.pkt_hdr
= mtod(m
, char *) + ETHER_HDR_LEN
+ vlan_encap_len
;
5339 m
->m_pkthdr
.pkt_hdr
= mtod(m
, void *);
5343 * Perform address family translation if needed.
5344 * For now we only support stateless 4 to 6 translation
5347 * The routine below translates IP header, updates protocol
5348 * checksum and also translates ICMP.
5350 * We skip the first packet as it is already translated and
5351 * the proto family is set to PF_INET6.
5353 if (proto_family
== PF_INET
&& IS_INTF_CLAT46(ifp
) &&
5354 (ifp
->if_type
== IFT_CELLULAR
||
5355 dlil_is_clat_needed(proto_family
, m
))) {
5356 retval
= dlil_clat46(ifp
, &proto_family
, &m
);
5357 /* Goto the next packet if the translation fails */
5361 ip6stat
.ip6s_clat464_out_drop
++;
5367 if (!raw
&& proto_family
== PF_INET
) {
5368 struct ip
*ip
= mtod(m
, struct ip
*);
5369 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
5370 struct ip
*, ip
, struct ifnet
*, ifp
,
5371 struct ip
*, ip
, struct ip6_hdr
*, NULL
);
5372 } else if (!raw
&& proto_family
== PF_INET6
) {
5373 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
5374 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
5375 struct ip6_hdr
*, ip6
, struct ifnet
*, ifp
,
5376 struct ip
*, NULL
, struct ip6_hdr
*, ip6
);
5378 #endif /* CONFIG_DTRACE */
5380 if (raw
== 0 && ifp
->if_framer
!= NULL
) {
5384 * If this is a broadcast packet that needs to be
5385 * looped back into the system, set the inbound ifp
5386 * to that of the outbound ifp. This will allow
5387 * us to determine that it is a legitimate packet
5388 * for the system. Only set the ifp if it's not
5389 * already set, just to be safe.
5391 if ((m
->m_flags
& (M_BCAST
| M_LOOP
)) &&
5392 m
->m_pkthdr
.rcvif
== NULL
) {
5393 m
->m_pkthdr
.rcvif
= ifp
;
5396 m_loop_set
= m
->m_flags
& M_LOOP
;
5397 retval
= ifp
->if_framer(ifp
, &m
, dest
, dst_linkaddr
,
5398 frame_type
, &pre
, &post
);
5400 if (retval
!= EJUSTRETURN
) {
5407 * For partial checksum offload, adjust the start
5408 * and stuff offsets based on the prepended header.
5410 if ((m
->m_pkthdr
.csum_flags
&
5411 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) ==
5412 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
5413 m
->m_pkthdr
.csum_tx_stuff
+= pre
;
5414 m
->m_pkthdr
.csum_tx_start
+= pre
;
5417 if (hwcksum_dbg
!= 0 && !(ifp
->if_flags
& IFF_LOOPBACK
)) {
5418 dlil_output_cksum_dbg(ifp
, m
, pre
,
5423 * Clear the ifp if it was set above, and to be
5424 * safe, only if it is still the same as the
5425 * outbound ifp we have in context. If it was
5426 * looped back, then a copy of it was sent to the
5427 * loopback interface with the rcvif set, and we
5428 * are clearing the one that will go down to the
5431 if (rcvif_set
&& m
->m_pkthdr
.rcvif
== ifp
) {
5432 m
->m_pkthdr
.rcvif
= NULL
;
5437 * Let interface filters (if any) do their thing ...
5439 retval
= dlil_interface_filters_output(ifp
, &m
, proto_family
);
5441 if (retval
!= EJUSTRETURN
) {
5447 * Strip away M_PROTO1 bit prior to sending packet
5448 * to the driver as this field may be used by the driver
5450 m
->m_flags
&= ~M_PROTO1
;
5453 * If the underlying interface is not capable of handling a
5454 * packet whose data portion spans across physically disjoint
5455 * pages, we need to "normalize" the packet so that we pass
5456 * down a chain of mbufs where each mbuf points to a span that
5457 * resides in the system page boundary. If the packet does
5458 * not cross page(s), the following is a no-op.
5460 if (!(ifp
->if_hwassist
& IFNET_MULTIPAGES
)) {
5461 if ((m
= m_normalize(m
)) == NULL
) {
5467 * If this is a TSO packet, make sure the interface still
5468 * advertise TSO capability.
5470 if (TSO_IPV4_NOTOK(ifp
, m
) || TSO_IPV6_NOTOK(ifp
, m
)) {
5476 ifp_inc_traffic_class_out(ifp
, m
);
5478 pktap_output(ifp
, proto_family
, m
, pre
, post
);
5481 * Count the number of elements in the mbuf chain
5483 if (tx_chain_len_count
) {
5484 dlil_count_chain_len(m
, &tx_chain_len_stats
);
5488 * Record timestamp; ifnet_enqueue() will use this info
5489 * rather than redoing the work. An optimization could
5490 * involve doing this just once at the top, if there are
5491 * no interface filters attached, but that's probably
5495 net_timernsec(&now
, &now_nsec
);
5496 (void) mbuf_set_timestamp(m
, now_nsec
, TRUE
);
5499 * Discard partial sum information if this packet originated
5500 * from another interface; the packet would already have the
5501 * final checksum and we shouldn't recompute it.
5503 if ((m
->m_pkthdr
.pkt_flags
& PKTF_FORWARDED
) &&
5504 (m
->m_pkthdr
.csum_flags
& (CSUM_DATA_VALID
| CSUM_PARTIAL
)) ==
5505 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
5506 m
->m_pkthdr
.csum_flags
&= ~CSUM_TX_FLAGS
;
5507 m
->m_pkthdr
.csum_data
= 0;
5511 * Finally, call the driver.
5513 if (ifp
->if_eflags
& (IFEF_SENDLIST
| IFEF_ENQUEUE_MULTI
)) {
5514 if (m
->m_pkthdr
.pkt_flags
& PKTF_FORWARDED
) {
5515 flen
+= (m_pktlen(m
) - (pre
+ post
));
5516 m
->m_pkthdr
.pkt_flags
&= ~PKTF_FORWARDED
;
5519 send_tail
= &m
->m_nextpkt
;
5521 if (m
->m_pkthdr
.pkt_flags
& PKTF_FORWARDED
) {
5522 flen
= (m_pktlen(m
) - (pre
+ post
));
5523 m
->m_pkthdr
.pkt_flags
&= ~PKTF_FORWARDED
;
5527 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
,
5529 retval
= (*ifp
->if_output_dlil
)(ifp
, m
);
5530 if (retval
== EQFULL
|| retval
== EQSUSPENDED
) {
5531 if (adv
!= NULL
&& adv
->code
== FADV_SUCCESS
) {
5532 adv
->code
= (retval
== EQFULL
?
5533 FADV_FLOW_CONTROLLED
:
5538 if (retval
== 0 && flen
> 0) {
5542 if (retval
!= 0 && dlil_verbose
) {
5543 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5544 __func__
, if_name(ifp
),
5547 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
,
5550 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
5555 m
->m_flags
|= m_loop_set
;
5556 packetlist
= packetlist
->m_nextpkt
;
5557 m
->m_nextpkt
= NULL
;
5559 /* Reset the proto family to old proto family for CLAT */
5561 proto_family
= old_proto_family
;
5563 } while (m
!= NULL
);
5565 if (send_head
!= NULL
) {
5566 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_START
,
5568 if (ifp
->if_eflags
& IFEF_SENDLIST
) {
5569 retval
= (*ifp
->if_output_dlil
)(ifp
, send_head
);
5570 if (retval
== EQFULL
|| retval
== EQSUSPENDED
) {
5572 adv
->code
= (retval
== EQFULL
?
5573 FADV_FLOW_CONTROLLED
:
5578 if (retval
== 0 && flen
> 0) {
5582 if (retval
!= 0 && dlil_verbose
) {
5583 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5584 __func__
, if_name(ifp
), retval
);
5587 struct mbuf
*send_m
;
5589 VERIFY(ifp
->if_eflags
& IFEF_ENQUEUE_MULTI
);
5590 while (send_head
!= NULL
) {
5592 send_head
= send_m
->m_nextpkt
;
5593 send_m
->m_nextpkt
= NULL
;
5594 retval
= (*ifp
->if_output_dlil
)(ifp
, send_m
);
5595 if (retval
== EQFULL
|| retval
== EQSUSPENDED
) {
5597 adv
->code
= (retval
== EQFULL
?
5598 FADV_FLOW_CONTROLLED
:
5609 if (retval
!= 0 && dlil_verbose
) {
5610 DLIL_PRINTF("%s: output error on %s "
5612 __func__
, if_name(ifp
), retval
);
5620 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
5623 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT
| DBG_FUNC_END
, 0, 0, 0, 0, 0);
5627 ifp
->if_fbytes
+= fbytes
;
5630 ifp
->if_fpackets
+= fpkts
;
5632 if (proto
!= NULL
) {
5633 if_proto_free(proto
);
5635 if (packetlist
) { /* if any packets are left, clean up */
5636 mbuf_freem_list(packetlist
);
5638 if (retval
== EJUSTRETURN
) {
5641 if (iorefcnt
== 1) {
5642 ifnet_datamov_end(ifp
);
5653 * This routine checks if the destination address is not a loopback, link-local,
5654 * multicast or broadcast address.
5657 dlil_is_clat_needed(protocol_family_t proto_family
, mbuf_t m
)
5660 switch (proto_family
) {
5662 struct ip
*iph
= mtod(m
, struct ip
*);
5663 if (CLAT46_NEEDED(ntohl(iph
->ip_dst
.s_addr
))) {
5669 struct ip6_hdr
*ip6h
= mtod(m
, struct ip6_hdr
*);
5670 if ((size_t)m_pktlen(m
) >= sizeof(struct ip6_hdr
) &&
5671 CLAT64_NEEDED(&ip6h
->ip6_dst
)) {
5681 * @brief This routine translates IPv4 packet to IPv6 packet,
5682 * updates protocol checksum and also translates ICMP for code
5683 * along with inner header translation.
5685 * @param ifp Pointer to the interface
5686 * @param proto_family pointer to protocol family. It is updated if function
5687 * performs the translation successfully.
5688 * @param m Pointer to the pointer pointing to the packet. Needed because this
5689 * routine can end up changing the mbuf to a different one.
5691 * @return 0 on success or else a negative value.
5694 dlil_clat46(ifnet_t ifp
, protocol_family_t
*proto_family
, mbuf_t
*m
)
5696 VERIFY(*proto_family
== PF_INET
);
5697 VERIFY(IS_INTF_CLAT46(ifp
));
5699 pbuf_t pbuf_store
, *pbuf
= NULL
;
5700 struct ip
*iph
= NULL
;
5701 struct in_addr osrc
, odst
;
5703 struct in6_ifaddr
*ia6_clat_src
= NULL
;
5704 struct in6_addr
*src
= NULL
;
5705 struct in6_addr dst
;
5708 uint16_t tot_len
= 0;
5709 uint16_t ip_id_val
= 0;
5710 uint16_t ip_frag_off
= 0;
5712 boolean_t is_frag
= FALSE
;
5713 boolean_t is_first_frag
= TRUE
;
5714 boolean_t is_last_frag
= TRUE
;
5716 pbuf_init_mbuf(&pbuf_store
, *m
, ifp
);
5718 iph
= pbuf
->pb_data
;
5723 off
= (uint16_t)(iph
->ip_hl
<< 2);
5724 ip_id_val
= iph
->ip_id
;
5725 ip_frag_off
= ntohs(iph
->ip_off
) & IP_OFFMASK
;
5727 tot_len
= ntohs(iph
->ip_len
);
5730 * For packets that are not first frags
5731 * we only need to adjust CSUM.
5732 * For 4 to 6, Fragmentation header gets appended
5733 * after proto translation.
5735 if (ntohs(iph
->ip_off
) & ~(IP_DF
| IP_RF
)) {
5738 /* If the offset is not zero, it is not first frag */
5739 if (ip_frag_off
!= 0) {
5740 is_first_frag
= FALSE
;
5743 /* If IP_MF is set, then it is not last frag */
5744 if (ntohs(iph
->ip_off
) & IP_MF
) {
5745 is_last_frag
= FALSE
;
5750 * Retrive the local IPv6 CLAT46 address reserved for stateless
5753 ia6_clat_src
= in6ifa_ifpwithflag(ifp
, IN6_IFF_CLAT46
);
5754 if (ia6_clat_src
== NULL
) {
5755 ip6stat
.ip6s_clat464_out_nov6addr_drop
++;
5760 src
= &ia6_clat_src
->ia_addr
.sin6_addr
;
5763 * Translate IPv4 destination to IPv6 destination by using the
5764 * prefixes learned through prior PLAT discovery.
5766 if ((error
= nat464_synthesize_ipv6(ifp
, &odst
, &dst
)) != 0) {
5767 ip6stat
.ip6s_clat464_out_v6synthfail_drop
++;
5771 /* Translate the IP header part first */
5772 error
= (nat464_translate_46(pbuf
, off
, iph
->ip_tos
, iph
->ip_p
,
5773 iph
->ip_ttl
, *src
, dst
, tot_len
) == NT_NAT64
) ? 0 : -1;
5775 iph
= NULL
; /* Invalidate iph as pbuf has been modified */
5778 ip6stat
.ip6s_clat464_out_46transfail_drop
++;
5783 * Translate protocol header, update checksum, checksum flags
5784 * and related fields.
5786 error
= (nat464_translate_proto(pbuf
, (struct nat464_addr
*)&osrc
, (struct nat464_addr
*)&odst
,
5787 proto
, PF_INET
, PF_INET6
, NT_OUT
, !is_first_frag
) == NT_NAT64
) ? 0 : -1;
5790 ip6stat
.ip6s_clat464_out_46proto_transfail_drop
++;
5794 /* Now insert the IPv6 fragment header */
5796 error
= nat464_insert_frag46(pbuf
, ip_id_val
, ip_frag_off
, is_last_frag
);
5799 ip6stat
.ip6s_clat464_out_46frag_transfail_drop
++;
5805 if (ia6_clat_src
!= NULL
) {
5806 IFA_REMREF(&ia6_clat_src
->ia_ifa
);
5809 if (pbuf_is_valid(pbuf
)) {
5811 pbuf
->pb_mbuf
= NULL
;
5815 ip6stat
.ip6s_clat464_out_invalpbuf_drop
++;
5819 *proto_family
= PF_INET6
;
5820 ip6stat
.ip6s_clat464_out_success
++;
5827 * @brief This routine translates incoming IPv6 to IPv4 packet,
5828 * updates protocol checksum and also translates ICMPv6 outer
5831 * @return 0 on success or else a negative value.
5834 dlil_clat64(ifnet_t ifp
, protocol_family_t
*proto_family
, mbuf_t
*m
)
5836 VERIFY(*proto_family
== PF_INET6
);
5837 VERIFY(IS_INTF_CLAT46(ifp
));
5839 struct ip6_hdr
*ip6h
= NULL
;
5840 struct in6_addr osrc
, odst
;
5842 struct in6_ifaddr
*ia6_clat_dst
= NULL
;
5843 struct in_ifaddr
*ia4_clat_dst
= NULL
;
5844 struct in_addr
*dst
= NULL
;
5848 u_int64_t tot_len
= 0;
5850 boolean_t is_first_frag
= TRUE
;
5852 /* Incoming mbuf does not contain valid IP6 header */
5853 if ((size_t)(*m
)->m_pkthdr
.len
< sizeof(struct ip6_hdr
) ||
5854 ((size_t)(*m
)->m_len
< sizeof(struct ip6_hdr
) &&
5855 (*m
= m_pullup(*m
, sizeof(struct ip6_hdr
))) == NULL
)) {
5856 ip6stat
.ip6s_clat464_in_tooshort_drop
++;
5860 ip6h
= mtod(*m
, struct ip6_hdr
*);
5861 /* Validate that mbuf contains IP payload equal to ip6_plen */
5862 if ((size_t)(*m
)->m_pkthdr
.len
< ntohs(ip6h
->ip6_plen
) + sizeof(struct ip6_hdr
)) {
5863 ip6stat
.ip6s_clat464_in_tooshort_drop
++;
5867 osrc
= ip6h
->ip6_src
;
5868 odst
= ip6h
->ip6_dst
;
5871 * Retrieve the local CLAT46 reserved IPv6 address.
5872 * Let the packet pass if we don't find one, as the flag
5873 * may get set before IPv6 configuration has taken place.
5875 ia6_clat_dst
= in6ifa_ifpwithflag(ifp
, IN6_IFF_CLAT46
);
5876 if (ia6_clat_dst
== NULL
) {
5881 * Check if the original dest in the packet is same as the reserved
5882 * CLAT46 IPv6 address
5884 if (IN6_ARE_ADDR_EQUAL(&odst
, &ia6_clat_dst
->ia_addr
.sin6_addr
)) {
5885 pbuf_t pbuf_store
, *pbuf
= NULL
;
5886 pbuf_init_mbuf(&pbuf_store
, *m
, ifp
);
5890 * Retrive the local CLAT46 IPv4 address reserved for stateless
5893 ia4_clat_dst
= inifa_ifpclatv4(ifp
);
5894 if (ia4_clat_dst
== NULL
) {
5895 IFA_REMREF(&ia6_clat_dst
->ia_ifa
);
5896 ip6stat
.ip6s_clat464_in_nov4addr_drop
++;
5900 IFA_REMREF(&ia6_clat_dst
->ia_ifa
);
5902 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5903 dst
= &ia4_clat_dst
->ia_addr
.sin_addr
;
5904 if ((error
= nat464_synthesize_ipv4(ifp
, &osrc
, &src
)) != 0) {
5905 ip6stat
.ip6s_clat464_in_v4synthfail_drop
++;
5910 ip6h
= pbuf
->pb_data
;
5911 off
= sizeof(struct ip6_hdr
);
5912 proto
= ip6h
->ip6_nxt
;
5913 tos
= (ntohl(ip6h
->ip6_flow
) >> 20) & 0xff;
5914 tot_len
= ntohs(ip6h
->ip6_plen
) + sizeof(struct ip6_hdr
);
5917 * Translate the IP header and update the fragmentation
5920 error
= (nat464_translate_64(pbuf
, off
, tos
, &proto
,
5921 ip6h
->ip6_hlim
, src
, *dst
, tot_len
, &is_first_frag
) == NT_NAT64
) ?
5924 ip6h
= NULL
; /* Invalidate ip6h as pbuf has been changed */
5927 ip6stat
.ip6s_clat464_in_64transfail_drop
++;
5932 * Translate protocol header, update checksum, checksum flags
5933 * and related fields.
5935 error
= (nat464_translate_proto(pbuf
, (struct nat464_addr
*)&osrc
,
5936 (struct nat464_addr
*)&odst
, proto
, PF_INET6
, PF_INET
,
5937 NT_IN
, !is_first_frag
) == NT_NAT64
) ? 0 : -1;
5940 ip6stat
.ip6s_clat464_in_64proto_transfail_drop
++;
5945 if (ia4_clat_dst
!= NULL
) {
5946 IFA_REMREF(&ia4_clat_dst
->ia_ifa
);
5949 if (pbuf_is_valid(pbuf
)) {
5951 pbuf
->pb_mbuf
= NULL
;
5955 ip6stat
.ip6s_clat464_in_invalpbuf_drop
++;
5959 *proto_family
= PF_INET
;
5960 ip6stat
.ip6s_clat464_in_success
++;
5962 } /* CLAT traffic */
5969 ifnet_ioctl(ifnet_t ifp
, protocol_family_t proto_fam
, u_long ioctl_code
,
5972 struct ifnet_filter
*filter
;
5973 int retval
= EOPNOTSUPP
;
5976 if (ifp
== NULL
|| ioctl_code
== 0) {
5980 /* Get an io ref count if the interface is attached */
5981 if (!ifnet_is_attached(ifp
, 1)) {
5986 * Run the interface filters first.
5987 * We want to run all filters before calling the protocol,
5988 * interface family, or interface.
5990 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
5991 /* prevent filter list from changing in case we drop the lock */
5992 if_flt_monitor_busy(ifp
);
5993 TAILQ_FOREACH(filter
, &ifp
->if_flt_head
, filt_next
) {
5994 if (filter
->filt_ioctl
!= NULL
&& (filter
->filt_protocol
== 0 ||
5995 filter
->filt_protocol
== proto_fam
)) {
5996 lck_mtx_unlock(&ifp
->if_flt_lock
);
5998 result
= filter
->filt_ioctl(filter
->filt_cookie
, ifp
,
5999 proto_fam
, ioctl_code
, ioctl_arg
);
6001 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
6003 /* Only update retval if no one has handled the ioctl */
6004 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
6005 if (result
== ENOTSUP
) {
6006 result
= EOPNOTSUPP
;
6009 if (retval
!= 0 && retval
!= EOPNOTSUPP
) {
6010 /* we're done with the filter list */
6011 if_flt_monitor_unbusy(ifp
);
6012 lck_mtx_unlock(&ifp
->if_flt_lock
);
6018 /* we're done with the filter list */
6019 if_flt_monitor_unbusy(ifp
);
6020 lck_mtx_unlock(&ifp
->if_flt_lock
);
6022 /* Allow the protocol to handle the ioctl */
6023 if (proto_fam
!= 0) {
6024 struct if_proto
*proto
;
6026 /* callee holds a proto refcnt upon success */
6027 ifnet_lock_shared(ifp
);
6028 proto
= find_attached_proto(ifp
, proto_fam
);
6029 ifnet_lock_done(ifp
);
6030 if (proto
!= NULL
) {
6031 proto_media_ioctl ioctlp
=
6032 (proto
->proto_kpi
== kProtoKPI_v1
?
6033 proto
->kpi
.v1
.ioctl
: proto
->kpi
.v2
.ioctl
);
6034 result
= EOPNOTSUPP
;
6035 if (ioctlp
!= NULL
) {
6036 result
= ioctlp(ifp
, proto_fam
, ioctl_code
,
6039 if_proto_free(proto
);
6041 /* Only update retval if no one has handled the ioctl */
6042 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
6043 if (result
== ENOTSUP
) {
6044 result
= EOPNOTSUPP
;
6047 if (retval
&& retval
!= EOPNOTSUPP
) {
6054 /* retval is either 0 or EOPNOTSUPP */
6057 * Let the interface handle this ioctl.
6058 * If it returns EOPNOTSUPP, ignore that, we may have
6059 * already handled this in the protocol or family.
6061 if (ifp
->if_ioctl
) {
6062 result
= (*ifp
->if_ioctl
)(ifp
, ioctl_code
, ioctl_arg
);
6065 /* Only update retval if no one has handled the ioctl */
6066 if (retval
== EOPNOTSUPP
|| result
== EJUSTRETURN
) {
6067 if (result
== ENOTSUP
) {
6068 result
= EOPNOTSUPP
;
6071 if (retval
&& retval
!= EOPNOTSUPP
) {
6077 if (retval
== EJUSTRETURN
) {
6081 ifnet_decr_iorefcnt(ifp
);
6086 __private_extern__ errno_t
6087 dlil_set_bpf_tap(ifnet_t ifp
, bpf_tap_mode mode
, bpf_packet_func callback
)
6092 if (ifp
->if_set_bpf_tap
) {
6093 /* Get an io reference on the interface if it is attached */
6094 if (!ifnet_is_attached(ifp
, 1)) {
6097 error
= ifp
->if_set_bpf_tap(ifp
, mode
, callback
);
6098 ifnet_decr_iorefcnt(ifp
);
6104 dlil_resolve_multi(struct ifnet
*ifp
, const struct sockaddr
*proto_addr
,
6105 struct sockaddr
*ll_addr
, size_t ll_len
)
6107 errno_t result
= EOPNOTSUPP
;
6108 struct if_proto
*proto
;
6109 const struct sockaddr
*verify
;
6110 proto_media_resolve_multi resolvep
;
6112 if (!ifnet_is_attached(ifp
, 1)) {
6116 bzero(ll_addr
, ll_len
);
6118 /* Call the protocol first; callee holds a proto refcnt upon success */
6119 ifnet_lock_shared(ifp
);
6120 proto
= find_attached_proto(ifp
, proto_addr
->sa_family
);
6121 ifnet_lock_done(ifp
);
6122 if (proto
!= NULL
) {
6123 resolvep
= (proto
->proto_kpi
== kProtoKPI_v1
?
6124 proto
->kpi
.v1
.resolve_multi
: proto
->kpi
.v2
.resolve_multi
);
6125 if (resolvep
!= NULL
) {
6126 result
= resolvep(ifp
, proto_addr
,
6127 (struct sockaddr_dl
*)(void *)ll_addr
, ll_len
);
6129 if_proto_free(proto
);
6132 /* Let the interface verify the multicast address */
6133 if ((result
== EOPNOTSUPP
|| result
== 0) && ifp
->if_check_multi
) {
6137 verify
= proto_addr
;
6139 result
= ifp
->if_check_multi(ifp
, verify
);
6142 ifnet_decr_iorefcnt(ifp
);
6146 __private_extern__ errno_t
6147 dlil_send_arp_internal(ifnet_t ifp
, u_short arpop
,
6148 const struct sockaddr_dl
*sender_hw
, const struct sockaddr
*sender_proto
,
6149 const struct sockaddr_dl
*target_hw
, const struct sockaddr
*target_proto
)
6151 struct if_proto
*proto
;
6154 /* callee holds a proto refcnt upon success */
6155 ifnet_lock_shared(ifp
);
6156 proto
= find_attached_proto(ifp
, target_proto
->sa_family
);
6157 ifnet_lock_done(ifp
);
6158 if (proto
== NULL
) {
6161 proto_media_send_arp arpp
;
6162 arpp
= (proto
->proto_kpi
== kProtoKPI_v1
?
6163 proto
->kpi
.v1
.send_arp
: proto
->kpi
.v2
.send_arp
);
6169 arpstat
.txrequests
++;
6170 if (target_hw
!= NULL
) {
6171 arpstat
.txurequests
++;
6175 arpstat
.txreplies
++;
6178 result
= arpp(ifp
, arpop
, sender_hw
, sender_proto
,
6179 target_hw
, target_proto
);
6181 if_proto_free(proto
);
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0) {
			uth->uu_network_marks |= pop;
		}
	}

	return (net_thread_marks_t)&base[pop];
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0) {
			uth->uu_network_marks &= ~unpop;
		}
	}

	return (net_thread_marks_t)&base[unpop];
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return uth->uu_network_marks & check;
	}
	return 0;
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return ~uth->uu_network_marks & check;
	}
	return 0;
}

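/*
 * Usage sketch (illustrative only, not part of the original file): the
 * marks above are meant to be used in strictly nested push/pop pairs on
 * the current uthread, e.g. with one of the NET_THREAD_* flag bits:
 *
 *	net_thread_marks_t marks;
 *
 *	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
 *	...		(region where the mark must be visible)
 *	net_thread_marks_pop(marks);
 *
 * Nested code can use net_thread_is_marked() to detect that the bit is
 * already set on the calling thread; a push returns only the bits it
 * newly set, so the matching pop clears exactly those bits.
 */
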
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (target_sin == NULL || sender_sin == NULL) {
		return FALSE;
	}

	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
}

__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}

/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp) {
			break;
		}
	}
	return _ifp != NULL;
}

/*
 * Caller has to pass a non-zero refio argument to get an
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0) {
			ifp->if_refio++;
		}
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}

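/*
 * Illustrative caller pattern (not part of the original file): a caller
 * that passes a non-zero refio and gets a non-zero return owns an extra
 * IO reference and must drop it with ifnet_decr_iorefcnt(), mirroring
 * dlil_set_bpf_tap() and dlil_resolve_multi() above:
 *
 *	if (!ifnet_is_attached(ifp, 1)) {
 *		return ENXIO;
 *	}
 *	...	(safe to use ifp here; detach is held off)
 *	ifnet_decr_iorefcnt(ifp);
 */
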
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}

__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}

boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}

void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that are blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}

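/*
 * Sketch of the intended quiesce sequence (illustrative only), derived
 * from the functions above:
 *
 *	ifnet_datamov_suspend(ifp);	(clears IFRF_READY, blocks new movers)
 *	ifnet_datamov_drain(ifp);	(waits until if_datamov drops to 0)
 *	...	(reconfigure the interface)
 *	ifnet_datamov_resume(ifp);	(last resume restores IFRF_READY)
 *
 * ifnet_datamov_begin()/end() bracket each data-moving thread; begin
 * fails once IFRF_READY has been cleared by a suspender.
 */
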
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return 0;
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL) {
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return 0;
}

static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return EEXIST;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return retval;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return retval;
}

errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		if (dlil_verbose) {
			DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}

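/*
 * Minimal v1 usage sketch (hypothetical protocol module; the protocol
 * family value and handler names are made up for illustration):
 *
 *	struct ifnet_attach_proto_param proto;
 *	struct ifnet_demux_desc demux[1];
 *	errno_t error;
 *
 *	bzero(&demux, sizeof (demux));
 *	...	(fill in demux[0].type/data/datalen for the family)
 *	bzero(&proto, sizeof (proto));
 *	proto.input = myproto_input;
 *	proto.pre_output = myproto_pre_output;
 *	proto.demux_list = demux;
 *	proto.demux_count = 1;
 *	error = ifnet_attach_protocol(ifp, PF_MYPROTO, &proto);
 *
 * On success the interface is also marked IFF_UP, as done above.
 */
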
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else {
		if (dlil_verbose) {
			DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}

errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}

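/*
 * Corresponding teardown sketch (illustrative; PF_MYPROTO as above):
 *
 *	(void) ifnet_detach_protocol(ifp, PF_MYPROTO);
 *
 * The if_proto entry is unhooked immediately and its handlers are
 * pointed at the ifproto_media_* stubs below; the final free happens
 * when the last if_proto reference is dropped.
 */
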
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return ENXIO;
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return ENXIO;
}

extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	u_int32_t sflags = 0;
	int err = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc_flags(dlif_phash_zone, Z_WAITOK | Z_ZERO);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;
	}
	/* the lladdr passed at attach time is the permanent address */
	if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
	    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
		bcopy(CONST_LLADDR(ll_addr),
		    dl_if->dl_if_permanent_ether,
		    ETHER_ADDR_LEN);
		dl_if->dl_if_permanent_ether_is_set = 1;
	}

	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv) {
		sflags |= PKTSCHEDF_QALG_FLOWCTL;
	}

	if (if_delaybased_queue) {
		sflags |= PKTSCHEDF_QALG_DELAYBASED;
	}

	if (ifp->if_output_sched_model ==
	    IFNET_SCHED_MODEL_DRIVER_MANAGED) {
		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
	}

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (thfunc == dlil_rxpoll_input_thread_func) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		if_set_eflags(ifp, IFEF_ECN_ENABLE);
		if_clear_eflags(ifp, IFEF_ECN_DISABLE);
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}

	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Finally, mark this ifnet as attached. */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return 0;
}

/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link address
 * gets removed from if_addrhead and ifnet_addrs[] at detach time, its
 * location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL) {
				return NULL;
			}
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = (u_char)socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof(asdl->sdl_data)));
		asdl->sdl_nlen = (u_char)namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = (u_char)masklen;
	while (namelen > 0) {
		msdl->sdl_data[--namelen] = 0xff;
	}
	IFA_UNLOCK(ifa);

	if (oifa != NULL) {
		IFA_REMREF(oifa);
	}

	return ifa;
}

void
if_purgeaddrs(struct ifnet *ifp)
{
	in_purgeaddrs(ifp);
	in6_purgeaddrs(ifp);
}

errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detaching\n", if_name(ifp));
	}

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connected after the interface
	 * is detached.
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
	igmp_domifdetach(ifp);
	mld_domifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}

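/*
 * Note on sequencing (summary of the above, illustrative): ifnet_detach()
 * only marks the ifnet IFRF_DETACHING and queues it; the heavy teardown
 * runs later in ifnet_detach_final() on the detacher thread, once all IO
 * references drain. A typical driver therefore does roughly:
 *
 *	ifnet_detach(ifp);	(returns quickly)
 *	...	(wait for its detach callback before freeing driver state)
 *	ifnet_release(ifp);
 */
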
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return ifp;
}

__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	struct ifnet *ifp;

	dlil_if_lock();
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	/* NOTREACHED */
	__builtin_unreachable();
}

static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		DLIL_PRINTF("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}

	VERIFY(ifp->if_datamov == 0);
	VERIFY(ifp->if_drainers == 0);
	VERIFY(ifp->if_suspend == 0);
	ifp->if_refflags &= ~IFRF_READY;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->dlth_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->dlth_lock);
			wtp = inp->dlth_driver_thread;
			inp->dlth_driver_thread = THREAD_NULL;
			ptp = inp->dlth_poller_thread;
			inp->dlth_poller_thread = THREAD_NULL;
			ASSERT(inp->dlth_thread != THREAD_NULL);
			tp = inp->dlth_thread;   /* don't nullify now */
			inp->dlth_affinity_tag = 0;
			inp->dlth_affinity = FALSE;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				VERIFY(ifp->if_xflags & IFXF_LEGACY);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* if the worker thread was created, tell it to terminate */
		if (inp->dlth_thread != THREAD_NULL) {
			lck_mtx_lock_spin(&inp->dlth_lock);
			inp->dlth_flags |= DLIL_INPUT_TERMINATE;
			if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
				wakeup_one((caddr_t)&inp->dlth_flags);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			ifnet_lock_done(ifp);

			/* wait for the input thread to terminate */
			lck_mtx_lock_spin(&inp->dlth_lock);
			while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
			    == 0) {
				(void) msleep(&inp->dlth_flags, &inp->dlth_lock,
				    (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			ifnet_lock_exclusive(ifp);
		}

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
		/* clean-up poll parameters */
		VERIFY(ifp->if_poll_thread == THREAD_NULL);
		dlil_reset_rxpoll_params(ifp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	/* QoS marking get cleared */
	if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL) {
		if_free(ifp);
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached\n", if_name(ifp));
	}

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}

static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}

#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return EJUSTRETURN;
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

__private_extern__ int
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	struct dlil_ifnet *dlifp1_saved = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name
	 * in use by another interface.
	 * To make sure of that list has to be traversed completely
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family) {
			continue;
		}

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
		}

		if (uniqueid_len) {
			if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
			    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
				if (dlifp1->dl_if_flags & DLIF_INUSE) {
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					ret = EBUSY;
					goto end;
				}
				/* Cache the first interface that can be recycled */
				if (*ifp == NULL) {
					*ifp = ifp1;
					dlifp1_saved = dlifp1;
				}
				/*
				 * XXX Do not break or jump to end as we have to traverse
				 * the whole list to ensure there are no name collisions
				 */
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (*ifp != NULL) {
		if (dlifp1_saved != NULL) {
			lck_mtx_lock(&dlifp1_saved->dl_if_lock);
			dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
			lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
			dlifp1_saved = NULL;
		}
		goto end;
	}

	/* no interface found, allocate a new one */
	buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
	    sizeof(u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof(void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);

	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;

	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));

	return ret;
}
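
/*
 * dlil_if_release: mark the underlying dlil_ifnet as no longer in use and
 * reset its name/xname storage so the structure can be recycled by a
 * subsequent dlil_if_acquire().
 */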
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
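
/*
 * Helpers to copy the per-interface cached source routes (IPv4 and IPv6)
 * in and out under if_cached_route_lock.
 */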
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
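
/*
 * Cached route lookup: reuse the interface's cached source route when it is
 * still usable and matches the requested source address; otherwise perform a
 * scoped rtalloc and refresh the cache.
 */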
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				(struct sockaddr *)&src_rt.ro_dst, 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
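
/*
 * if_lqm_update: normalize the reported link quality metric to one of the
 * defined thresholds, record it in if_interface_state and post a
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED event when it changes.
 */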
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
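
/*
 * if_state_update: validate and apply LQM, RRC and availability updates
 * coming from SIOCSIFINTERFACESTATE, then kick TCP to send probes when an
 * interface becomes available.
 */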
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return ENOTSUP;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}

	return 0;
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	if (conn_probe > 1) {
		return EINVAL;
	}
	if (conn_probe == 0) {
		if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
	} else {
		if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
	}

	necp_update_all_clients();

	tcp_probe_connectivity(ifp, conn_probe);
	return 0;
}
static int
get_ether_index(int * ret_other_index)
{
	struct ifnet *ifp;
	int en0_index = 0;
	int other_en_index = 0;
	int any_ether_index = 0;
	short best_unit = 0;

	*ret_other_index = 0;
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		/*
		 * find en0, or if not en0, the lowest unit en*, and if not
		 * that, any ethernet
		 */
		ifnet_lock_shared(ifp);
		if (strcmp(ifp->if_name, "en") == 0) {
			if (ifp->if_unit == 0) {
				/* found en0, we're done */
				en0_index = ifp->if_index;
				ifnet_lock_done(ifp);
				break;
			}
			if (other_en_index == 0 || ifp->if_unit < best_unit) {
				other_en_index = ifp->if_index;
				best_unit = ifp->if_unit;
			}
		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
			any_ether_index = ifp->if_index;
		}
		ifnet_lock_done(ifp);
	}
	if (en0_index == 0) {
		if (other_en_index != 0) {
			*ret_other_index = other_en_index;
		} else if (any_ether_index != 0) {
			*ret_other_index = any_ether_index;
		}
	}
	return en0_index;
}
int
uuid_get_ethernet(u_int8_t *node)
{
	static int en0_index;
	struct ifnet *ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_rxpoll = i;
	return err;
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;
	}

	if_rxpoll_mode_holdtime = q;

	return err;
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_SAMPLETIME_MIN) {
		q = IF_RXPOLL_SAMPLETIME_MIN;
	}

	if_rxpoll_sample_holdtime = q;

	return err;
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < IF_RXPOLL_INTERVALTIME_MIN) {
		q = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if_rxpoll_interval_time = q;

	return err;
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_wlowat = i;
	return err;
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i <= if_sysctl_rxpoll_wlowat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_whiwat = i;
	return err;
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < IF_SNDQ_MINLEN) {
		i = IF_SNDQ_MINLEN;
	}

	if_sndq_maxlen = i;
	return err;
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < IF_RCVQ_MINLEN) {
		i = IF_RCVQ_MINLEN;
	}

	if_rcvq_maxlen = i;
	return err;
}
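
/*
 * Node presence/absence notifications: update the ND6 alternative node
 * table and post the corresponding KEV_DL_NODE_PRESENCE / KEV_DL_NODE_ABSENCE
 * kernel events.
 */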
int
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;
	int ret = 0;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof(kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	if (ret == 0) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev));
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
			    "error %d\n", __func__, err);
		}
	}
	return ret;
}
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		bcopy(sa, kev_sin6, sa->sa_len);
		nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof(kev));
}
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	bcopy(sdl, kev_sdl, sdl->sdl_len);
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	bcopy(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev));
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}
	return ret;
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) {
		*sizep = size;
	}
	return bytes;
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof(kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL) {
		bcopy(info, &kev.info, DLIL_MODARGLEN);
	}

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof(kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
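
/*
 * Interface throttling: query or set the opportunistic throttling level on
 * the interface's transmit classq.
 */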
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq)) {
		cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
		*level = req.level;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		cqrq_throttle_t req = { 1, level };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		necp_update_all_clients();
		if (level == IFNET_THROTTLE_OFF) {
			ifnet_start(ifp);
		}
	}

	return err;
}
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}
		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}
		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}
		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return err;
}
*ifp
, int32_t *level
, uint32_t *flags
,
9603 int32_t *category
, int32_t *subcategory
)
9605 if (level
!= NULL
) {
9606 *level
= ifp
->if_log
.level
;
9608 if (flags
!= NULL
) {
9609 *flags
= ifp
->if_log
.flags
;
9611 if (category
!= NULL
) {
9612 *category
= ifp
->if_log
.category
;
9614 if (subcategory
!= NULL
) {
9615 *subcategory
= ifp
->if_log
.subcategory
;
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL) {
		return EOPNOTSUPP;
	}

	bzero(&na, sizeof(na));
	na.address_family = (sa_family_t)af;

	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	           sizeof(na), &na);
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	*flowid = ifp->if_flowhash;

	return 0;
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return err;
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	ifnet_start_common(ifp, TRUE);
	return 0;
}
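
/*
 * Flow advisory: when a driver reports that it can accept traffic again for
 * a given flow hash, look up the flow-controlled interface and re-enable its
 * output path.
 */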
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return fc1->ifce_flowhash - fc2->ifce_flowhash;
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
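
/*
 * Compute a non-zero flow hash for the interface from its name, unit, flags
 * and capabilities, salted with random values; recalculated on each attach.
 */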
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;
}
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
*ifp
, uint8_t family
, uint8_t *len
,
9908 uint16_t *flags
, uint8_t *data
)
9912 if (ifp
== NULL
|| len
== NULL
|| data
== NULL
) {
9918 if_inetdata_lock_shared(ifp
);
9919 if (IN_IFEXTRA(ifp
) != NULL
) {
9920 if (*len
== 0 || *len
< IN_IFEXTRA(ifp
)->netsig_len
) {
9922 if_inetdata_lock_done(ifp
);
9925 if ((*len
= (uint8_t)IN_IFEXTRA(ifp
)->netsig_len
) > 0) {
9926 bcopy(IN_IFEXTRA(ifp
)->netsig
, data
, *len
);
9933 if_inetdata_lock_done(ifp
);
9937 if_inet6data_lock_shared(ifp
);
9938 if (IN6_IFEXTRA(ifp
) != NULL
) {
9939 if (*len
== 0 || *len
< IN6_IFEXTRA(ifp
)->netsig_len
) {
9941 if_inet6data_lock_done(ifp
);
9944 if ((*len
= (uint8_t)IN6_IFEXTRA(ifp
)->netsig_len
) > 0) {
9945 bcopy(IN6_IFEXTRA(ifp
)->netsig
, data
, *len
);
9952 if_inet6data_lock_done(ifp
);
9960 if (error
== 0 && flags
!= NULL
) {
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
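
/*
 * Checksum debugging hooks: optionally force software finalization on the
 * output path and force/verify/adjust partial checksum offload on the input
 * path, keeping counters for each case.
 */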
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee should it skip such case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	default:
		return;
	}
}
*ifp
, struct mbuf
*m
, char *frame_header
,
10114 protocol_family_t pf
)
10119 if (frame_header
== NULL
||
10120 frame_header
< (char *)mbuf_datastart(m
) ||
10121 frame_header
> (char *)m
->m_data
) {
10122 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
10123 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp
),
10124 (uint64_t)VM_KERNEL_ADDRPERM(frame_header
),
10125 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m
)),
10126 (uint64_t)VM_KERNEL_ADDRPERM(m
->m_data
),
10127 (uint64_t)VM_KERNEL_ADDRPERM(m
));
10130 hlen
= (uint32_t)(m
->m_data
- frame_header
);
10141 * Force partial checksum offload; useful to simulate cases
10142 * where the hardware does not support partial checksum offload,
10143 * in order to validate correctness throughout the layers above.
10145 if (hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
) {
10146 uint32_t foff
= hwcksum_dbg_partial_rxoff_forced
;
10148 if (foff
> (uint32_t)m
->m_pkthdr
.len
) {
10152 m
->m_pkthdr
.csum_flags
&= ~CSUM_RX_FLAGS
;
10154 /* Compute 16-bit 1's complement sum from forced offset */
10155 sum
= m_sum16(m
, foff
, (m
->m_pkthdr
.len
- foff
));
10157 m
->m_pkthdr
.csum_flags
|= (CSUM_DATA_VALID
| CSUM_PARTIAL
);
10158 m
->m_pkthdr
.csum_rx_val
= sum
;
10159 m
->m_pkthdr
.csum_rx_start
= (uint16_t)(foff
+ hlen
);
10161 hwcksum_dbg_partial_forced
++;
10162 hwcksum_dbg_partial_forced_bytes
+= m
->m_pkthdr
.len
;
10166 * Partial checksum offload verification (and adjustment);
10167 * useful to validate and test cases where the hardware
10168 * supports partial checksum offload.
10170 if ((m
->m_pkthdr
.csum_flags
&
10171 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
10172 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
10175 /* Start offset must begin after frame header */
10176 rxoff
= m
->m_pkthdr
.csum_rx_start
;
10177 if (hlen
> rxoff
) {
10178 hwcksum_dbg_bad_rxoff
++;
10179 if (dlil_verbose
) {
10180 DLIL_PRINTF("%s: partial cksum start offset %d "
10181 "is less than frame header length %d for "
10182 "mbuf 0x%llx\n", if_name(ifp
), rxoff
, hlen
,
10183 (uint64_t)VM_KERNEL_ADDRPERM(m
));
10189 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
)) {
10191 * Compute the expected 16-bit 1's complement sum;
10192 * skip this if we've already computed it above
10193 * when partial checksum offload is forced.
10195 sum
= m_sum16(m
, rxoff
, (m
->m_pkthdr
.len
- rxoff
));
10197 /* Hardware or driver is buggy */
10198 if (sum
!= m
->m_pkthdr
.csum_rx_val
) {
10199 hwcksum_dbg_bad_cksum
++;
10200 if (dlil_verbose
) {
10201 DLIL_PRINTF("%s: bad partial cksum value "
10202 "0x%x (expected 0x%x) for mbuf "
10203 "0x%llx [rx_start %d]\n",
10205 m
->m_pkthdr
.csum_rx_val
, sum
,
10206 (uint64_t)VM_KERNEL_ADDRPERM(m
),
10207 m
->m_pkthdr
.csum_rx_start
);
10212 hwcksum_dbg_verified
++;
10215 * This code allows us to emulate various hardwares that
10216 * perform 16-bit 1's complement sum beginning at various
10217 * start offset values.
10219 if (hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_RXOFF_ADJ
) {
10220 uint32_t aoff
= hwcksum_dbg_partial_rxoff_adj
;
10222 if (aoff
== rxoff
|| aoff
> (uint32_t)m
->m_pkthdr
.len
) {
10226 sum
= m_adj_sum16(m
, rxoff
, aoff
,
10227 m_pktlen(m
) - aoff
, sum
);
10229 m
->m_pkthdr
.csum_rx_val
= sum
;
10230 m
->m_pkthdr
.csum_rx_start
= (uint16_t)(aoff
+ hlen
);
10232 hwcksum_dbg_adjusted
++;
10238 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10240 #pragma unused(arg1, arg2)
10244 i
= hwcksum_dbg_mode
;
10246 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
10247 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
10251 if (hwcksum_dbg
== 0) {
10255 if ((i
& ~HWCKSUM_DBG_MASK
) != 0) {
10259 hwcksum_dbg_mode
= (i
& HWCKSUM_DBG_MASK
);
10265 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10267 #pragma unused(arg1, arg2)
10271 i
= hwcksum_dbg_partial_rxoff_forced
;
10273 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
10274 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
10278 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_FORCED
)) {
10282 hwcksum_dbg_partial_rxoff_forced
= i
;
10288 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10290 #pragma unused(arg1, arg2)
10294 i
= hwcksum_dbg_partial_rxoff_adj
;
10296 err
= sysctl_handle_int(oidp
, &i
, 0, req
);
10297 if (err
!= 0 || req
->newptr
== USER_ADDR_NULL
) {
10301 if (!(hwcksum_dbg_mode
& HWCKSUM_DBG_PARTIAL_RXOFF_ADJ
)) {
10305 hwcksum_dbg_partial_rxoff_adj
= i
;
10311 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10313 #pragma unused(oidp, arg1, arg2)
10316 if (req
->oldptr
== USER_ADDR_NULL
) {
10318 if (req
->newptr
!= USER_ADDR_NULL
) {
10321 err
= SYSCTL_OUT(req
, &tx_chain_len_stats
,
10322 sizeof(struct chain_len_stats
));
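
/*
 * SUM16 self-test (DEBUG/DEVELOPMENT only): verify m_sum16()/b_sum16()
 * against a reference checksum over various lengths and alignments of the
 * test blob below.
 */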
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t       init;
	uint16_t        len;
	uint16_t        sumr;           /* reference */
	uint16_t        sumrp;          /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX      ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);               /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
		CASE_STRINGIFY(KEV_DL_SIFMTU);
		CASE_STRINGIFY(KEV_DL_SIFPHYS);
		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
		CASE_STRINGIFY(KEV_DL_ADDMULTI);
		CASE_STRINGIFY(KEV_DL_DELMULTI);
		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_OFF);
		CASE_STRINGIFY(KEV_DL_LINK_ON);
		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
		CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
		CASE_STRINGIFY(KEV_DL_ISSUES);
		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
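
/*
 * Data threshold notification: when interface byte counts cross
 * if_data_threshold, schedule the if_dt_tcall thread call (rate-limited by
 * threshold_interval) to notify NetworkStatistics.
 */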
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *  ifp (interface index): name[0]
 *  frames_array_count: name[1]
 *  frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can get look at other people TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}

static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
{
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
}

static inline void
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
{
	OSBitAndAtomic(~clear_flags, flags_p);
}

__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}

__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}

__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}

__private_extern__ void
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_xflags, clear_flags);
}