/*
 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>
#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>
#include <mach/thread_act.h>
#include <mach/sdt.h>
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#include <net/pfvar.h>
#include <net/pktsched/pktsched.h>
#include <net/necp.h>
#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define	MAX_LINKADDR		4	/* LONGWORDS */
#define	M_NKE			M_IFADDR

#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif

#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
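/*
 * The two REQUIRE_ALIGNED_64 macros above turn a layout assumption into a
 * compile-time check: offsetof() is a constant expression, so _CASSERT
 * fails the build if a 64-bit counter field ever lands on a non-8-byte
 * boundary (which would break the 64-bit atomic updates applied to it).
 * A minimal standalone sketch of the same technique; struct demo_stats
 * and its fields are hypothetical names, not part of this file:
 */
#if 0
#include <stddef.h>		/* offsetof */
#include <stdint.h>

struct demo_stats {
	uint32_t demo_flags;
	uint32_t demo_pad;	/* keeps the next field 8-byte aligned */
	uint64_t demo_packets;	/* updated with 64-bit atomics */
};

/* Fails to compile if demo_packets is ever misaligned. */
_Static_assert(offsetof(struct demo_stats, demo_packets) %
    sizeof (uint64_t) == 0, "demo_packets must be 64-bit aligned");
#endif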
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
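/*
 * if_proto entries hang off the if_proto_hash[] buckets through the
 * embedded SLIST_ENTRY link, i.e. the list pointers live inside the
 * element itself (an intrusive singly-linked list from <sys/queue.h>).
 * A minimal sketch of that pattern; struct demo_node and demo_find are
 * hypothetical names, not part of this file:
 */
#if 0
#include <sys/queue.h>
#include <stddef.h>

struct demo_node {
	SLIST_ENTRY(demo_node)	dn_link;	/* embedded next pointer */
	int			dn_family;
};
SLIST_HEAD(demo_head, demo_node);

static struct demo_node *
demo_find(struct demo_head *head, int family)
{
	struct demo_node *n;

	/* same walk find_attached_proto() does over one hash bucket */
	SLIST_FOREACH(n, head, dn_link) {
		if (n->dn_family == family)
			return (n);
	}
	return (NULL);
}
#endif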
#define	DLIL_SDLDATALEN	\
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))

struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
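/*
 * DLIL_TO_IFP()/IFP_TO_DLIL() convert between the private wrapper and the
 * public ifnet by relying on dl_if being the *first* member of struct
 * dlil_ifnet, so both directions reduce to a zero-offset pointer cast.
 * A hedged sketch of why that works, using hypothetical names:
 */
#if 0
#include <stddef.h>

struct pub { int p; };
struct wrap {
	struct pub	w_pub;		/* must stay first for the cast */
	int		w_private;
};

/* Holds only because offsetof(struct wrap, w_pub) == 0. */
_Static_assert(offsetof(struct wrap, w_pub) == 0, "w_pub must be first");
#define	WRAP_TO_PUB(w)	(&(w)->w_pub)
#define	PUB_TO_WRAP(p)	((struct wrap *)(p))
#endif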
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter)	filt_next;
	u_int32_t			filt_flags;
	const char			*filt_name;
	protocol_family_t		filt_protocol;
	iff_input_func			filt_input;
	iff_output_func			filt_output;
	iff_event_func			filt_event;
	iff_ioctl_func			filt_ioctl;
	iff_detached_func		filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		IFNETS_MAX	/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	  /* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	  /* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;
static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode,
    bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */
static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32

extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);
#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times start was delayed");

#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;		/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;		/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */
struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t	inject_buckets;

static	lck_grp_attr_t	*dlil_grp_attributes = NULL;
static	lck_attr_t	*dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */
#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
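/*
 * DLIL_EWMA computes avg = ((2^decay - 1) * avg + new) / 2^decay using
 * only shifts.  With the default decay of 2 (IF_RXPOLL_DECAY) that is
 * avg = (3 * avg + new) / 4; for example avg = 100, new = 20 yields
 * ((100 << 2) - 100 + 20) >> 2 = 320 >> 2 = 80.  A zero average is
 * simply re-seeded from the new sample.
 */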
#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{  10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{   1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{  10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
static u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		/* somebody else still holds a reference */
		return;
	}

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}
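/*
 * The release path above uses the classic fetch-and-decrement pattern:
 * atomic_add_32_ov() returns the value *before* the decrement, so exactly
 * one thread (the one that observed 1) performs teardown.  A minimal
 * sketch of the same pattern with C11 atomics (hypothetical names, not
 * part of this file):
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

struct demo_obj {
	_Atomic uint32_t do_refcnt;
};

static void
demo_release(struct demo_obj *o)
{
	/* fetch_sub returns the old value, like atomic_add_32_ov(.., -1) */
	if (atomic_fetch_sub(&o->do_refcnt, 1) == 1) {
		/* last reference: safe to tear down and free the object */
	}
}
#endif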
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	LCK_RW_ASSERT(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

#if INET6
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif /* INET6 */

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t	count = 0;
	int		i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return (count);
}

__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return (count);
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}
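/*
 * dlil_ifp_protolist() doubles as a counting primitive: a NULL (or short)
 * list still yields the full attachment count, so callers can size a
 * buffer with one call and fill it with a second.  A hedged sketch of
 * that two-call pattern around if_get_protolist(); demo_dump_protocols
 * is hypothetical and error handling/races between the calls are elided:
 */
#if 0
static void
demo_dump_protocols(struct ifnet *ifp)
{
	u_int32_t count, i, *list;

	count = if_get_protolist(ifp, NULL, 0);		/* just count */
	list = _MALLOC(count * sizeof (*list), M_TEMP, M_WAITOK);
	count = if_get_protolist(ifp, list, count);	/* then fill */
	for (i = 0; i < count; i++)
		printf("attached family: %u\n", list[i]);
	if_free_protolist(list);
}
#endif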
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}
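/*
 * The allocation scheme above over-allocates by a pointer plus a
 * u_int64_t, rounds the base up to an 8-byte boundary, and stashes the
 * original zalloc address in the pointer slot just below the aligned
 * base so the free path can recover it:
 *
 *	buf ... [saved buf ptr][aligned stats object ...]
 *	                       ^ base (8-byte aligned)
 *
 * A hedged standalone sketch of the same trick with malloc/free
 * (hypothetical names, not part of this file):
 */
#if 0
#include <stdint.h>
#include <stdlib.h>

static void *
demo_alloc_aligned8(size_t size)
{
	void *buf, *base, **pbuf;

	buf = malloc(size + sizeof (void *) + sizeof (uint64_t));
	if (buf == NULL)
		return (NULL);
	/* round up past the saved-pointer slot to an 8-byte boundary */
	base = (void *)(((uintptr_t)buf + sizeof (void *) + 7) &
	    ~(uintptr_t)7);
	pbuf = (void **)((uintptr_t)base - sizeof (void *));
	*pbuf = buf;			/* remember the real allocation */
	return (base);
}

static void
demo_free_aligned8(void *base)
{
	void **pbuf = (void **)((uintptr_t)base - sizeof (void *));

	free(*pbuf);			/* free the original allocation */
}
#endif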
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}
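/*
 * Sketch of the lifecycle established above: kernel_thread_start()
 * creates the input thread with an extra reference, which the thread
 * itself drops later in dlil_terminate_input_thread().  When
 * net_affinity is enabled, a random tag groups this input thread with
 * its matching workloop/starter threads on one processor set; the tag
 * is randomized purely to make collisions between unrelated interfaces'
 * affinity namespaces unlikely, not for any security property.
 */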
#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_input_thread_termination_spin;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_input_thread_termination_spin = i;
	return (err);
}
#endif /* TEST_INPUT_THREAD_TERMINATION */
static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		printf("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->input_lck);
	VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
	inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->input_waiting);
	lck_mtx_unlock(&inp->input_lck);

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
1454 thread_t thread
= THREAD_NULL
;
1457 * The following fields must be 64-bit aligned for atomic operations.
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1464 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1465 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1466 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1467 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1468 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1469 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1470 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1471 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1472 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1473 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets
);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors
);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets
);
1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors
);
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions
);
1480 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes
);
1481 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes
);
1482 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts
);
1483 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts
);
1484 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops
);
1485 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto
);
1486 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs
);
1487 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes
);
1488 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets
);
1489 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes
);
1492 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1494 _CASSERT(IF_HWASSIST_CSUM_IP
== IFNET_CSUM_IP
);
1495 _CASSERT(IF_HWASSIST_CSUM_TCP
== IFNET_CSUM_TCP
);
1496 _CASSERT(IF_HWASSIST_CSUM_UDP
== IFNET_CSUM_UDP
);
1497 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS
== IFNET_CSUM_FRAGMENT
);
1498 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT
== IFNET_IP_FRAGMENT
);
1499 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6
== IFNET_CSUM_TCPIPV6
);
1500 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6
== IFNET_CSUM_UDPIPV6
);
1501 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6
== IFNET_IPV6_FRAGMENT
);
1502 _CASSERT(IF_HWASSIST_CSUM_PARTIAL
== IFNET_CSUM_PARTIAL
);
1503 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT
== IFNET_CSUM_ZERO_INVERT
);
1504 _CASSERT(IF_HWASSIST_VLAN_TAGGING
== IFNET_VLAN_TAGGING
);
1505 _CASSERT(IF_HWASSIST_VLAN_MTU
== IFNET_VLAN_MTU
);
1506 _CASSERT(IF_HWASSIST_TSO_V4
== IFNET_TSO_IPV4
);
1507 _CASSERT(IF_HWASSIST_TSO_V6
== IFNET_TSO_IPV6
);
1510 * ... as well as the mbuf checksum flags counterparts.
1512 _CASSERT(CSUM_IP
== IF_HWASSIST_CSUM_IP
);
1513 _CASSERT(CSUM_TCP
== IF_HWASSIST_CSUM_TCP
);
1514 _CASSERT(CSUM_UDP
== IF_HWASSIST_CSUM_UDP
);
1515 _CASSERT(CSUM_IP_FRAGS
== IF_HWASSIST_CSUM_IP_FRAGS
);
1516 _CASSERT(CSUM_FRAGMENT
== IF_HWASSIST_CSUM_FRAGMENT
);
1517 _CASSERT(CSUM_TCPIPV6
== IF_HWASSIST_CSUM_TCPIPV6
);
1518 _CASSERT(CSUM_UDPIPV6
== IF_HWASSIST_CSUM_UDPIPV6
);
1519 _CASSERT(CSUM_FRAGMENT_IPV6
== IF_HWASSIST_CSUM_FRAGMENT_IPV6
);
1520 _CASSERT(CSUM_PARTIAL
== IF_HWASSIST_CSUM_PARTIAL
);
1521 _CASSERT(CSUM_ZERO_INVERT
== IF_HWASSIST_CSUM_ZERO_INVERT
);
1522 _CASSERT(CSUM_VLAN_TAG_VALID
== IF_HWASSIST_VLAN_TAGGING
);
1525 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1527 _CASSERT(IF_LLREACH_MAXLEN
<= IF_LLREACHINFO_ADDRLEN
);
1528 _CASSERT(IFNET_LLREACHINFO_ADDRLEN
== IF_LLREACHINFO_ADDRLEN
);
1530 _CASSERT(IFRLOGF_DLIL
== IFNET_LOGF_DLIL
);
1531 _CASSERT(IFRLOGF_FAMILY
== IFNET_LOGF_FAMILY
);
1532 _CASSERT(IFRLOGF_DRIVER
== IFNET_LOGF_DRIVER
);
1533 _CASSERT(IFRLOGF_FIRMWARE
== IFNET_LOGF_FIRMWARE
);
1535 _CASSERT(IFRLOGCAT_CONNECTIVITY
== IFNET_LOGCAT_CONNECTIVITY
);
1536 _CASSERT(IFRLOGCAT_QUALITY
== IFNET_LOGCAT_QUALITY
);
1537 _CASSERT(IFRLOGCAT_PERFORMANCE
== IFNET_LOGCAT_PERFORMANCE
);
1539 _CASSERT(IFRTYPE_FAMILY_ANY
== IFNET_FAMILY_ANY
);
1540 _CASSERT(IFRTYPE_FAMILY_LOOPBACK
== IFNET_FAMILY_LOOPBACK
);
1541 _CASSERT(IFRTYPE_FAMILY_ETHERNET
== IFNET_FAMILY_ETHERNET
);
1542 _CASSERT(IFRTYPE_FAMILY_SLIP
== IFNET_FAMILY_SLIP
);
1543 _CASSERT(IFRTYPE_FAMILY_TUN
== IFNET_FAMILY_TUN
);
1544 _CASSERT(IFRTYPE_FAMILY_VLAN
== IFNET_FAMILY_VLAN
);
1545 _CASSERT(IFRTYPE_FAMILY_PPP
== IFNET_FAMILY_PPP
);
1546 _CASSERT(IFRTYPE_FAMILY_PVC
== IFNET_FAMILY_PVC
);
1547 _CASSERT(IFRTYPE_FAMILY_DISC
== IFNET_FAMILY_DISC
);
1548 _CASSERT(IFRTYPE_FAMILY_MDECAP
== IFNET_FAMILY_MDECAP
);
1549 _CASSERT(IFRTYPE_FAMILY_GIF
== IFNET_FAMILY_GIF
);
1550 _CASSERT(IFRTYPE_FAMILY_FAITH
== IFNET_FAMILY_FAITH
);
1551 _CASSERT(IFRTYPE_FAMILY_STF
== IFNET_FAMILY_STF
);
1552 _CASSERT(IFRTYPE_FAMILY_FIREWIRE
== IFNET_FAMILY_FIREWIRE
);
1553 _CASSERT(IFRTYPE_FAMILY_BOND
== IFNET_FAMILY_BOND
);
1554 _CASSERT(IFRTYPE_FAMILY_CELLULAR
== IFNET_FAMILY_CELLULAR
);
1556 _CASSERT(IFRTYPE_SUBFAMILY_ANY
== IFNET_SUBFAMILY_ANY
);
1557 _CASSERT(IFRTYPE_SUBFAMILY_USB
== IFNET_SUBFAMILY_USB
);
1558 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH
== IFNET_SUBFAMILY_BLUETOOTH
);
1559 _CASSERT(IFRTYPE_SUBFAMILY_WIFI
== IFNET_SUBFAMILY_WIFI
);
1560 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT
== IFNET_SUBFAMILY_THUNDERBOLT
);
1561 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED
== IFNET_SUBFAMILY_RESERVED
);
1562 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC
== IFNET_SUBFAMILY_INTCOPROC
);
1564 _CASSERT(DLIL_MODIDLEN
== IFNET_MODIDLEN
);
1565 _CASSERT(DLIL_MODARGLEN
== IFNET_MODARGLEN
);
1567 PE_parse_boot_argn("net_affinity", &net_affinity
,
1568 sizeof (net_affinity
));
1570 PE_parse_boot_argn("net_rxpoll", &net_rxpoll
, sizeof (net_rxpoll
));
1572 PE_parse_boot_argn("net_rtref", &net_rtref
, sizeof (net_rtref
));
1574 PE_parse_boot_argn("ifnet_debug", &ifnet_debug
, sizeof (ifnet_debug
));
1576 dlif_size
	    = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
	    sizeof (struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);

	ifnet_llreach_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pf_init();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface port list */
	if_ports_used_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
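
/*
 * The four routines above implement a simple busy/waiter monitor over
 * if_flt_lock: a thread that must quiesce the filter list sleeps in
 * if_flt_monitor_enter() until the busy count drops to zero, and the
 * last holder out wakes it via if_flt_monitor_leave().  A generic
 * sketch of the same pattern (hypothetical object/field names, same
 * msleep/wakeup primitives used in this file):
 */
#if 0
	/* traversal side: mark the list in-use, drop the lock, walk */
	lck_mtx_lock_spin(&obj->lock);
	obj->busy++;
	lck_mtx_unlock(&obj->lock);
	/* ... traverse list without holding the lock ... */
	lck_mtx_lock_spin(&obj->lock);
	if (--obj->busy == 0 && obj->waiters > 0) {
		obj->waiters = 0;
		wakeup(&obj->head);	/* let a blocked mutator proceed */
	}
	lck_mtx_unlock(&obj->lock);
#endif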
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
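
/*
 * For context: dlil_attach_filter() is the backend of the public
 * iflt_attach() KPI (<net/kpi_interfacefilter.h>).  A hedged sketch of
 * how a kext would register a filter through that KPI; the callback
 * names and cookie are hypothetical:
 */
#if 0
	struct iff_filter my_filt = {
		.iff_cookie   = my_cookie,	  /* passed to callbacks */
		.iff_name     = "com.example.filter",
		.iff_protocol = 0,		  /* 0 = all protocols */
		.iff_input    = my_input_cb,	  /* may be NULL */
		.iff_output   = my_output_cb,	  /* may be NULL */
		.iff_detached = my_detached_cb,	  /* final callback */
	};
	interface_filter_t my_ref;
	errno_t err = iflt_attach(ifp, &my_filt, &my_ref);
	/* later: iflt_detach(my_ref) tears down via dlil_detach_filter() */
#endif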
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return (retval);
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
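
/*
 * The wait loop above is the canonical DLIL worker idiom: input_waiting
 * is a bit mask, and DLIL_INPUT_RUNNING is stripped before sleeping so
 * only genuinely new work ends the msleep.  The producer half lives in
 * dlil_input_handler() further below: it sets DLIL_INPUT_WAITING and
 * issues wakeup_one() only when the thread isn't already RUNNING.
 * Sketch of the producer side of that handshake:
 */
#if 0
	/* producer, called with inp->input_lck held after queueing */
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;				/* wakeup statistics */
		wakeup_one((caddr_t)&inp->input_waiting);
	}
#endif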
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s",
	    ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread on which the packet
		 * arrived, but that requires our knowing the interface in
		 * advance (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread on which the packet
		 * arrived, but that requires our knowing the interface in
		 * advance (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			lck_mtx_convert_spin(&inp->input_lck);
			_flushq(&inp->rcvq_pkts);
			if (inp->input_mit_tcall != NULL) {
				if (thread_call_isactive(inp->input_mit_tcall))
					thread_call_cancel(
					    inp->input_mit_tcall);
			}
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime))
			*(&inp->sample_lasttime) = *(&now);

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <))
				goto skip;

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
				inp->rxpoll_bmin = btot;
			if (btot > inp->rxpoll_bmax)
				inp->rxpoll_bmax = btot;

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
				inp->rxpoll_pmin = ptot;
			if (ptot > inp->rxpoll_pmax)
				inp->rxpoll_pmax = ptot;

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot,
			    if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime))
					*(&inp->dbg_lasttime) = *(&now);
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime))
				*(&inp->mode_lasttime) = *(&now);

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <))
				goto skip;

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };
			errno_t err;

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof (p),
			    &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0)
					inp->rxpoll_offerr++;
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0)
					inp->rxpoll_onerr++;
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
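
/*
 * The DLIL_EWMA() updates above keep exponentially weighted moving
 * averages of per-sample packets, bytes and wakeup requests; those
 * averages drive the POLL_ON/POLL_OFF transitions.  With a decay
 * exponent d, the update is roughly avg = ((avg << d) - avg + new) >> d,
 * i.e. avg ~= avg * (1 - 2^-d) + new * 2^-d (the exact macro lives
 * elsewhere in this file).  A worked example, assuming d = 2 so the new
 * sample carries weight 1/4:
 */
#if 0
	u_int32_t avg = 100, sample = 200, d = 2;
	avg = (((avg << d) - avg) + sample) >> d;  /* (300 + 200) / 4 = 125 */
#endif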
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat,
		    inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}
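
/*
 * Drivers normally reach this routine through the ifnet_set_poll_params()
 * KPI, passing NULL to fall back to auto-tuning.  A hedged usage sketch;
 * the watermark and interval values below are made up for illustration:
 */
#if 0
	struct ifnet_poll_params p;
	bzero(&p, sizeof (p));
	p.packets_lowat = 8;		/* leave POLL_ON below this avg */
	p.packets_hiwat = 64;		/* enter POLL_ON above this avg */
	p.interval_time = 1000 * 1000;	/* 1 ms poll cycle, in nsec */
	(void) ifnet_set_poll_params(ifp, &p);	/* or NULL to auto-tune */
#endif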
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}

static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	bzero(&_s, sizeof (_s));
	if (s != NULL)
		_s = *s;
	s = &_s;
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (err);
}
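
/*
 * Typical driver usage of the entry points above (a sketch; the chain
 * and stat values are hypothetical).  The extended variant requires an
 * accurate packets_in, since ifnet_input_common() panics on a mismatch:
 */
#if 0
	struct ifnet_stat_increment_param s;
	bzero(&s, sizeof (s));
	s.packets_in = chain_cnt;	/* must match the mbuf chain length */
	s.bytes_in = chain_len;
	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
#endif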
static errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return (ifp->if_output(ifp, m));
}
static errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread)
		dlil_input_stats_sync(ifp, inp);

	if (inp->input_mit_tcall &&
	    qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
	    qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
	    (ifp->if_family == IFNET_FAMILY_ETHERNET ||
	    ifp->if_type == IFT_CELLULAR)) {
		if (!thread_call_isactive(inp->input_mit_tcall)) {
			uint64_t deadline;
			clock_interval_to_deadline(dlil_rcv_mit_interval,
			    1, &deadline);
			(void) thread_call_enter_delayed(
			    inp->input_mit_tcall, deadline);
		}
	} else {
		inp->input_waiting |= DLIL_INPUT_WAITING;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			inp->wtot++;
			wakeup_one((caddr_t)&inp->input_waiting);
		}
	}
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
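
/*
 * The mitigation branch above trades a little latency for fewer
 * wakeups: while the queue length stays in
 * [dlil_rcv_mit_pkts_min, dlil_rcv_mit_pkts_max) on Ethernet/cellular,
 * the input thread is driven by the deferred inp->input_mit_tcall
 * rather than a per-chain wakeup_one().  Sketch of the arming
 * arithmetic with an assumed 100 us interval (not the actual tunable):
 */
#if 0
	uint64_t deadline;
	uint32_t interval_ns = 100 * 1000;	/* assumed for illustration */
	/* scale factor 1 keeps the interval in nanoseconds */
	clock_interval_to_deadline(interval_ns, 1, &deadline);
	(void) thread_call_enter_delayed(inp->input_mit_tcall, deadline);
#endif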
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	char thread_name[MAXTHREADNAMESIZE];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
	struct timespec delay_start_ts;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof (thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_start_thread, thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	(void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		if (ifp->if_start_thread != NULL) {
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}
		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;

		for (;;) {
			u_int32_t req = ifp->if_start_req;
			if (!IFCQ_IS_EMPTY(ifq) &&
			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
			    ifp->if_start_delayed == 0 &&
			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
			    (ifp->if_eflags & IFEF_DELAY_START)) {
				ifp->if_start_delayed = 1;
				ifnet_start_delayed++;
				break;
			} else {
				ifp->if_start_delayed = 0;
			}
			lck_mtx_unlock(&ifp->if_start_lock);

			/*
			 * If no longer attached, don't call start because ifp
			 * is being destroyed; else hold an IO refcnt to
			 * prevent the interface from being detached (will be
			 * released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_start_lock);
				break;
			}

			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));

			/*
			 * Release the io ref count taken by ifnet_is_attached.
			 */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_start_lock);

			/*
			 * If there's no pending request or if the
			 * interface has been disabled, we're done.
			 */
			if (req == ifp->if_start_req ||
			    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
				break;
			}
		}

		ifp->if_start_req = 0;
		ifp->if_start_active = 0;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller",
	    if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req) {
				break;
			}
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}
errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
    boolean_t flush, boolean_t *pdrop)
{
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct mbuf *m = p;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (ptype) {
	case QP_MBUF:
		ASSERT(m->m_flags & M_PKTHDR);
		ASSERT(m->m_nextpkt == NULL);

		if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    m->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			m->m_pkthdr.pkt_timestamp = now_nsec;
		}
		m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = _net_uptime;
				if (fg_ts != NULL)
					*fg_ts = _net_uptime;
			}
			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = _net_uptime;
				if (rt_ts != NULL)
					*rt_ts = _net_uptime;
			}
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows.  This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback.  This
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &=
						    ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
	m = NULL;

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return (ENETDOWN);
	}

	return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
}
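
/*
 * The TXSTART model these routines serve: the stack enqueues into
 * if_snd via ifnet_enqueue(), the starter thread fires the driver's
 * if_start callback, and the driver drains the scheduler with
 * ifnet_dequeue().  A hedged sketch of a driver start routine;
 * my_tx_one is hypothetical:
 */
#if 0
	static void
	my_if_start(struct ifnet *ifp)
	{
		struct mbuf *m;

		/* drain until the scheduler has nothing more to hand out */
		while (ifnet_dequeue(ifp, &m) == 0 && m != NULL)
			my_tx_one(ifp, m);	/* hand to hardware */
	}
#endif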
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    (void **)mp, NULL, NULL, NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
	    NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || head == NULL || pkt_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
	    len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || head == NULL || byte_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail,
    u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
	    (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
#if !CONFIG_EMBEDDED
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr,
	    frame_type));
}
#endif /* !CONFIG_EMBEDDED */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}
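
/*
 * Return-value contract enforced above: a filter's filt_input returning
 * 0 passes the (possibly replaced) mbuf to the next filter; any other
 * value stops the walk, and only EJUSTRETURN tells the caller that the
 * filter consumed the packet itself.  Hedged sketch of a conforming
 * callback (should_swallow is a hypothetical predicate):
 */
#if 0
	static errno_t
	my_input_cb(void *cookie, ifnet_t ifp, protocol_family_t pf,
	    mbuf_t *data, char **frame_ptr)
	{
		if (should_swallow(*data)) {
			mbuf_freem(*data);
			return (EJUSTRETURN);	/* consumed; caller won't free */
		}
		return (0);			/* continue up the stack */
	}
#endif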
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *	frame_header;
			mbuf_t	next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
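
/*
 * The v1/v2 split above mirrors the two protocol attach KPIs: v2
 * handlers (registered via ifnet_attach_protocol_v2()) take a whole
 * packet list in one call, which is why
 * dlil_input_packet_list_common() batches per protocol before calling
 * down.  Hedged sketch of a v2 input handler; handle_one is
 * hypothetical:
 */
#if 0
	static errno_t
	my_proto_input_v2(ifnet_t ifp, protocol_family_t pf, mbuf_t list)
	{
		mbuf_t m, next;

		for (m = list; m != NULL; m = next) {
			next = mbuf_nextpkt(m);
			mbuf_setnextpkt(m, NULL);
			handle_one(ifp, m);
		}
		return (0);	/* non-zero (not EJUSTRETURN) frees the list */
	}
#endif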
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	if (ifp->if_data_threshold != 0) {
		lck_mtx_convert_spin(&inp->input_lck);
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header = NULL;
	struct if_proto *last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t *pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			/*
			 * Preserve the time stamp if it was set.
			 */
			pktf_mask = PKTF_TS_VALID;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		/* Drop v4 packets received on CLAT46 enabled interface */
		if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
			m_freem(m);
			ip6stat.ip6s_clat464_in_v4_drop++;
			goto next;
		}

		/* Translate the packet if it is received on CLAT interface */
		if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
		    && dlil_is_clat_needed(protocol_family, m)) {
			char *data = NULL;
			struct ether_header eh;
			struct ether_header *ehp = NULL;

			if (ifp->if_type == IFT_ETHER) {
				ehp = (struct ether_header *)(void *)frame_header;
				/* Skip RX Ethernet packets if they are not IPV6 */
				if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6)
					goto skip_clat;

				/* Keep a copy of frame_header for Ethernet packets */
				bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
			}
			error = dlil_clat64(ifp, &protocol_family, &m);
			data = (char *) mbuf_data(m);
			if (error != 0) {
				m_freem(m);
				ip6stat.ip6s_clat464_in_drop++;
				goto next;
			}
			/* Native v6 should be No-op */
			if (protocol_family != PF_INET)
				goto skip_clat;

			/* Do this only for translated v4 packets. */
			switch (ifp->if_type) {
			case IFT_CELLULAR:
				frame_header = data;
				break;
			case IFT_ETHER:
				/*
				 * Drop if the mbuf doesn't have enough
				 * space for Ethernet header
				 */
				if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
					m_freem(m);
					ip6stat.ip6s_clat464_in_drop++;
					goto next;
				}
				/*
				 * Set the frame_header ETHER_HDR_LEN bytes
				 * preceding the data pointer. Change
				 * the ether_type too.
				 */
				frame_header = data - ETHER_HDR_LEN;
				eh.ether_type = htons(ETHERTYPE_IP);
				bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
				break;
			}
		}
skip_clat:
		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;
			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
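/*
 * Illustrative sketch only (kept out of the build): the loop above
 * batches consecutive packets that demux to the same protocol into a
 * single m_nextpkt chain before handing the run to dlil_ifproto_input().
 * The helpers classify() and deliver_run() below are hypothetical
 * stand-ins for if_demux and dlil_ifproto_input.
 */
#if 0
static void
batch_by_protocol(mbuf_t head)
{
	mbuf_t m = head, run_head = NULL, *run_tail = &run_head;
	protocol_family_t run_proto = 0;

	while (m != NULL) {
		mbuf_t next = m->m_nextpkt;
		protocol_family_t proto = classify(m);	/* hypothetical */

		m->m_nextpkt = NULL;
		if (run_head != NULL && proto != run_proto) {
			/* protocol changed: flush the current run */
			deliver_run(run_proto, run_head);	/* hypothetical */
			run_head = NULL;
			run_tail = &run_head;
		}
		run_proto = proto;
		*run_tail = m;
		run_tail = &m->m_nextpkt;
		m = next;
	}
	if (run_head != NULL)
		deliver_run(run_proto, run_head);
}
#endif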
errno_t
if_mcasts_update(struct ifnet *ifp)
{
	errno_t err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}
/* If ifp is set, we will increment the generation for the interface */
static int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return (kev_post_msg(event));
}
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)
{
	struct kev_msg ev_msg;
	struct net_event_data ev_data;

	bzero(&ev_data, sizeof (ev_data));
	bzero(&ev_msg, sizeof (ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
	ev_msg.event_code = KEV_DL_SIFFLAGS;
	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
	ev_data.if_family = ifp->if_family;
	ev_data.if_unit = (u_int32_t) ifp->if_unit;
	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
	ev_msg.dv[0].data_ptr = &ev_data;
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(ifp, &ev_msg);
}
#define	TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_WAITOK);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
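/*
 * A minimal sketch of an interface filter event callback as driven by
 * the loop above; the signature follows iff_event_func as we read it
 * from <net/kpi_interfacefilter.h> (treat that as an assumption here).
 * Note that dlil_event_internal() drops if_flt_lock across the call,
 * so the callback may block or take its own locks.
 */
#if 0
static void
example_filt_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(cookie, protocol)
	if (event->kev_subclass == KEV_DL_SUBCLASS &&
	    event->event_code == KEV_DL_SIFFLAGS)
		printf("%s: interface flags changed\n", if_name(ifp));
}
#endif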
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
}
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
#endif /* CONFIG_MACF_NET */
void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m = NULL;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;
	struct timespec now;
	u_int64_t now_nsec;
	boolean_t did_clat46 = FALSE;
	protocol_family_t old_proto_family = proto_family;
	struct rtentry *rt = NULL;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	VERIFY(ifp->if_output_dlil != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	/*
	 * Perform address family translation for the first
	 * packet outside the loop in order to perform address
	 * lookup for the translated proto family.
	 */
	if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
	    (ifp->if_type == IFT_CELLULAR ||
	    dlil_is_clat_needed(proto_family, m))) {
		retval = dlil_clat46(ifp, &proto_family, &m);
		/*
		 * Go to the next packet if translation fails
		 */
		if (retval != 0) {
			m_freem(m);
			m = NULL;
			ip6stat.ip6s_clat464_out_drop++;
			/* Make sure that the proto family is PF_INET */
			ASSERT(proto_family == PF_INET);
			goto preout_again;
		}
	}

	/*
	 * Free the old one and make it point to the IPv6 proto structure.
	 *
	 * Change proto for the first time we have successfully
	 * performed address family translation.
	 */
	if (!did_clat46 && proto_family == PF_INET6) {
		struct sockaddr_in6 dest6;
		did_clat46 = TRUE;

		if (proto != NULL)
			if_proto_free(proto);
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			m_freem(m);
			m = NULL;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
		if (ifp->if_type == IFT_ETHER) {
			/* Update the dest to translated v6 address */
			dest6.sin6_len = sizeof(struct sockaddr_in6);
			dest6.sin6_family = AF_INET6;
			dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
			dest = (const struct sockaddr *)&dest6;

			/*
			 * Lookup route to the translated destination
			 * Free this route ref during cleanup
			 */
			rt = rtalloc1_scoped((struct sockaddr *)&dest6,
			    0, 0, ifp->if_index);

			route = rt;
		}
	}

	/*
	 * This path gets packet chain going to the same destination.
	 * The pre output routine is used to either trigger resolution of
	 * the next hop or retrieve the next hop's link layer addressing.
	 * For ex: ether_inet(6)_pre_output routine.
	 *
	 * If the routine returns EJUSTRETURN, it implies that packet has
	 * been queued, and therefore we have to call preout_again for the
	 * following packet in the chain.
	 *
	 * For errors other than EJUSTRETURN, the current packet is freed
	 * and the rest of the chain (pointed to by packetlist) is freed
	 * as part of clean up.
	 *
	 * Else if there is no error the retrieved information is used for
	 * all the packets in the chain.
	 */
	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				m = NULL;
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif

	do {
		/*
		 * Perform address family translation if needed.
		 * For now we only support stateless 4 to 6 translation
		 * on the out path.
		 *
		 * The routine below translates IP header, updates protocol
		 * checksum and also translates ICMP.
		 *
		 * We skip the first packet as it is already translated and
		 * the proto family is set to PF_INET6.
		 */
		if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
		    (ifp->if_type == IFT_CELLULAR ||
		    dlil_is_clat_needed(proto_family, m))) {
			retval = dlil_clat46(ifp, &proto_family, &m);
			/* Goto the next packet if the translation fails */
			if (retval != 0) {
				m_freem(m);
				m = NULL;
				ip6stat.ip6s_clat464_out_drop++;
				goto next;
			}
		}

#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp. This will allow
			 * us to determine that it is a legitimate packet
			 * for the system. Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context. If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary. If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertise TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Record timestamp; ifnet_enqueue() will use this info
		 * rather than redoing the work. An optimization could
		 * involve doing this just once at the top, if there are
		 * no interface filters attached, but that's probably
		 * not a big deal.
		 */
		nanouptime(&now);
		net_timernsec(&now, &now_nsec);
		(void) mbuf_set_timestamp(m, now_nsec, TRUE);

		/*
		 * Discard partial sum information if this packet originated
		 * from another interface; the packet would already have the
		 * final checksum and we shouldn't recompute it.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
		    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID|CSUM_PARTIAL)) {
			m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
			m->m_pkthdr.csum_data = 0;
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output_dlil)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
		/* Reset the proto family to old proto family for CLAT */
		if (did_clat46)
			proto_family = old_proto_family;
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*ifp->if_output_dlil)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;
			int enq_cnt = 0;
			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*ifp->if_output_dlil)(ifp, send_m);
				if (retval == EQFULL || retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0) {
					enq_cnt++;
					if (flen > 0)
						fpkts++;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
			if (enq_cnt > 0) {
				fbytes += flen;
				ifnet_start(ifp);
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);
	if (rt != NULL) {
		rtfree(rt);
		rt = NULL;
	}

	return (retval);
}
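/*
 * Illustrative sketch only: how a caller of dlil_output() might consume
 * the flow advisory code documented above.  The reactions described in
 * the comments are hypothetical; only struct flowadv and the FADV_*
 * values come from this source.
 */
#if 0
	struct flowadv adv = { .code = FADV_SUCCESS };
	errno_t error;

	error = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
	if (error == 0 && adv.code == FADV_FLOW_CONTROLLED) {
		/* transmit queue is full: pause this flow for now */
	} else if (error == 0 && adv.code == FADV_SUSPENDED) {
		/* output is suspended: back off more aggressively */
	}
#endif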
/*
 * This routine checks that the destination address is not a loopback,
 * link-local, multicast or broadcast address.
 */
static boolean_t
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
	boolean_t ret = FALSE;
	switch(proto_family) {
	case PF_INET: {
		struct ip *iph = mtod(m, struct ip *);
		if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr)))
			ret = TRUE;
		break;
	}
	case PF_INET6: {
		struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
		if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
		    CLAT64_NEEDED(&ip6h->ip6_dst))
			ret = TRUE;
		break;
	}
	}

	return (ret);
}
/*
 * @brief This routine translates an IPv4 packet to IPv6, updates the
 *     protocol checksum and also translates ICMP, along with inner
 *     header translation.
 *
 * @param ifp Pointer to the interface
 * @param proto_family pointer to protocol family. It is updated if function
 *     performs the translation successfully.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 *     routine can end up changing the mbuf to a different one.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET);
	VERIFY(IS_INTF_CLAT46(ifp));

	pbuf_t pbuf_store, *pbuf = NULL;
	struct ip *iph = NULL;
	struct in_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_src = NULL;
	struct in6_addr *src = NULL;
	struct in6_addr dst;
	int error = 0;
	uint16_t off = 0;
	uint64_t tot_len = 0;
	uint16_t ip_id_val = 0;
	uint16_t ip_frag_off = 0;

	boolean_t is_frag = FALSE;
	boolean_t is_first_frag = TRUE;
	boolean_t is_last_frag = TRUE;

	pbuf_init_mbuf(&pbuf_store, *m, ifp);
	pbuf = &pbuf_store;
	iph = pbuf->pb_data;

	osrc = iph->ip_src;
	odst = iph->ip_dst;
	proto = iph->ip_p;
	off = iph->ip_hl << 2;
	ip_id_val = iph->ip_id;
	ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

	tot_len = ntohs(iph->ip_len);

	/*
	 * For packets that are not first frags
	 * we only need to adjust CSUM.
	 * For 4 to 6, Fragmentation header gets appended
	 * after proto translation.
	 */
	if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
		is_frag = TRUE;

		/* If the offset is not zero, it is not first frag */
		if (ip_frag_off != 0)
			is_first_frag = FALSE;

		/* If IP_MF is set, then it is not last frag */
		if (ntohs(iph->ip_off) & IP_MF)
			is_last_frag = FALSE;
	}

	/*
	 * Retrieve the local IPv6 CLAT46 address reserved for stateless
	 * translation.
	 */
	ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_src == NULL) {
		ip6stat.ip6s_clat464_out_nov6addr_drop++;
		error = -1;
		goto cleanup;
	}

	src = &ia6_clat_src->ia_addr.sin6_addr;

	/*
	 * Translate IPv4 destination to IPv6 destination by using the
	 * prefixes learned through prior PLAT discovery.
	 */
	if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
		ip6stat.ip6s_clat464_out_v6synthfail_drop++;
		goto cleanup;
	}

	/* Translate the IP header part first */
	error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
	    iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;

	iph = NULL; /* Invalidate iph as pbuf has been modified */

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46transfail_drop++;
		goto cleanup;
	}

	/*
	 * Translate protocol header, update checksum, checksum flags
	 * and related fields.
	 */
	error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
	    (struct nat464_addr *)&odst, proto, PF_INET, PF_INET6,
	    NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
		goto cleanup;
	}

	/* Now insert the IPv6 fragment header */
	if (is_frag) {
		error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off,
		    is_last_frag);

		if (error != 0) {
			ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
			goto cleanup;
		}
	}

cleanup:
	if (ia6_clat_src != NULL)
		IFA_REMREF(&ia6_clat_src->ia_ifa);

	if (pbuf_is_valid(pbuf)) {
		*m = pbuf->pb_mbuf;
		pbuf->pb_mbuf = NULL;
		pbuf_destroy(pbuf);
	} else {
		error = -1;
		ip6stat.ip6s_clat464_out_invalpbuf_drop++;
	}

	if (error == 0) {
		*proto_family = PF_INET6;
		ip6stat.ip6s_clat464_out_success++;
	}

	return (error);
}
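/*
 * For reference, a sketch of the address synthesis that
 * nat464_synthesize_ipv6() performs in the common case of a /96 NAT64
 * prefix (RFC 6052): the 32-bit IPv4 destination is embedded verbatim
 * in the last four bytes of the synthesized IPv6 address.  The real
 * routine also handles the other RFC 6052 prefix lengths and is driven
 * by prefixes learned through PLAT discovery; this helper is purely
 * illustrative.
 */
#if 0
static void
synthesize_v6_slash96(const struct in6_addr *prefix96, struct in_addr v4,
    struct in6_addr *out)
{
	*out = *prefix96;
	bcopy(&v4.s_addr, &out->s6_addr[12], sizeof (v4.s_addr));
}
#endif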
/*
 * @brief This routine translates an incoming IPv6 packet to IPv4,
 *     updates the protocol checksum and also translates the ICMPv6
 *     outer and inner headers.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET6);
	VERIFY(IS_INTF_CLAT46(ifp));

	struct ip6_hdr *ip6h = NULL;
	struct in6_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_dst = NULL;
	struct in_ifaddr *ia4_clat_dst = NULL;
	struct in_addr *dst = NULL;
	struct in_addr src;
	int error = 0;
	uint32_t off = 0;
	u_int64_t tot_len = 0;
	uint8_t tos = 0;
	boolean_t is_first_frag = TRUE;

	/* Incoming mbuf does not contain valid IP6 header */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
	    (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return (-1);
	}

	ip6h = mtod(*m, struct ip6_hdr *);
	/* Validate that mbuf contains IP payload equal to ip6_plen */
	if ((size_t)(*m)->m_pkthdr.len <
	    ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return (-1);
	}

	osrc = ip6h->ip6_src;
	odst = ip6h->ip6_dst;

	/*
	 * Retrieve the local CLAT46 reserved IPv6 address.
	 * Let the packet pass if we don't find one, as the flag
	 * may get set before IPv6 configuration has taken place.
	 */
	ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_dst == NULL)
		goto done;

	/*
	 * Check if the original dest in the packet is same as the reserved
	 * CLAT46 IPv6 address
	 */
	if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
		pbuf_t pbuf_store, *pbuf = NULL;
		pbuf_init_mbuf(&pbuf_store, *m, ifp);
		pbuf = &pbuf_store;

		/*
		 * Retrieve the local CLAT46 IPv4 address reserved for
		 * stateless translation.
		 */
		ia4_clat_dst = inifa_ifpclatv4(ifp);
		if (ia4_clat_dst == NULL) {
			IFA_REMREF(&ia6_clat_dst->ia_ifa);
			ip6stat.ip6s_clat464_in_nov4addr_drop++;
			error = -1;
			goto cleanup;
		}
		IFA_REMREF(&ia6_clat_dst->ia_ifa);

		/* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
		dst = &ia4_clat_dst->ia_addr.sin_addr;
		if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
			ip6stat.ip6s_clat464_in_v4synthfail_drop++;
			error = -1;
			goto cleanup;
		}

		ip6h = pbuf->pb_data;
		off = sizeof(struct ip6_hdr);
		proto = ip6h->ip6_nxt;
		tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
		tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

		/*
		 * Translate the IP header and update the fragmentation
		 * header if needed
		 */
		error = (nat464_translate_64(pbuf, off, tos, &proto,
		    ip6h->ip6_hlim, src, *dst, tot_len,
		    &is_first_frag) == NT_NAT64) ? 0 : -1;

		ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64transfail_drop++;
			goto cleanup;
		}

		/*
		 * Translate protocol header, update checksum, checksum flags
		 * and related fields.
		 */
		error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
		    (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
		    NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
			goto cleanup;
		}

cleanup:
		if (ia4_clat_dst != NULL)
			IFA_REMREF(&ia4_clat_dst->ia_ifa);

		if (pbuf_is_valid(pbuf)) {
			*m = pbuf->pb_mbuf;
			pbuf->pb_mbuf = NULL;
			pbuf_destroy(pbuf);
		} else {
			error = -1;
			ip6stat.ip6s_clat464_in_invalpbuf_drop++;
		}

		if (error == 0) {
			*proto_family = PF_INET;
			ip6stat.ip6s_clat464_in_success++;
		}
	} /* CLAT traffic */

done:
	return (error);
}
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

	if (retval == EOPNOTSUPP)
		retval = 0;
cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
	&net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
}
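/*
 * Typical usage of the mark stack above (sketch only): push a mark
 * around a section that re-entered code must be able to detect, then
 * pop exactly what the push returned.  The returned pointer encodes
 * which bits this particular caller set, so nested push/pop pairs
 * compose correctly even when their masks overlap.  NET_THREAD_HELD_PF
 * is one of the existing mark bits.
 */
#if 0
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
	/* ... code that reentrant paths detect via net_thread_is_marked() ... */
	net_thread_marks_pop(marks);
#endif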
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}
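/*
 * Canonical pairing for the IO reference taken above (sketch only): a
 * caller that passes a non-zero refio must drop the reference with
 * ifnet_decr_iorefcnt() on every exit path once it is done with the
 * interface.
 */
#if 0
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
	/* ... use ifp safely; ifnet_detach() is held off meanwhile ... */
	ifnet_decr_iorefcnt(ifp);
#endif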
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifp->if_refio--;

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
		wakeup(&(ifp->if_refio));

	lck_mtx_unlock(&ifp->if_ref_lock);
}
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);
	if (need_release) {
		dlil_if_release(ifp);
	}
	return (0);
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
	return (retval);
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v1 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
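/*
 * A minimal sketch of a v1 protocol attach from a client of this KPI
 * (not compiled).  The handler names are hypothetical placeholders and
 * PF_FOO stands in for a real protocol family; a real caller would
 * typically also supply demux_list/demux_count so the family module
 * can refine demuxing.
 */
#if 0
	struct ifnet_attach_proto_param param;
	errno_t err;

	bzero(&param, sizeof (param));
	param.input = my_proto_input;		/* hypothetical */
	param.pre_output = my_proto_preout;	/* hypothetical */

	err = ifnet_attach_protocol(ifp, PF_FOO, &param);
#endif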
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	} else {
		if (dlil_verbose) {
			printf("%s: attached v2 protocol %d (count = %d)\n",
			    if_name(ifp),
			    protocol, proto_count);
		}
	}
	ifnet_head_done();
	if (retval == 0) {
		/*
		 * A protocol has been attached, mark the interface up.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	} else if (ifproto != NULL) {
		zfree(dlif_proto_zone, ifproto);
	}
	return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}
6030 extern int if_next_index(void);
6031 extern int tcp_ecn_outbound
;
6034 ifnet_attach(ifnet_t ifp
, const struct sockaddr_dl
*ll_addr
)
6036 struct ifnet
*tmp_if
;
6038 struct if_data_internal if_data_saved
;
6039 struct dlil_ifnet
*dl_if
= (struct dlil_ifnet
*)ifp
;
6040 struct dlil_threading_info
*dl_inp
;
6041 u_int32_t sflags
= 0;
6048 * Serialize ifnet attach using dlil_ifnet_lock, in order to
6049 * prevent the interface from being configured while it is
6050 * embryonic, as ifnet_head_lock is dropped and reacquired
6051 * below prior to marking the ifnet with IFRF_ATTACHED.
6054 ifnet_head_lock_exclusive();
6055 /* Verify we aren't already on the list */
6056 TAILQ_FOREACH(tmp_if
, &ifnet_head
, if_link
) {
6057 if (tmp_if
== ifp
) {
6064 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6065 if (!(ifp
->if_refflags
& IFRF_EMBRYONIC
)) {
6066 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6070 lck_mtx_unlock(&ifp
->if_ref_lock
);
6072 ifnet_lock_exclusive(ifp
);
6075 VERIFY(ifp
->if_detaching_link
.tqe_next
== NULL
);
6076 VERIFY(ifp
->if_detaching_link
.tqe_prev
== NULL
);
6078 if (ll_addr
!= NULL
) {
6079 if (ifp
->if_addrlen
== 0) {
6080 ifp
->if_addrlen
= ll_addr
->sdl_alen
;
6081 } else if (ll_addr
->sdl_alen
!= ifp
->if_addrlen
) {
6082 ifnet_lock_done(ifp
);
6090 * Allow interfaces without protocol families to attach
6091 * only if they have the necessary fields filled out.
6093 if (ifp
->if_add_proto
== NULL
|| ifp
->if_del_proto
== NULL
) {
6094 DLIL_PRINTF("%s: Attempt to attach interface without "
6095 "family module - %d\n", __func__
, ifp
->if_family
);
6096 ifnet_lock_done(ifp
);
6102 /* Allocate protocol hash table */
6103 VERIFY(ifp
->if_proto_hash
== NULL
);
6104 ifp
->if_proto_hash
= zalloc(dlif_phash_zone
);
6105 if (ifp
->if_proto_hash
== NULL
) {
6106 ifnet_lock_done(ifp
);
6111 bzero(ifp
->if_proto_hash
, dlif_phash_size
);
6113 lck_mtx_lock_spin(&ifp
->if_flt_lock
);
6114 VERIFY(TAILQ_EMPTY(&ifp
->if_flt_head
));
6115 TAILQ_INIT(&ifp
->if_flt_head
);
6116 VERIFY(ifp
->if_flt_busy
== 0);
6117 VERIFY(ifp
->if_flt_waiters
== 0);
6118 lck_mtx_unlock(&ifp
->if_flt_lock
);
6120 if (!(dl_if
->dl_if_flags
& DLIF_REUSE
)) {
6121 VERIFY(LIST_EMPTY(&ifp
->if_multiaddrs
));
6122 LIST_INIT(&ifp
->if_multiaddrs
);
6125 VERIFY(ifp
->if_allhostsinm
== NULL
);
6126 VERIFY(TAILQ_EMPTY(&ifp
->if_addrhead
));
6127 TAILQ_INIT(&ifp
->if_addrhead
);
6129 if (ifp
->if_index
== 0) {
6130 int idx
= if_next_index();
6134 ifnet_lock_done(ifp
);
6139 ifp
->if_index
= idx
;
6141 /* There should not be anything occupying this slot */
6142 VERIFY(ifindex2ifnet
[ifp
->if_index
] == NULL
);
6144 /* allocate (if needed) and initialize a link address */
6145 ifa
= dlil_alloc_lladdr(ifp
, ll_addr
);
6147 ifnet_lock_done(ifp
);
6153 VERIFY(ifnet_addrs
[ifp
->if_index
- 1] == NULL
);
6154 ifnet_addrs
[ifp
->if_index
- 1] = ifa
;
6156 /* make this address the first on the list */
6158 /* hold a reference for ifnet_addrs[] */
6159 IFA_ADDREF_LOCKED(ifa
);
6160 /* if_attach_link_ifa() holds a reference for ifa_link */
6161 if_attach_link_ifa(ifp
, ifa
);
6165 mac_ifnet_label_associate(ifp
);
6168 TAILQ_INSERT_TAIL(&ifnet_head
, ifp
, if_link
);
6169 ifindex2ifnet
[ifp
->if_index
] = ifp
;
6171 /* Hold a reference to the underlying dlil_ifnet */
6172 ifnet_reference(ifp
);
6174 /* Clear stats (save and restore other fields that we care) */
6175 if_data_saved
= ifp
->if_data
;
6176 bzero(&ifp
->if_data
, sizeof (ifp
->if_data
));
6177 ifp
->if_data
.ifi_type
= if_data_saved
.ifi_type
;
6178 ifp
->if_data
.ifi_typelen
= if_data_saved
.ifi_typelen
;
6179 ifp
->if_data
.ifi_physical
= if_data_saved
.ifi_physical
;
6180 ifp
->if_data
.ifi_addrlen
= if_data_saved
.ifi_addrlen
;
6181 ifp
->if_data
.ifi_hdrlen
= if_data_saved
.ifi_hdrlen
;
6182 ifp
->if_data
.ifi_mtu
= if_data_saved
.ifi_mtu
;
6183 ifp
->if_data
.ifi_baudrate
= if_data_saved
.ifi_baudrate
;
6184 ifp
->if_data
.ifi_hwassist
= if_data_saved
.ifi_hwassist
;
6185 ifp
->if_data
.ifi_tso_v4_mtu
= if_data_saved
.ifi_tso_v4_mtu
;
6186 ifp
->if_data
.ifi_tso_v6_mtu
= if_data_saved
.ifi_tso_v6_mtu
;
6187 ifnet_touch_lastchange(ifp
);
6189 VERIFY(ifp
->if_output_sched_model
== IFNET_SCHED_MODEL_NORMAL
||
6190 ifp
->if_output_sched_model
== IFNET_SCHED_MODEL_DRIVER_MANAGED
||
6191 ifp
->if_output_sched_model
== IFNET_SCHED_MODEL_FQ_CODEL
);
6193 /* By default, use SFB and enable flow advisory */
6194 sflags
= PKTSCHEDF_QALG_SFB
;
6196 sflags
|= PKTSCHEDF_QALG_FLOWCTL
;
6198 if (if_delaybased_queue
)
6199 sflags
|= PKTSCHEDF_QALG_DELAYBASED
;
6201 if (ifp
->if_output_sched_model
==
6202 IFNET_SCHED_MODEL_DRIVER_MANAGED
)
6203 sflags
|= PKTSCHEDF_QALG_DRIVER_MANAGED
;
6205 /* Initialize transmit queue(s) */
6206 err
= ifclassq_setup(ifp
, sflags
, (dl_if
->dl_if_flags
& DLIF_REUSE
));
6208 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
6209 "err=%d", __func__
, ifp
, err
);
6213 /* Sanity checks on the input thread storage */
6214 dl_inp
= &dl_if
->dl_if_inpstorage
;
6215 bzero(&dl_inp
->stats
, sizeof (dl_inp
->stats
));
6216 VERIFY(dl_inp
->input_waiting
== 0);
6217 VERIFY(dl_inp
->wtot
== 0);
6218 VERIFY(dl_inp
->ifp
== NULL
);
6219 VERIFY(qhead(&dl_inp
->rcvq_pkts
) == NULL
&& qempty(&dl_inp
->rcvq_pkts
));
6220 VERIFY(qlimit(&dl_inp
->rcvq_pkts
) == 0);
6221 VERIFY(!dl_inp
->net_affinity
);
6222 VERIFY(ifp
->if_inp
== NULL
);
6223 VERIFY(dl_inp
->input_thr
== THREAD_NULL
);
6224 VERIFY(dl_inp
->wloop_thr
== THREAD_NULL
);
6225 VERIFY(dl_inp
->poll_thr
== THREAD_NULL
);
6226 VERIFY(dl_inp
->tag
== 0);
6227 VERIFY(dl_inp
->mode
== IFNET_MODEL_INPUT_POLL_OFF
);
6228 bzero(&dl_inp
->tstats
, sizeof (dl_inp
->tstats
));
6229 bzero(&dl_inp
->pstats
, sizeof (dl_inp
->pstats
));
6230 bzero(&dl_inp
->sstats
, sizeof (dl_inp
->sstats
));
6231 #if IFNET_INPUT_SANITY_CHK
6232 VERIFY(dl_inp
->input_mbuf_cnt
== 0);
6233 #endif /* IFNET_INPUT_SANITY_CHK */
	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
		ifp->if_inp->input_mit_tcall =
		    thread_call_allocate_with_priority(dlil_mit_tcall_fn,
		    ifp, THREAD_CALL_PRIORITY_KERNEL);
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		if ((err = kernel_thread_start(ifnet_start_thread_fn,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}
	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}
	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}
	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			printf("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			printf("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}
	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */
	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;	/* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link address
 * gets removed from if_addrhead and ifnet_addrs[] at detach time, its
 * location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof (asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
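/*
 * Illustrative sketch (not part of this file's build): the ROUNDUP()
 * macro used above rounds a sockaddr length up to the next multiple of
 * sizeof (u_int32_t) with a single OR-and-increment.  A standalone
 * userspace demonstration of the same bit trick:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (uint32_t) - 1)))

int
main(void)
{
	/* 1..4 -> 4, 5..8 -> 8, and so on */
	for (int len = 1; len <= 9; len++)
		printf("%d -> %zu\n", len, (size_t)ROUNDUP(len));
	return (0);
}
#endif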
void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose) {
		printf("%s: detaching\n", if_name(ifp));
	}

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/* Reset CLAT46 flag */
	ifp->if_eflags &= ~IFEF_CLAT46;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}
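/*
 * Illustrative sketch (not part of this file's build): the enqueue/
 * dequeue pair above is a plain TAILQ-based work queue guarded by a
 * mutex and a sanity counter.  The same shape in userspace with
 * <sys/queue.h>; the names here are hypothetical:
 */
#if 0
#include <sys/queue.h>
#include <stddef.h>
#include <assert.h>

struct work {
	TAILQ_ENTRY(work) w_link;
};
static TAILQ_HEAD(, work) work_head = TAILQ_HEAD_INITIALIZER(work_head);
static unsigned int work_cnt;

static void
work_enqueue(struct work *w)		/* caller holds the queue lock */
{
	++work_cnt;
	assert(work_cnt != 0);
	TAILQ_INSERT_TAIL(&work_head, w, w_link);
}

static struct work *
work_dequeue(void)			/* caller holds the queue lock */
{
	struct work *w = TAILQ_FIRST(&work_head);

	if (w != NULL) {
		assert(work_cnt != 0);
		--work_cnt;
		TAILQ_REMOVE(&work_head, w, w_link);
		w->w_link.tqe_next = NULL;	/* poison the linkage */
		w->w_link.tqe_prev = NULL;
	}
	return (w);
}
#endif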
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_done(ifp);

		/* wait for the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
		    == 0) {
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}
		lck_mtx_unlock(&inp->input_lck);
		ifnet_lock_exclusive(ifp);

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking gets cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (if_free != NULL)
		if_free(ifp);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
#else
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
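/*
 * Illustrative sketch (not part of this file's build): the stub
 * routines above exist so a detached ifnet never calls back into a
 * driver that may already have unloaded; ifnet_detach_final() repoints
 * every driver callback at these harmless defaults.  The generic shape
 * of that "neuter the vtable" move, with hypothetical names:
 */
#if 0
struct ops {
	void (*start)(void);
	int (*ioctl)(unsigned long, void *);
};

static void
noop_start(void)
{
	/* do nothing */
}

static int
noop_ioctl(unsigned long cmd, void *arg)
{
	(void)cmd;
	(void)arg;
	return (-1);		/* EOPNOTSUPP-style refusal */
}

/* Point all callbacks at safe stubs before the provider goes away. */
static void
ops_neuter(struct ops *o)
{
	o->start = noop_start;
	o->ioctl = noop_ioctl;
}
#endif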
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name.
	 * To make sure of that, the list has to be traversed completely.
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
		}

		if (uniqueid_len) {
			if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
			    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
				if (dlifp1->dl_if_flags & DLIF_INUSE) {
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					ret = EBUSY;
					goto end;
				} else {
					dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
					/* Cache the first interface that can be recycled */
					if (*ifp == NULL)
						*ifp = ifp1;
					/*
					 * XXX Do not break or jump to end as we have to traverse
					 * the whole list to ensure there are no name collisions
					 */
				}
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (*ifp != NULL)
		goto end;

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
#if 1
	dlifp1->dl_if_flags |= DLIF_DEBUG;
	dlifp1->dl_if_trace = dlil_if_trace;
#endif
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
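/*
 * Illustrative sketch (not part of this file's build): dlil_if_acquire()
 * aligns the dlil_ifnet to a 64-bit boundary inside a larger buffer and
 * hides the original allocation pointer one word behind the aligned
 * base so it can be freed later.  A minimal userspace rendering of the
 * same trick; P2ROUNDUP is redefined locally and the helper names are
 * hypothetical:
 */
#if 0
#include <stdlib.h>
#include <stdint.h>

#define P2ROUNDUP(x, a)	(-(-(intptr_t)(x) & -(intptr_t)(a)))

static void *
aligned_alloc_with_backptr(size_t size)
{
	void *buf, *base, **pbuf;

	/* room for the backpointer plus worst-case alignment slop */
	buf = malloc(size + sizeof (void *) + sizeof (uint64_t));
	if (buf == NULL)
		return (NULL);
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (uint64_t),
	    sizeof (uint64_t));
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;		/* remember the original address */
	return (base);
}

static void
aligned_free_with_backptr(void *base)
{
	free(*(void **)((intptr_t)base - sizeof (void *)));
}
#endif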
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return (src_rt.ro_rt);
}
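/*
 * Illustrative sketch (not part of this file's build): the cached
 * lookup above follows a copy-out/validate/refill/copy-in discipline so
 * the cache lock is never held across the expensive lookup, and the
 * cache and the caller each hold their own reference.  A minimal
 * single-entry analogue with hypothetical names; locking is elided:
 */
#if 0
#include <stdlib.h>

struct entry { int key; int refs; };

static struct entry *cachep;	/* one-entry cache */

static struct entry *
expensive_lookup(int key)
{
	struct entry *e = calloc(1, sizeof (*e));

	if (e != NULL) {
		e->key = key;
		e->refs = 1;	/* caller's reference */
	}
	return (e);
}

static void
entry_release(struct entry *e)
{
	if (--e->refs == 0)
		free(e);
}

/* Consult the cache first; hand back a referenced entry for key. */
static struct entry *
cached_lookup(int key)
{
	struct entry *e = cachep;

	if (e == NULL || e->key != key) {	/* validate */
		e = expensive_lookup(key);	/* refill */
		if (e != NULL) {
			e->refs++;		/* extra ref for the cache */
			if (cachep != NULL)
				entry_release(cachep);
			cachep = e;
		}
	} else {
		e->refs++;			/* reference for the caller */
	}
	return (e);
}
#endif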
#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
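/*
 * Illustrative sketch (not part of this file's build): the "normalize
 * to edge" step above snaps a raw metric to the upper edge of the band
 * it falls into, so only a handful of discrete states propagate.  The
 * same banding logic over hypothetical threshold values:
 */
#if 0
/* Hypothetical thresholds mirroring the IFNET_LQM_THRESH_* ordering. */
enum {
	THRESH_ABORT = 10,
	THRESH_VIABLE = 30,
	THRESH_POOR = 50,
	THRESH_GOOD = 100
};

static int
normalize_to_edge(int lqm)
{
	if (lqm >= 0 && lqm <= THRESH_ABORT)
		return (THRESH_ABORT);
	else if (lqm > THRESH_ABORT && lqm <= THRESH_VIABLE)
		return (THRESH_VIABLE);
	else if (lqm > THRESH_VIABLE && lqm <= THRESH_POOR)
		return (THRESH_POOR);
	else if (lqm > THRESH_POOR && lqm <= THRESH_GOOD)
		return (THRESH_GOOD);
	return (lqm);	/* negative (off/unknown) values pass through */
}
#endif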
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
int
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire.  This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
int
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
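/*
 * Illustrative sketch (not part of this file's build): every handler
 * above follows the same read/modify/clamp shape -- copy the current
 * value into a local, let sysctl_handle_*() overwrite it from userland,
 * bail if this was only a read, then clamp and publish.  A hypothetical
 * handler and tunable in the same idiom:
 */
#if 0
static int my_tunable = 8;
#define MY_TUNABLE_MIN 1

static int
sysctl_my_tunable SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = my_tunable;			/* snapshot current value */
	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);		/* error, or read-only access */
	if (i < MY_TUNABLE_MIN)
		i = MY_TUNABLE_MIN;	/* clamp to the legal range */
	my_tunable = i;
	return (err);
}
#endif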
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, TRUE);
	return (0);
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
8475 * Function to compare ifnet_fc_entries in ifnet flow control tree
8478 ifce_cmp(const struct ifnet_fc_entry
*fc1
, const struct ifnet_fc_entry
*fc2
)
8480 return (fc1
->ifce_flowhash
- fc2
->ifce_flowhash
);
8484 ifnet_fc_add(struct ifnet
*ifp
)
8486 struct ifnet_fc_entry keyfc
, *ifce
;
8489 VERIFY(ifp
!= NULL
&& (ifp
->if_eflags
& IFEF_TXSTART
));
8490 VERIFY(ifp
->if_flowhash
!= 0);
8491 flowhash
= ifp
->if_flowhash
;
8493 bzero(&keyfc
, sizeof (keyfc
));
8494 keyfc
.ifce_flowhash
= flowhash
;
8496 lck_mtx_lock_spin(&ifnet_fc_lock
);
8497 ifce
= RB_FIND(ifnet_fc_tree
, &ifnet_fc_tree
, &keyfc
);
8498 if (ifce
!= NULL
&& ifce
->ifce_ifp
== ifp
) {
8499 /* Entry is already in ifnet_fc_tree, return */
8500 lck_mtx_unlock(&ifnet_fc_lock
);
8506 * There is a different fc entry with the same flow hash
8507 * but different ifp pointer. There can be a collision
8508 * on flow hash but the probability is low. Let's just
8509 * avoid adding a second one when there is a collision.
8511 lck_mtx_unlock(&ifnet_fc_lock
);
8515 /* become regular mutex */
8516 lck_mtx_convert_spin(&ifnet_fc_lock
);
8518 ifce
= zalloc(ifnet_fc_zone
);
8520 /* memory allocation failed */
8521 lck_mtx_unlock(&ifnet_fc_lock
);
8524 bzero(ifce
, ifnet_fc_zone_size
);
8526 ifce
->ifce_flowhash
= flowhash
;
8527 ifce
->ifce_ifp
= ifp
;
8529 RB_INSERT(ifnet_fc_tree
, &ifnet_fc_tree
, ifce
);
8530 lck_mtx_unlock(&ifnet_fc_lock
);
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL)
		*flags = 0;

	return (error);
}
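/*
 * Illustrative sketch (compiled out, not from the original file):
 * setting and reading back an IPv4 network signature.  The example
 * bytes and the function name are hypothetical; IFNET_SIGNATURELEN
 * (from net/if.h) bounds the stored signature.
 */
#if 0
static void
example_netsig_roundtrip(struct ifnet *ifp)
{
	uint8_t sig[IFNET_SIGNATURELEN] = { 0xde, 0xad, 0xbe, 0xef };
	uint8_t out[IFNET_SIGNATURELEN];
	uint8_t outlen = sizeof (out);
	uint16_t flags;

	if (ifnet_set_netsignature(ifp, AF_INET, sizeof (sig), 0, sig) == 0 &&
	    ifnet_get_netsignature(ifp, AF_INET, &outlen, &flags, out) == 0)
		printf("%s: signature length %u\n", if_name(ifp), outlen);

	/* Passing len == 0 clears the stored signature */
	(void) ifnet_set_netsignature(ifp, AF_INET, 0, 0, NULL);
}
#endif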
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0)
		necp_update_all_clients();

	return (error);
}
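/*
 * Illustrative sketch (compiled out, not from the original file):
 * installing the RFC 6052 well-known prefix 64:ff9b::/96 in slot 0 and
 * clearing the remaining slots; the function name is hypothetical.
 */
#if 0
static int
example_set_wkp(struct ifnet *ifp)
{
	struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES];
	struct in6_addr wkp = {{{ 0x00, 0x64, 0xff, 0x9b }}};

	bzero(prefixes, sizeof (prefixes));	/* prefix_len 0 clears a slot */
	prefixes[0].ipv6_prefix = wkp;
	prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;

	return (ifnet_set_nat64prefix(ifp, prefixes));
}
#endif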
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL)
		return (EINVAL);

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
			found_one = 1;
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes)
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));

out:
	if_inet6data_lock_done(ifp);

	return (error);
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware that
		 * performs the 16-bit 1's complement sum beginning at
		 * various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
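/*
 * Illustrative sketch (compiled out, not from the original file): the
 * "16-bit 1's complement sum" referenced above is Internet-checksum
 * arithmetic.  A minimal contiguous-buffer reference for what
 * b_sum16()/m_sum16() compute (minus their optimizations) would be:
 */
#if 0
static uint16_t
example_sum16(const uint8_t *buf, int len)
{
	uint32_t sum = 0;
	int i;

	/* Form 16-bit words in host (little-endian) order */
	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8);
	if (len & 1)
		sum += buf[len - 1];	/* odd trailing byte, low half */

	/* Fold carries back into 16 bits (1's complement addition) */
	while (sum > 0xffff)
		sum = (sum & 0xffff) + (sum >> 16);

	return ((uint16_t)sum);
}
#endif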
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}
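/*
 * Illustrative sketch (compiled out, not from the original file): the
 * handlers in this group are reachable from userland via
 * sysctlbyname(3).  The OID string below assumes registration under
 * net.link.generic.system, where the DLIL debug sysctls normally live.
 */
#if 0
/* Userland, not kernel, code */
#include <sys/types.h>
#include <sys/sysctl.h>

static int
example_set_hwcksum_dbg_mode(int mode)
{
	/* Takes effect only when hwcksum_dbg is already enabled */
	return (sysctlbyname("net.link.generic.system.hwcksum_dbg_mode",
	    NULL, NULL, &mode, sizeof (mode)));
}
#endif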
static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}
static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}
static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t	init;
	uint16_t	len;
	uint16_t	sumr;	/* reference */
	uint16_t	sumrp;	/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 }
};
#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
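/*
 * Worked example (not from the original file): the sumrp values can be
 * checked by hand against the start of sumdata[] (0x1f, 0x8b, 0x08,
 * 0x08, 0x4c, 0xe5, 0x9a, ...), forming 16-bit words in little-endian
 * order and folding carries:
 *	len=1:	0x1f					-> 0x001f
 *	len=2:	0x1f | (0x8b << 8)			-> 0x8b1f
 *	len=3:	0x8b1f + 0x08				-> 0x8b27
 *	len=7:	0x8b1f + 0x0808 + 0xe54c (fold) + 0x9a	-> 0x790e
 * which match the corresponding table entries above.
 */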
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof (uint64_t) * 2));

	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sumr, sumtbl[n].sumrp);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
#define	CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
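/*
 * Illustrative sketch (compiled out, not from the original file):
 * CASE_STRINGIFY(x) expands to "case x: return #x", so the function
 * above maps an event code to its symbolic name:
 */
#if 0
	printf("event: %s\n", dlil_kev_dl_code_str(KEV_DL_LINK_ON));
	/* prints "event: KEV_DL_LINK_ON" */
#endif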
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */
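/*
 * Illustrative sketch (compiled out, not from the original file):
 * driving the handler above from userland.  The final three MIB
 * integers carry the documented name[0..2] inputs; the textual OID
 * "net.link.generic.system.get_kao_frames" is an assumption here.
 */
#if 0
/* Userland, not kernel, code */
#include <sys/types.h>
#include <sys/sysctl.h>

static int
example_get_kao_frames(int ifindex, int frame_count, int data_offset,
    void *buf, size_t *lenp)
{
	int mib[8];
	size_t miblen = 5;

	/* Resolve the node OID, then append name[0..2] */
	if (sysctlnametomib("net.link.generic.system.get_kao_frames",
	    mib, &miblen) != 0)
		return (-1);
	mib[miblen + 0] = ifindex;	/* name[0]: interface index */
	mib[miblen + 1] = frame_count;	/* name[1]: frames_array_count */
	mib[miblen + 2] = data_offset;	/* name[2]: frame_data_offset */

	return (sysctl(mib, (u_int)(miblen + 3), buf, lenp, NULL, 0));
}
#endif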
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}