/*
 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_ipsec.h>
#include <net/if_llreach.h>
#include <net/if_utun.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <net/nat464_utils.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/pf_pbuf.h>
#include <libkern/OSAtomic.h>
#include <libkern/tree.h>
#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>
#include <mach/thread_act.h>
#include <mach/sdt.h>
#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>
#include <net/pfvar.h>
#include <net/pktsched/pktsched.h>
#include <net/necp.h>
#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define MAX_LINKADDR		4	/* LONGWORDS */
#define M_NKE			M_IFADDR

#if 1
#define DLIL_PRINTF	printf
#else
#define DLIL_PRINTF	kprintf
#endif

#define IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
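
/*
 * Example expansion (for illustration only): a use such as
 *
 *	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
 *
 * becomes
 *
 *	_CASSERT(!(offsetof(struct if_data_internal, ifi_ibytes) %
 *	    sizeof (u_int64_t)));
 *
 * i.e. a compile-time assertion that fails the build if the field ever
 * lands on a non-8-byte boundary, which would break 64-bit atomics.
 */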
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);

#define DLIL_SDLDATALEN \
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
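
/*
 * Illustration: DLIL_SDLDATALEN is the number of link-layer address bytes
 * that fit in a DLIL_SDLMAXLEN-sized sockaddr_dl once the fixed header up
 * to sdl_data[0] has been subtracted.
 */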
struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define DLIF_DEBUG	0x4	/* has debugging info */

#define IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define DLIL_TO_IFP(s)	(&s->dl_if)
#define IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
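
/*
 * Note: both conversion macros above rely on dl_if being the first member
 * of struct dlil_ifnet, so an ifnet and its enclosing dlil_ifnet share the
 * same address and the casts are valid in either direction.
 */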
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter) filt_next;
	u_int32_t	filt_flags;
	const char	*filt_name;
	protocol_family_t filt_protocol;
	iff_input_func	filt_input;
	iff_output_func	filt_output;
	iff_event_func	filt_event;
	iff_ioctl_func	filt_ioctl;
	iff_detached_func filt_detached;
};

struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define DLIF_ZONE_MAX		IFNETS_MAX	/* maximum elements in zone */
#define DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define DLIF_FILT_ZONE_MAX	8	/* maximum elements in zone */
#define DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash"	/* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;		/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize;	/* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;		/* zone for tcpstat_local */

#define DLIF_TCPSTAT_ZONE_MAX	1	/* maximum elements in zone */
#define DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;		/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize;	/* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;		/* zone for udpstat_local */

#define DLIF_UDPSTAT_ZONE_MAX	1	/* maximum elements in zone */
#define DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */

static u_int32_t net_rtref;
static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
#if CONFIG_EMBEDDED
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
#else
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
#endif /* CONFIG_EMBEDDED */
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
#if DEBUG || DEVELOPMENT
static void dlil_verify_sum16(void);
#endif /* DEBUG || DEVELOPMENT */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);
static errno_t ifnet_enqueue_common(struct ifnet *, void *,
    classq_pkt_type_t, boolean_t, boolean_t *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
#if INET6
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
#endif /* INET6 */

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;

#if TEST_INPUT_THREAD_TERMINATION
static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
#endif /* TEST_INPUT_THREAD_TERMINATION */
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;

struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define IFNET_FC_ZONE_MAX	32
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);

#ifdef CONFIG_EMBEDDED
int dlil_lladdr_ckreq = 1;
#else
int dlil_lladdr_ckreq = 0;
#endif /* CONFIG_EMBEDDED */

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");

#define IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define IF_RCVQ_MINLEN	32
#define IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");

#define IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");

#if TEST_INPUT_THREAD_TERMINATION
static u_int32_t if_input_thread_termination_spin = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_input_thread_termination_spin, 0,
    sysctl_input_thread_termination_spin,
    "I", "input thread termination spin limit");
#endif /* TEST_INPUT_THREAD_TERMINATION */

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */

static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");

static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");
u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");
#define HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");

static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1;		/* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2;		/* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");
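
/*
 * Usage note (illustration): the knobs and counters published above live
 * under the net.link.generic.system sysctl node, e.g.
 *
 *	sysctl net.link.generic.system.dlil_input_threads
 *
 * from user space, or sysctlbyname(3) programmatically.
 */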
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);

static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);

uint32_t dlil_rcv_mit_pkts_min = 5;
uint32_t dlil_rcv_mit_pkts_max = 64;
uint32_t dlil_rcv_mit_interval = (500 * 1000);

#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
#endif /* DEVELOPMENT || DEBUG */
#define DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
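
/*
 * Worked example (for illustration): with decay = 2 the EWMA above keeps
 * 3/4 of the old average and blends in 1/4 of the new sample.  For
 * old = 16 and new = 4:
 *
 *	_avg = ((16 << 2) - 16 + 4) >> 2 = (64 - 16 + 4) >> 2 = 52 >> 2 = 13
 *
 * which is exactly 16 * 3/4 + 4 * 1/4 = 13, all in integer arithmetic.
 */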
#define MBPS	(1ULL * 1000 * 1000)
#define GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024) },
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024) },
	{ 0, 0, 0, 0, 0 }
};
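
/*
 * Reading the table (a sketch): the row is selected by downlink speed.
 * On a link that maps to the 100 Mbps row, for example, the inbound
 * backlog would have to build up to roughly the high watermarks
 * (40 packets or 64 KB) before polling mode is favored, and drain below
 * the low watermarks (10 packets / 4 KB) before interrupt mode resumes.
 */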
static inline u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return 0;
	case PF_INET6:
		return 1;
	case PF_VLAN:
		return 2;
	case PF_UNSPEC:
	default:
		return 3;
	}
}

/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
	}

	while (proto != NULL && proto->protocol_family != protocol_family) {
		proto = SLIST_NEXT(proto, next_hash);
	}

	if (proto != NULL) {
		if_proto_ref(proto);
	}

	return proto;
}
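
/*
 * Usage sketch (illustrative only; "my_ifp" is a placeholder): the lookup
 * must run under the ifnet lock, and the returned if_proto carries a
 * reference that the caller releases with if_proto_free():
 *
 *	ifnet_lock_shared(my_ifp);
 *	struct if_proto *p = find_attached_proto(my_ifp, PF_INET);
 *	ifnet_lock_done(my_ifp);
 *	if (p != NULL) {
 *		... use p ...
 *		if_proto_free(p);	// drop the reference taken above
 *	}
 */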
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
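
/*
 * Reference-counting sketch: if_proto_ref() above and if_proto_free()
 * below pair up.  The release that drops the refcount from 1 to 0 runs
 * the protocol's detached callback, purges leftover routes for the
 * interface/protocol pair, posts KEV_DL_PROTO_DETACHED, and returns the
 * if_proto to its zone.
 */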
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		return;
	}

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass) {
		LCK_RW_ASSERT(&ifp->if_lock, type);
	}
}

__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * dlil_ifp_protolist
 * - get the list of protocols attached to the interface, or just the number
 *   of attached protocols
 * - if the number returned is greater than 'list_count', truncation occurred
 *
 * Note:
 * - caller must already be holding ifnet lock.
 */
static u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
    u_int32_t list_count)
{
	u_int32_t count = 0;
	int i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
{
	ifnet_lock_shared(ifp);
	count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return count;
}

__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	_FREE(list, M_TEMP);
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return ret;
}
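
/*
 * Buffer layout used by the stats allocations above (illustration):
 *
 *	buf (from zalloc)            base = P2ROUNDUP(buf + 8, 8)
 *	|                            |
 *	v                            v
 *	+---------//--------+--------+---------------------------+
 *	|     headroom      | *pbuf  |  64-bit aligned object    |
 *	+---------//--------+--------+---------------------------+
 *	                    ^
 *	                    pbuf = base - sizeof (void *), holds buf
 *
 * The teardown path retrieves the original zalloc() address through
 * *pbuf so the zone element can be freed exactly as it was allocated.
 */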
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return error;
}
#if TEST_INPUT_THREAD_TERMINATION
static int
sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_input_thread_termination_spin;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_input_thread_termination_spin = i;
	return err;
}
#endif /* TEST_INPUT_THREAD_TERMINATION */
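
/*
 * The handler above follows the common two-phase sysctl pattern: copy the
 * current value into a local, let sysctl_handle_int() perform the user
 * copy-in/copy-out, and commit the new value only after validation (here,
 * only when rx polling is enabled).
 */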
static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof(inp->input_name));
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof(inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof(inp->tstats));
	bzero(&inp->pstats, sizeof(inp->pstats));
	bzero(&inp->sstats, sizeof(inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}

static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		printf("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	lck_mtx_lock_spin(&inp->input_lck);
	VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
	inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->input_waiting);
	lck_mtx_unlock(&inp->input_lck);

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof(policy));
	policy.affinity_tag = tag;
	return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
}
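
/*
 * Usage sketch: dlil_create_input_thread() above draws a random tag with
 * read_frandom() and applies it here, so that other threads later tagged
 * with the same value (e.g. a driver workloop) are scheduled on the same
 * processor set as the input thread.
 */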
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
	dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
	    sizeof(struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof(struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof(struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof(struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof(struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
	ifnet_llreach_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface port list */
	if_ports_used_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/* Initialize link layer table */
	lltable_glbl_init();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		zfree(dlif_filt_zone, filter);
	}

	return retval;
}
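
#if 0	/* illustration only -- not part of this file */
/*
 * A minimal sketch of how callers typically reach the routine above: the
 * public iflt_attach() KPI wraps dlil_attach_filter().  Everything below
 * (my_input, my_detached, the bundle-style name) is hypothetical.
 */
static errno_t
my_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, proto, data, frame_ptr)
	return 0;		/* 0 lets the packet continue up the stack */
}

static void
my_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)	/* last callback; safe to free the cookie */
}

static errno_t
attach_my_filter(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter filt = {
		.iff_cookie   = NULL,
		.iff_name     = "com.example.myfilt",
		.iff_protocol = 0,	/* 0 == see packets of all protocols */
		.iff_input    = my_input,
		.iff_detached = my_detached,
	};
	return iflt_attach(ifp, &filt, ref);
}
#endif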
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose) {
		printf("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL) {
		return;
	}
	dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);
		}

		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
		}

		if (proto_req) {
			proto_input_run();
		}
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s",
	    ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble).
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL) {
				mbuf_freem_list(m);
			}

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
		}
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;
		errno_t err;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
			ival = IF_RXPOLL_INTERVALTIME_MIN;
		}

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble).
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			lck_mtx_convert_spin(&inp->input_lck);
			_flushq(&inp->rcvq_pkts);
			if (inp->input_mit_tcall != NULL) {
				if (thread_call_isactive(inp->input_mit_tcall)) {
					thread_call_cancel(inp->input_mit_tcall);
				}
			}
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime)) {
			*(&inp->sample_lasttime) = *(&now);
		}

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <)) {
				goto skip;
			}

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot) {
				inp->rxpoll_bmin = btot;
			}
			if (btot > inp->rxpoll_bmax) {
				inp->rxpoll_bmax = btot;
			}

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot) {
				inp->rxpoll_pmin = ptot;
			}
			if (ptot > inp->rxpoll_pmax) {
				inp->rxpoll_pmax = ptot;
			}

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime)) {
					*(&inp->dbg_lasttime) = *(&now);
				}
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime)) {
				*(&inp->mode_lasttime) = *(&now);
			}

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <)) {
				goto skip;
			}

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below).
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0) {
					inp->rxpoll_offerr++;
				}
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0) {
					inp->rxpoll_onerr++;
				}
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL) {
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
		}
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
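
/*
 * Worked example (sketch) of the decay arithmetic driving the sampler
 * above.  Assuming DLIL_EWMA(old, new, decay) computes
 * old = ((old << decay) - old + new) >> decay, i.e.
 * old = ((2^decay - 1) * old + new) / 2^decay, then with a hypothetical
 * if_rxpoll_decay of 2 each new sample carries one quarter of the weight:
 *
 *	avg = 0,   sample 400  ->  avg = 400  (first sample seeds the avg)
 *	avg = 400, sample 0    ->  avg = (3 * 400 + 0) / 4 = 300
 *	avg = 300, sample 0    ->  avg = 225, and so on toward 0
 *
 * This is why a short burst must persist across several hold periods
 * before pavg/bavg cross their hiwat marks and polling transitions ON.
 */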
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
			return EINVAL;
		}
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat) {
			return EINVAL;
		}
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
			return EINVAL;
		}
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat) {
			return EINVAL;
		}
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
		}
	}

	if (!locked) {
		lck_mtx_lock(&inp->input_lck);
	}

	LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
	}

	if (!locked) {
		lck_mtx_unlock(&inp->input_lck);
	}

	return 0;
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}

	bzero(p, sizeof(*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
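
#if 0	/* illustration only -- not part of this file */
/*
 * Sketch: a driver that wants to pin the poller rather than rely on
 * auto-tuning would typically go through the ifnet_set_poll_params()
 * KPI, which funnels into dlil_rxpoll_set_params() above.  The values
 * below are hypothetical; zeroed fields keep their auto-tuned defaults.
 */
static errno_t
example_pin_poll_params(ifnet_t ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof(p));
	p.packets_lowat = 10;		/* leave POLL mode below this */
	p.packets_hiwat = 100;		/* enter POLL mode above this */
	p.interval_time = 1000 * 1000;	/* 1 msec, clamped to the minimum */
	return ifnet_set_poll_params(ifp, &p);
}
#endif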
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}

static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below).
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	bzero(&_s, sizeof(_s));
	if (s != NULL) {
		bcopy(s, &_s, sizeof(_s));
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*input_func)(ifp, m_head, m_tail, &_s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return err;
}
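
#if 0	/* illustration only -- not part of this file */
/*
 * Sketch of the driver side of the contract enforced above: a chain of
 * packets passed to the extended variant must be accompanied by an exact
 * packets_in count.  The wrapper and its counters are hypothetical.
 */
static void
example_deliver_chain(ifnet_t ifp, mbuf_t head, mbuf_t tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof(s));
	s.packets_in = cnt;	/* must equal the chain length exactly */
	s.bytes_in = bytes;	/* approximate; may include link headers */
	(void) ifnet_input_extended(ifp, head, tail, &s);
}
#endif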
errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return ifp->if_output(ifp, m);
}

errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;

	if ((inp = ifp->if_inp) == NULL) {
		inp = dlil_main_input_thread;
	}

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		dlil_input_stats_sync(ifp, inp);
	}

	if (inp->input_mit_tcall &&
	    qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
	    qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
	    (ifp->if_family == IFNET_FAMILY_ETHERNET ||
	    ifp->if_type == IFT_CELLULAR)) {
		if (!thread_call_isactive(inp->input_mit_tcall)) {
			uint64_t deadline;
			clock_interval_to_deadline(dlil_rcv_mit_interval,
			    1, &deadline);
			(void) thread_call_enter_delayed(
				inp->input_mit_tcall, deadline);
		}
	} else {
		inp->input_waiting |= DLIL_INPUT_WAITING;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			inp->wtot++;
			wakeup_one((caddr_t)&inp->input_waiting);
		}
	}
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
		    ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE);
}
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	char thread_name[MAXTHREADNAMESIZE];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
	struct timespec delay_start_ts;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_start_thread, thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away).
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	(void) snprintf(ifname, sizeof(ifname), "%s_starter", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		if (ifp->if_start_thread != NULL) {
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}
		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;

		for (;;) {
			u_int32_t req = ifp->if_start_req;
			if (!IFCQ_IS_EMPTY(ifq) &&
			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
			    ifp->if_start_delayed == 0 &&
			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
			    (ifp->if_eflags & IFEF_DELAY_START)) {
				ifp->if_start_delayed = 1;
				ifnet_start_delayed++;
				break;
			} else {
				ifp->if_start_delayed = 0;
			}
			lck_mtx_unlock(&ifp->if_start_lock);

			/*
			 * If no longer attached, don't call start because ifp
			 * is being destroyed; else hold an IO refcnt to
			 * prevent the interface from being detached (will be
			 * released below).
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_start_lock);
				break;
			}

			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));

			/*
			 * Release the io ref count taken by ifnet_is_attached.
			 */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_start_lock);

			/*
			 * If there's no pending request or if the
			 * interface has been disabled, we're done.
			 */
			if (req == ifp->if_start_req ||
			    (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
				break;
			}
		}

		ifp->if_start_req = 0;
		ifp->if_start_active = 0;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}
	}
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
	} else {
		*(&ifp->if_start_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof(ifname), "%s_poller",
	    if_name(ifp));
	bzero(&s, sizeof(s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req) {
				break;
			}
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}
	}
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL) {
		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
	} else {
		*(&ifp->if_poll_cycle) = *ts;
	}

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
	}
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
		if_qflush(ifp, 0);
	}
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
					 ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
			ifp->if_poll_update++;
		}
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return 0;
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return 0;
}
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);
	}

	return err;
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL)) {
		err = EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		err = ENXIO;
	} else {
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
	}

	return err;
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	if (maxqlen == 0) {
		maxqlen = if_rcvq_maxlen;
	} else if (maxqlen < IF_RCVQ_MINLEN) {
		maxqlen = IF_RCVQ_MINLEN;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
		return ENXIO;
	}

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return 0;
}
void
ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
    uint16_t delay_timeout)
{
	if (delay_qlen > 0 && delay_timeout > 0) {
		ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
		ifp->if_start_delay_qlen = min(100, delay_qlen);
		ifp->if_start_delay_timeout = min(20000, delay_timeout);
		/* convert timeout to nanoseconds */
		ifp->if_start_delay_timeout *= 1000;
		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
		    ifp->if_xname, (uint32_t)delay_qlen,
		    (uint32_t)delay_timeout);
	} else {
		ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
	}
}
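
/*
 * Worked example (sketch) of the clamping above: a caller passing
 * delay_qlen = 250 and delay_timeout = 40000 usec ends up with
 * if_start_delay_qlen = 100 (clamped) and if_start_delay_timeout =
 * 20000 usec (clamped) * 1000 = 20,000,000 nsec, i.e. a 20 msec
 * coalescing window for the delay-start heuristics further below.
 */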
static inline errno_t
ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
    boolean_t flush, boolean_t *pdrop)
{
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct mbuf *m = p;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (ptype) {
	case QP_MBUF:
		ASSERT(m->m_flags & M_PKTHDR);
		ASSERT(m->m_nextpkt == NULL);

		if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    m->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			m->m_pkthdr.pkt_timestamp = now_nsec;
		}
		m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = _net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = _net_uptime;
				}
			}
			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = _net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = _net_uptime;
				}
			}
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows.  This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback.  This
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &= ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
}

errno_t
ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
    boolean_t *pdrop)
{
	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL) {
			m_freem_list(m);
			*pdrop = TRUE;
		}
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_freem(m);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	return ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop);
}
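
#if 0	/* illustration only -- not part of this file */
/*
 * Sketch: with the new output model, a virtual interface's output path
 * hands one packet at a time to the classq via ifnet_enqueue(); the
 * scheduler then wakes the starter thread, which invokes the driver's
 * if_start callback.  The wrapper below is hypothetical.
 */
static errno_t
example_if_output(ifnet_t ifp, mbuf_t m)
{
	/* ifnet_enqueue() consumes m on both success and failure */
	return ifnet_enqueue(ifp, m);
}
#endif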
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || mp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    (void **)mp, NULL, NULL, NULL, &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return rc;
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
	    &ptype);
	VERIFY((*mp == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return rc;
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || head == NULL || pkt_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
	    len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return rc;
}

errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || head == NULL || byte_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return rc;
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_type_t ptype;

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
	    (void **)tail, cnt, len, &ptype);
	VERIFY((*head == NULL) || (ptype == QP_MBUF));
	ifnet_decr_iorefcnt(ifp);

	return rc;
}
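
#if 0	/* illustration only -- not part of this file */
/*
 * Sketch of a driver's start callback built on the dequeue KPIs above;
 * the stack invokes it from the per-interface starter thread.  The
 * example_hw_tx() routine is hypothetical.
 */
static void
example_if_start(ifnet_t ifp)
{
	mbuf_t m;

	/* drain until the scheduler has nothing eligible to hand out */
	while (ifnet_dequeue(ifp, &m) == 0) {
		example_hw_tx(ifp, m);	/* hand the packet to hardware */
	}
}
#endif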
#if !CONFIG_EMBEDDED
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* !CONFIG_EMBEDDED */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL) {
		(*m_p)->m_flags &= ~M_PROTO1;
	}

	return 0;
}

static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return 0;
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char * frame_header;
			mbuf_t next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN) {
				m_freem(m);
			}
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN) {
			m_freem_list(m);
		}
	}
}
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0) {
		d->packets_in += s->packets_in;
	}
	if (s->bytes_in != 0) {
		d->bytes_in += s->bytes_in;
	}
	if (s->errors_in != 0) {
		d->errors_in += s->errors_in;
	}

	if (s->packets_out != 0) {
		d->packets_out += s->packets_out;
	}
	if (s->bytes_out != 0) {
		d->bytes_out += s->bytes_out;
	}
	if (s->errors_out != 0) {
		d->errors_out += s->errors_out;
	}

	if (s->collisions != 0) {
		d->collisions += s->collisions;
	}
	if (s->dropped != 0) {
		d->dropped += s->dropped;
	}

	if (poll) {
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
	}
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	if (ifp->if_data_threshold != 0) {
		lck_mtx_convert_spin(&inp->input_lck);
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE);
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
    int error = 0;
    protocol_family_t protocol_family;
    mbuf_t next_packet;
    ifnet_t ifp = ifp_param;
    char *frame_header = NULL;
    struct if_proto *last_ifproto = NULL;
    mbuf_t pkt_first = NULL;
    mbuf_t *pkt_next = NULL;
    u_int32_t poll_thresh = 0, poll_ival = 0;

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
        (poll_ival = if_rxpoll_interval_pkts) > 0) {
        poll_thresh = cnt;
    }

    while (m != NULL) {
        struct if_proto *ifproto = NULL;
        int iorefcnt = 0;
        uint32_t pktf_mask;     /* pkt flags to preserve */

        if (ifp_param == NULL) {
            ifp = m->m_pkthdr.rcvif;
        }

        if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
            poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
            ifnet_poll(ifp);
        }

        /* Check if this mbuf looks valid */
        MBUF_INPUT_CHECK(m, ifp);

        next_packet = m->m_nextpkt;
        m->m_nextpkt = NULL;
        frame_header = m->m_pkthdr.pkt_hdr;
        m->m_pkthdr.pkt_hdr = NULL;

        /*
         * Get an IO reference count if the interface is not
         * loopback (lo0) and it is attached; lo0 never goes
         * away, so optimize for that.
         */
        if (ifp != lo_ifp) {
            if (!ifnet_is_attached(ifp, 1)) {
                m_freem(m);
                goto next;
            }
            iorefcnt = 1;
            /*
             * Preserve the time stamp if it was set.
             */
            pktf_mask = PKTF_TS_VALID;
        } else {
            /*
             * If this arrived on lo0, preserve interface addr
             * info to allow for connectivity between loopback
             * and local interface addresses.
             */
            pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
        }

        /* make sure packet comes in clean */
        m_classifier_init(m, pktf_mask);

        ifp_inc_traffic_class_in(ifp, m);

        /* find which protocol family this packet is for */
        ifnet_lock_shared(ifp);
        error = (*ifp->if_demux)(ifp, m, frame_header,
            &protocol_family);
        ifnet_lock_done(ifp);
        if (error != 0) {
            if (error == EJUSTRETURN) {
                goto next;
            }
            protocol_family = 0;
        }

        pktap_input(ifp, protocol_family, m, frame_header);

        /* Drop v4 packets received on CLAT46 enabled interface */
        if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
            m_freem(m);
            ip6stat.ip6s_clat464_in_v4_drop++;
            goto next;
        }

        /* Translate the packet if it is received on CLAT interface */
        if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
            && dlil_is_clat_needed(protocol_family, m)) {
            char *data = NULL;
            struct ether_header eh;
            struct ether_header *ehp = NULL;

            if (ifp->if_type == IFT_ETHER) {
                ehp = (struct ether_header *)(void *)frame_header;
                /* Skip RX Ethernet packets if they are not IPV6 */
                if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
                    goto skip_clat;
                }

                /* Keep a copy of frame_header for Ethernet packets */
                bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
            }
            error = dlil_clat64(ifp, &protocol_family, &m);
            data = (char *) mbuf_data(m);
            if (error != 0) {
                m_freem(m);
                ip6stat.ip6s_clat464_in_drop++;
                goto next;
            }
            /* Native v6 should be No-op */
            if (protocol_family != PF_INET) {
                goto skip_clat;
            }

            /* Do this only for translated v4 packets. */
            switch (ifp->if_type) {
            case IFT_CELLULAR:
                frame_header = data;
                break;
            case IFT_ETHER:
                /*
                 * Drop if the mbuf doesn't have enough
                 * space for Ethernet header
                 */
                if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
                    m_free(m);
                    ip6stat.ip6s_clat464_in_drop++;
                    goto next;
                }
                /*
                 * Set the frame_header ETHER_HDR_LEN bytes
                 * preceding the data pointer. Change
                 * the ether_type too.
                 */
                frame_header = data - ETHER_HDR_LEN;
                eh.ether_type = htons(ETHERTYPE_IP);
                bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
                break;
            }
        }
skip_clat:
        if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
            !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
            dlil_input_cksum_dbg(ifp, m, frame_header,
                protocol_family);
        }
        /*
         * For partial checksum offload, we expect the driver to
         * set the start offset indicating the start of the span
         * that is covered by the hardware-computed checksum;
         * adjust this start offset accordingly because the data
         * pointer has been advanced beyond the link-layer header.
         *
         * Don't adjust if the interface is a bridge member, as
         * the adjustment will occur from the context of the
         * bridge interface during input.
         */
        if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {
            int adj;
            if (frame_header == NULL ||
                frame_header < (char *)mbuf_datastart(m) ||
                frame_header > (char *)m->m_data ||
                (adj = (m->m_data - frame_header)) >
                m->m_pkthdr.csum_rx_start) {
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
                hwcksum_in_invalidated++;
            } else {
                m->m_pkthdr.csum_rx_start -= adj;
            }
        }
        if (clat_debug) {
            pktap_input(ifp, protocol_family, m, frame_header);
        }

        if (m->m_flags & (M_BCAST | M_MCAST)) {
            atomic_add_64(&ifp->if_imcasts, 1);
        }

        /* run interface filters, exclude VLAN packets PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            error = dlil_interface_filters_input(ifp, &m,
                &frame_header, protocol_family);
            if (error != 0) {
                if (error != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }
        }
        if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
            m_freem(m);
            goto next;
        }

        /* Lookup the protocol attachment to this interface */
        if (protocol_family == 0) {
            ifproto = NULL;
        } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
            (last_ifproto->protocol_family == protocol_family)) {
            VERIFY(ifproto == NULL);
            ifproto = last_ifproto;
            if_proto_ref(last_ifproto);
        } else {
            VERIFY(ifproto == NULL);
            ifnet_lock_shared(ifp);
            /* callee holds a proto refcnt upon success */
            ifproto = find_attached_proto(ifp, protocol_family);
            ifnet_lock_done(ifp);
        }
        if (ifproto == NULL) {
            /* no protocol for this packet, discard */
            m_freem(m);
            goto next;
        }
        if (ifproto != last_ifproto) {
            if (last_ifproto != NULL) {
                /* pass up the list for the previous protocol */
                dlil_ifproto_input(last_ifproto, pkt_first);
                pkt_first = NULL;
                if_proto_free(last_ifproto);
            }
            last_ifproto = ifproto;
            if_proto_ref(ifproto);
        }
        /* extend the list */
        m->m_pkthdr.pkt_hdr = frame_header;
        if (pkt_first == NULL) {
            pkt_first = m;
        } else {
            *pkt_next = m;
        }
        pkt_next = &m->m_nextpkt;

next:
        if (next_packet == NULL && last_ifproto != NULL) {
            /* pass up the last list of packets */
            dlil_ifproto_input(last_ifproto, pkt_first);
            if_proto_free(last_ifproto);
            last_ifproto = NULL;
        }
        if (ifproto != NULL) {
            if_proto_free(ifproto);
            ifproto = NULL;
        }

        pkt_first = NULL;

        /* update the driver's multicast filter, if needed */
        if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
            ifp->if_updatemcasts = 0;
        }
        if (iorefcnt == 1) {
            ifnet_decr_iorefcnt(ifp);
        }

        m = next_packet;
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
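/*
 * Note on the loop above (summary, not normative): packets demuxed to the
 * same protocol attachment are accumulated into a chain via pkt_next and
 * handed to dlil_ifproto_input() in a single call, so a burst of, say,
 * TCP/IPv4 frames costs one protocol-input invocation instead of one per
 * packet. The pending chain is flushed whenever the demuxed protocol
 * changes or the input list ends.
 */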
static int
if_mcasts_update(struct ifnet *ifp)
{
    int err;

    err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
    if (err == EAFNOSUPPORT) {
        err = 0;
    }
    printf("%s: %s %d suspended link-layer multicast membership(s) "
        "(err=%d)\n", if_name(ifp),
        (err == 0 ? "successfully restored" : "failed to restore"),
        ifp->if_updatemcasts, err);

    /* just return success */
    return 0;
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
    if (ifp != NULL) {
        ifnet_increment_generation(ifp);
    }

#if NECP
    necp_update_all_clients();
#endif /* NECP */

    return kev_post_msg(event);
}
__private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)
{
    struct kev_msg ev_msg;
    struct net_event_data ev_data;

    bzero(&ev_data, sizeof(ev_data));
    bzero(&ev_msg, sizeof(ev_msg));
    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_DL_SUBCLASS;
    ev_msg.event_code = KEV_DL_SIFFLAGS;
    strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
    ev_data.if_family = ifp->if_family;
    ev_data.if_unit = (u_int32_t) ifp->if_unit;
    ev_msg.dv[0].data_length = sizeof(struct net_event_data);
    ev_msg.dv[0].data_ptr = &ev_data;
    ev_msg.dv[1].data_length = 0;
    dlil_post_complete_msg(ifp, &ev_msg);
}
#define TMP_IF_PROTO_ARR_SIZE 10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
    struct ifnet_filter *filter = NULL;
    struct if_proto *proto = NULL;
    int if_proto_count = 0;
    struct if_proto **tmp_ifproto_arr = NULL;
    struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
    int tmp_ifproto_arr_idx = 0;
    bool tmp_malloc = false;

    /*
     * Pass the event to the interface filters
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_event != NULL) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            filter->filt_event(filter->filt_cookie, ifp,
                filter->filt_protocol, event);

            lck_mtx_lock_spin(&ifp->if_flt_lock);
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1)) {
        goto done;
    }

    /*
     * An embedded tmp_list_entry in if_proto may still get
     * over-written by another thread after giving up ifnet lock,
     * therefore we are avoiding embedded pointers here.
     */
    ifnet_lock_shared(ifp);
    if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
    if (if_proto_count) {
        int i;
        VERIFY(ifp->if_proto_hash != NULL);
        if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
            tmp_ifproto_arr = tmp_ifproto_stack_arr;
        } else {
            MALLOC(tmp_ifproto_arr, struct if_proto **,
                sizeof(*tmp_ifproto_arr) * if_proto_count,
                M_TEMP, M_ZERO);
            if (tmp_ifproto_arr == NULL) {
                ifnet_lock_done(ifp);
                goto cleanup;
            }
            tmp_malloc = true;
        }

        for (i = 0; i < PROTO_HASH_SLOTS; i++) {
            SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
                next_hash) {
                if_proto_ref(proto);
                tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
                tmp_ifproto_arr_idx++;
            }
        }
        VERIFY(if_proto_count == tmp_ifproto_arr_idx);
    }
    ifnet_lock_done(ifp);

    for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
        tmp_ifproto_arr_idx++) {
        proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
        VERIFY(proto != NULL);
        proto_media_event eventp =
            (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.event :
            proto->kpi.v2.event);

        if (eventp != NULL) {
            eventp(ifp, proto->protocol_family,
                event);
        }
        if_proto_free(proto);
    }

cleanup:
    if (tmp_malloc) {
        FREE(tmp_ifproto_arr, M_TEMP);
    }

    /* Pass the event to the interface */
    if (ifp->if_event != NULL) {
        ifp->if_event(ifp, event);
    }

    /* Release the io ref count */
    ifnet_decr_iorefcnt(ifp);
done:
    return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
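/*
 * Design note: dlil_event_internal() snapshots the attached protocols into
 * a plain array (stack storage for up to TMP_IF_PROTO_ARR_SIZE entries,
 * MALLOC beyond that) while holding the ifnet lock, then invokes each
 * proto's event handler with the lock dropped. Holding a refcnt on every
 * snapshot entry keeps each if_proto alive even if the hash table changes
 * once the lock is released.
 */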
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
    struct kev_msg kev_msg;
    int result = 0;

    if (ifp == NULL || event == NULL) {
        return EINVAL;
    }

    bzero(&kev_msg, sizeof(kev_msg));
    kev_msg.vendor_code = event->vendor_code;
    kev_msg.kev_class = event->kev_class;
    kev_msg.kev_subclass = event->kev_subclass;
    kev_msg.event_code = event->event_code;
    kev_msg.dv[0].data_ptr = &event->event_data[0];
    kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
    kev_msg.dv[1].data_length = 0;

    result = dlil_event_internal(ifp, &kev_msg, TRUE);

    return result;
}
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
    struct mbuf *m;
    struct ip *ip;
    struct ip6_hdr *ip6;
    int type = SOCK_RAW;

    if (!raw) {
        switch (family) {
        case PF_INET:
            m = m_pullup(*mp, sizeof(struct ip));
            if (m == NULL) {
                break;
            }
            *mp = m;
            ip = mtod(m, struct ip *);
            if (ip->ip_p == IPPROTO_TCP) {
                type = SOCK_STREAM;
            } else if (ip->ip_p == IPPROTO_UDP) {
                type = SOCK_DGRAM;
            }
            break;
        case PF_INET6:
            m = m_pullup(*mp, sizeof(struct ip6_hdr));
            if (m == NULL) {
                break;
            }
            *mp = m;
            ip6 = mtod(m, struct ip6_hdr *);
            if (ip6->ip6_nxt == IPPROTO_TCP) {
                type = SOCK_STREAM;
            } else if (ip6->ip6_nxt == IPPROTO_UDP) {
                type = SOCK_DGRAM;
            }
            break;
        }
    }

    return type;
}
#endif
__private_extern__ void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
    mbuf_t n = m;
    int chainlen = 0;

    while (n != NULL) {
        chainlen++;
        n = n->m_next;
    }
    switch (chainlen) {
    case 0:
        break;
    case 1:
        atomic_add_64(&cls->cls_one, 1);
        break;
    case 2:
        atomic_add_64(&cls->cls_two, 1);
        break;
    case 3:
        atomic_add_64(&cls->cls_three, 1);
        break;
    case 4:
        atomic_add_64(&cls->cls_four, 1);
        break;
    case 5:
    default:
        atomic_add_64(&cls->cls_five_or_more, 1);
        break;
    }
}
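/*
 * Illustrative semantics: the walk above follows m_next (not m_nextpkt),
 * so this histograms how many mbufs make up a single packet. A packet
 * spread over three clusters bumps cls_three; anything built from five or
 * more mbufs is lumped into cls_five_or_more.
 */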
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking. In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both. This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null. This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
    char *frame_type = NULL;
    char *dst_linkaddr = NULL;
    int retval = 0;
    char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
    char dst_linkaddr_buffer[MAX_LINKADDR * 4];
    struct if_proto *proto = NULL;
    mbuf_t m = NULL;
    mbuf_t send_head = NULL;
    mbuf_t *send_tail = &send_head;
    int iorefcnt = 0;
    u_int32_t pre = 0, post = 0;
    u_int32_t fpkts = 0, fbytes = 0;
    int32_t flen = 0;
    struct timespec now;
    u_int64_t now_nsec;
    boolean_t did_clat46 = FALSE;
    protocol_family_t old_proto_family = proto_family;
    struct rtentry *rt = NULL;

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    /*
     * Get an io refcnt if the interface is attached to prevent ifnet_detach
     * from happening while this operation is in progress
     */
    if (!ifnet_is_attached(ifp, 1)) {
        retval = ENXIO;
        goto cleanup;
    }
    iorefcnt = 1;

    VERIFY(ifp->if_output_dlil != NULL);

    /* update the driver's multicast filter, if needed */
    if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
        ifp->if_updatemcasts = 0;
    }

    frame_type = frame_type_buffer;
    dst_linkaddr = dst_linkaddr_buffer;

    if (raw == 0) {
        ifnet_lock_shared(ifp);
        /* callee holds a proto refcnt upon success */
        proto = find_attached_proto(ifp, proto_family);
        if (proto == NULL) {
            ifnet_lock_done(ifp);
            retval = ENXIO;
            goto cleanup;
        }
        ifnet_lock_done(ifp);
    }

preout_again:
    if (packetlist == NULL) {
        goto cleanup;
    }

    m = packetlist;
    packetlist = packetlist->m_nextpkt;
    m->m_nextpkt = NULL;

    /*
     * Perform address family translation for the first
     * packet outside the loop in order to perform address
     * lookup for the translated proto family.
     */
    if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
        (ifp->if_type == IFT_CELLULAR ||
        dlil_is_clat_needed(proto_family, m))) {
        retval = dlil_clat46(ifp, &proto_family, &m);
        /*
         * Go to the next packet if translation fails
         */
        if (retval != 0) {
            retval = 0;
            m_freem(m);
            ip6stat.ip6s_clat464_out_drop++;
            /* Make sure that the proto family is PF_INET */
            ASSERT(proto_family == PF_INET);
            goto preout_again;
        }
        /*
         * Free the old one and make it point to the IPv6 proto structure.
         *
         * Change proto for the first time we have successfully
         * performed address family translation.
         */
        if (!did_clat46 && proto_family == PF_INET6) {
            struct sockaddr_in6 dest6;
            did_clat46 = TRUE;

            if (proto != NULL) {
                if_proto_free(proto);
            }
            ifnet_lock_shared(ifp);
            /* callee holds a proto refcnt upon success */
            proto = find_attached_proto(ifp, proto_family);
            if (proto == NULL) {
                ifnet_lock_done(ifp);
                retval = ENXIO;
                m_freem(m);
                m = NULL;
                goto cleanup;
            }
            ifnet_lock_done(ifp);
            if (ifp->if_type == IFT_ETHER) {
                /* Update the dest to translated v6 address */
                dest6.sin6_len = sizeof(struct sockaddr_in6);
                dest6.sin6_family = AF_INET6;
                dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
                dest = (const struct sockaddr *)&dest6;

                /*
                 * Lookup route to the translated destination
                 * Free this route ref during cleanup
                 */
                rt = rtalloc1_scoped((struct sockaddr *)&dest6,
                    0, 0, ifp->if_index);

                route = rt;
            }
        }
    }

    /*
     * This path gets packet chain going to the same destination.
     * The pre output routine is used to either trigger resolution of
     * the next hop or retrieve the next hop's link layer addressing.
     * For ex: ether_inet(6)_pre_output routine.
     *
     * If the routine returns EJUSTRETURN, it implies that packet has
     * been queued, and therefore we have to call preout_again for the
     * following packet in the chain.
     *
     * For errors other than EJUSTRETURN, the current packet is freed
     * and the rest of the chain (pointed by packetlist is freed as
     * part of clean up.
     *
     * Else if there is no error the retrieved information is used for
     * all the packets in the chain.
     */
    if (raw == 0) {
        proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
        retval = 0;
        if (preoutp != NULL) {
            retval = preoutp(ifp, proto_family, &m, dest, route,
                frame_type, dst_linkaddr);

            if (retval != 0) {
                if (retval == EJUSTRETURN) {
                    goto preout_again;
                }
                m_freem(m);
                m = NULL;
                goto cleanup;
            }
        }
    }

#if CONFIG_MACF_NET
    retval = mac_ifnet_check_transmit(ifp, m, proto_family,
        dlil_get_socket_type(&m, proto_family, raw));
    if (retval != 0) {
        m_freem(m);
        goto cleanup;
    }
#endif

    do {
        /*
         * Perform address family translation if needed.
         * For now we only support stateless 4 to 6 translation
         * on the out path.
         *
         * The routine below translates IP header, updates protocol
         * checksum and also translates ICMP.
         *
         * We skip the first packet as it is already translated and
         * the proto family is set to PF_INET6.
         */
        if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
            (ifp->if_type == IFT_CELLULAR ||
            dlil_is_clat_needed(proto_family, m))) {
            retval = dlil_clat46(ifp, &proto_family, &m);
            /* Goto the next packet if the translation fails */
            if (retval != 0) {
                retval = 0;
                m_freem(m);
                ip6stat.ip6s_clat464_out_drop++;
                goto next;
            }
        }

#if CONFIG_DTRACE
        if (!raw && proto_family == PF_INET) {
            struct ip *ip = mtod(m, struct ip *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip *, ip, struct ifnet *, ifp,
                struct ip *, ip, struct ip6_hdr *, NULL);
        } else if (!raw && proto_family == PF_INET6) {
            struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
            DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                struct ip6_hdr *, ip6, struct ifnet *, ifp,
                struct ip *, NULL, struct ip6_hdr *, ip6);
        }
#endif /* CONFIG_DTRACE */

        if (raw == 0 && ifp->if_framer != NULL) {
            int rcvif_set = 0;

            /*
             * If this is a broadcast packet that needs to be
             * looped back into the system, set the inbound ifp
             * to that of the outbound ifp. This will allow
             * us to determine that it is a legitimate packet
             * for the system. Only set the ifp if it's not
             * already set, just to be safe.
             */
            if ((m->m_flags & (M_BCAST | M_LOOP)) &&
                m->m_pkthdr.rcvif == NULL) {
                m->m_pkthdr.rcvif = ifp;
                rcvif_set = 1;
            }

            retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
                frame_type, &pre, &post);
            if (retval != 0) {
                if (retval != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }

            /*
             * For partial checksum offload, adjust the start
             * and stuff offsets based on the prepended header.
             */
            if ((m->m_pkthdr.csum_flags &
                (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
                (CSUM_DATA_VALID | CSUM_PARTIAL)) {
                m->m_pkthdr.csum_tx_stuff += pre;
                m->m_pkthdr.csum_tx_start += pre;
            }

            if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
                dlil_output_cksum_dbg(ifp, m, pre,
                    proto_family);
            }

            /*
             * Clear the ifp if it was set above, and to be
             * safe, only if it is still the same as the
             * outbound ifp we have in context. If it was
             * looped back, then a copy of it was sent to the
             * loopback interface with the rcvif set, and we
             * are clearing the one that will go down to the
             * layer below.
             */
            if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
                m->m_pkthdr.rcvif = NULL;
            }
        }

        /*
         * Let interface filters (if any) do their thing ...
         */
        /* Do not pass VLAN tagged packets to filters PR-3586856 */
        if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
            retval = dlil_interface_filters_output(ifp,
                &m, proto_family);
            if (retval != 0) {
                if (retval != EJUSTRETURN) {
                    m_freem(m);
                }
                goto next;
            }
        }
        /*
         * Strip away M_PROTO1 bit prior to sending packet
         * to the driver as this field may be used by the driver
         */
        m->m_flags &= ~M_PROTO1;

        /*
         * If the underlying interface is not capable of handling a
         * packet whose data portion spans across physically disjoint
         * pages, we need to "normalize" the packet so that we pass
         * down a chain of mbufs where each mbuf points to a span that
         * resides in the system page boundary. If the packet does
         * not cross page(s), the following is a no-op.
         */
        if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
            if ((m = m_normalize(m)) == NULL) {
                goto next;
            }
        }

        /*
         * If this is a TSO packet, make sure the interface still
         * advertises TSO capability.
         */
        if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
            retval = EMSGSIZE;
            m_freem(m);
            goto cleanup;
        }

        ifp_inc_traffic_class_out(ifp, m);
        pktap_output(ifp, proto_family, m, pre, post);

        /*
         * Count the number of elements in the mbuf chain
         */
        if (tx_chain_len_count) {
            dlil_count_chain_len(m, &tx_chain_len_stats);
        }

        /*
         * Record timestamp; ifnet_enqueue() will use this info
         * rather than redoing the work. An optimization could
         * involve doing this just once at the top, if there are
         * no interface filters attached, but that's probably
         * not a big deal.
         */
        nanouptime(&now);
        net_timernsec(&now, &now_nsec);
        (void) mbuf_set_timestamp(m, now_nsec, TRUE);

        /*
         * Discard partial sum information if this packet originated
         * from another interface; the packet would already have the
         * final checksum and we shouldn't recompute it.
         */
        if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
            (CSUM_DATA_VALID | CSUM_PARTIAL)) {
            m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
            m->m_pkthdr.csum_data = 0;
        }

        /*
         * Finally, call the driver.
         */
        if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen += (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            }
            *send_tail = m;
            send_tail = &m->m_nextpkt;
        } else {
            if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                flen = (m_pktlen(m) - (pre + post));
                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
            } else {
                flen = 0;
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
                0, 0, 0, 0, 0);
            retval = (*ifp->if_output_dlil)(ifp, m);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL && adv->code == FADV_SUCCESS) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                        FADV_SUSPENDED);
                }
                retval = 0;
            }
            if (retval == 0 && flen > 0) {
                fbytes += flen;
                fpkts++;
            }
            if (retval != 0 && dlil_verbose) {
                printf("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp),
                    retval);
            }
            KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
                0, 0, 0, 0, 0);
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
        m = packetlist;
        if (m != NULL) {
            packetlist = packetlist->m_nextpkt;
            m->m_nextpkt = NULL;
        }
        /* Reset the proto family to old proto family for CLAT */
        if (did_clat46) {
            proto_family = old_proto_family;
        }
    } while (m != NULL);

    if (send_head != NULL) {
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
            0, 0, 0, 0, 0);
        if (ifp->if_eflags & IFEF_SENDLIST) {
            retval = (*ifp->if_output_dlil)(ifp, send_head);
            if (retval == EQFULL || retval == EQSUSPENDED) {
                if (adv != NULL && adv->code == FADV_SUCCESS) {
                    adv->code = (retval == EQFULL ?
                        FADV_FLOW_CONTROLLED :
                        FADV_SUSPENDED);
                }
                retval = 0;
            }
            if (retval == 0 && flen > 0) {
                fbytes += flen;
                fpkts++;
            }
            if (retval != 0 && dlil_verbose) {
                printf("%s: output error on %s retval = %d\n",
                    __func__, if_name(ifp), retval);
            }
        } else {
            struct mbuf *send_m;
            int enq_cnt = 0;
            VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
            while (send_head != NULL) {
                send_m = send_head;
                send_head = send_m->m_nextpkt;
                send_m->m_nextpkt = NULL;
                retval = (*ifp->if_output_dlil)(ifp, send_m);
                if (retval == EQFULL || retval == EQSUSPENDED) {
                    if (adv != NULL && adv->code == FADV_SUCCESS) {
                        adv->code = (retval == EQFULL ?
                            FADV_FLOW_CONTROLLED :
                            FADV_SUSPENDED);
                    }
                    retval = 0;
                }
                if (retval == 0) {
                    enq_cnt++;
                    if (flen > 0) {
                        fpkts++;
                    }
                }
                if (retval != 0 && dlil_verbose) {
                    printf("%s: output error on %s "
                        "retval = %d\n",
                        __func__, if_name(ifp), retval);
                }
            }
            if (enq_cnt > 0) {
                fbytes += flen;
                ifnet_start(ifp);
            }
        }
        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
    }

    KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
    if (fbytes > 0) {
        ifp->if_fbytes += fbytes;
    }
    if (fpkts > 0) {
        ifp->if_fpackets += fpkts;
    }
    if (proto != NULL) {
        if_proto_free(proto);
    }
    if (packetlist) { /* if any packets are left, clean up */
        mbuf_freem_list(packetlist);
    }
    if (retval == EJUSTRETURN) {
        retval = 0;
    }
    if (iorefcnt == 1) {
        ifnet_decr_iorefcnt(ifp);
    }
    if (rt != NULL) {
        rtfree(rt);
        rt = NULL;
    }

    return retval;
}
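/*
 * Output path stages, roughly: CLAT46 translation (if applicable) ->
 * pre_output (next-hop resolution) -> framer (link-layer encapsulation) ->
 * interface output filters -> driver via if_output_dlil, either one packet
 * at a time or as a chain when the driver advertises IFEF_SENDLIST or
 * IFEF_ENQUEUE_MULTI. EQFULL/EQSUSPENDED from the driver are not treated
 * as errors; they are converted into flow-advisory codes for the caller
 * and the packet is considered accepted.
 */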
/*
 * This routine checks if the destination address is not a loopback, link-local,
 * multicast or broadcast address.
 */
static boolean_t
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
    boolean_t ret = FALSE;
    switch (proto_family) {
    case PF_INET: {
        struct ip *iph = mtod(m, struct ip *);
        if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
            ret = TRUE;
        }
        break;
    }
    case PF_INET6: {
        struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
        if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
            CLAT64_NEEDED(&ip6h->ip6_dst)) {
            ret = TRUE;
        }
        break;
    }
    }

    return ret;
}
/*
 * @brief This routine translates IPv4 packet to IPv6 packet,
 *     updates protocol checksum and also translates ICMP for code
 *     along with inner header translation.
 *
 * @param ifp Pointer to the interface
 * @param proto_family pointer to protocol family. It is updated if function
 *     performs the translation successfully.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 *     routine can end up changing the mbuf to a different one.
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
    VERIFY(*proto_family == PF_INET);
    VERIFY(IS_INTF_CLAT46(ifp));

    pbuf_t pbuf_store, *pbuf = NULL;
    struct ip *iph = NULL;
    struct in_addr osrc, odst;
    uint8_t proto = 0;
    struct in6_ifaddr *ia6_clat_src = NULL;
    struct in6_addr *src = NULL;
    struct in6_addr dst;
    int error = 0;
    uint32_t off = 0;
    uint64_t tot_len = 0;
    uint16_t ip_id_val = 0;
    uint16_t ip_frag_off = 0;

    boolean_t is_frag = FALSE;
    boolean_t is_first_frag = TRUE;
    boolean_t is_last_frag = TRUE;

    pbuf_init_mbuf(&pbuf_store, *m, ifp);
    pbuf = &pbuf_store;
    iph = pbuf->pb_data;

    osrc = iph->ip_src;
    odst = iph->ip_dst;
    proto = iph->ip_p;
    off = iph->ip_hl << 2;
    ip_id_val = iph->ip_id;
    ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

    tot_len = ntohs(iph->ip_len);

    /*
     * For packets that are not first frags
     * we only need to adjust CSUM.
     * For 4 to 6, Fragmentation header gets appended
     * after proto translation.
     */
    if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
        is_frag = TRUE;

        /* If the offset is not zero, it is not first frag */
        if (ip_frag_off != 0) {
            is_first_frag = FALSE;
        }

        /* If IP_MF is set, then it is not last frag */
        if (ntohs(iph->ip_off) & IP_MF) {
            is_last_frag = FALSE;
        }
    }

    /*
     * Retrieve the local IPv6 CLAT46 address reserved for stateless
     * translation.
     */
    ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
    if (ia6_clat_src == NULL) {
        ip6stat.ip6s_clat464_out_nov6addr_drop++;
        error = -1;
        goto cleanup;
    }

    src = &ia6_clat_src->ia_addr.sin6_addr;

    /*
     * Translate IPv4 destination to IPv6 destination by using the
     * prefixes learned through prior PLAT discovery.
     */
    if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
        ip6stat.ip6s_clat464_out_v6synthfail_drop++;
        goto cleanup;
    }

    /* Translate the IP header part first */
    error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
        iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;

    iph = NULL; /* Invalidate iph as pbuf has been modified */

    if (error != 0) {
        ip6stat.ip6s_clat464_out_46transfail_drop++;
        goto cleanup;
    }

    /*
     * Translate protocol header, update checksum, checksum flags
     * and related fields.
     */
    error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
        proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

    if (error != 0) {
        ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
        goto cleanup;
    }

    /* Now insert the IPv6 fragment header */
    if (error == 0 && is_frag) {
        error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);

        if (error != 0) {
            ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
            goto cleanup;
        }
    }

cleanup:
    if (ia6_clat_src != NULL) {
        IFA_REMREF(&ia6_clat_src->ia_ifa);
    }

    if (pbuf_is_valid(pbuf)) {
        *m = pbuf->pb_mbuf;
        pbuf->pb_mbuf = NULL;
        pbuf_destroy(pbuf);
    } else {
        error = -1;
        ip6stat.ip6s_clat464_out_invalpbuf_drop++;
    }

    if (error == 0) {
        *proto_family = PF_INET6;
        ip6stat.ip6s_clat464_out_success++;
    }

    return error;
}
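/*
 * Worked example (illustrative only): an IPv4 UDP datagram leaving a
 * CLAT46-enabled interface has its IPv4 header rewritten to IPv6 (src =
 * the interface's IN6_IFF_CLAT46 address, dst = the NAT64-prefix
 * synthesized address), its UDP checksum updated for the new
 * pseudo-header, and, had the original carried a non-atomic fragment, an
 * IPv6 fragment header appended preserving the original ip_id.
 */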
/*
 * @brief This routine translates incoming IPv6 to IPv4 packet,
 *     updates protocol checksum and also translates ICMPv6 outer
 *     and inner headers
 *
 * @return 0 on success or else a negative value.
 */
static errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
    VERIFY(*proto_family == PF_INET6);
    VERIFY(IS_INTF_CLAT46(ifp));

    struct ip6_hdr *ip6h = NULL;
    struct in6_addr osrc, odst;
    uint8_t proto = 0;
    struct in6_ifaddr *ia6_clat_dst = NULL;
    struct in_ifaddr *ia4_clat_dst = NULL;
    struct in_addr *dst = NULL;
    struct in_addr src;
    int error = 0;
    uint32_t off = 0;
    u_int64_t tot_len = 0;
    uint8_t tos = 0;
    boolean_t is_first_frag = TRUE;

    /* Incoming mbuf does not contain valid IP6 header */
    if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
        ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
        (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
        ip6stat.ip6s_clat464_in_tooshort_drop++;
        return -1;
    }

    ip6h = mtod(*m, struct ip6_hdr *);
    /* Validate that mbuf contains IP payload equal to ip6_plen */
    if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
        ip6stat.ip6s_clat464_in_tooshort_drop++;
        return -1;
    }

    osrc = ip6h->ip6_src;
    odst = ip6h->ip6_dst;

    /*
     * Retrieve the local CLAT46 reserved IPv6 address.
     * Let the packet pass if we don't find one, as the flag
     * may get set before IPv6 configuration has taken place.
     */
    ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
    if (ia6_clat_dst == NULL) {
        goto done;
    }

    /*
     * Check if the original dest in the packet is same as the reserved
     * CLAT46 IPv6 address
     */
    if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
        pbuf_t pbuf_store, *pbuf = NULL;
        pbuf_init_mbuf(&pbuf_store, *m, ifp);
        pbuf = &pbuf_store;

        /*
         * Retrieve the local CLAT46 IPv4 address reserved for stateless
         * translation.
         */
        ia4_clat_dst = inifa_ifpclatv4(ifp);
        if (ia4_clat_dst == NULL) {
            IFA_REMREF(&ia6_clat_dst->ia_ifa);
            ip6stat.ip6s_clat464_in_nov4addr_drop++;
            error = -1;
            goto cleanup;
        }
        IFA_REMREF(&ia6_clat_dst->ia_ifa);

        /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
        dst = &ia4_clat_dst->ia_addr.sin_addr;
        if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
            ip6stat.ip6s_clat464_in_v4synthfail_drop++;
            error = -1;
            goto cleanup;
        }

        ip6h = pbuf->pb_data;
        off = sizeof(struct ip6_hdr);
        proto = ip6h->ip6_nxt;
        tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
        tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

        /*
         * Translate the IP header and update the fragmentation
         * header if needed
         */
        error = (nat464_translate_64(pbuf, off, tos, &proto,
            ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
            0 : -1;

        ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */

        if (error != 0) {
            ip6stat.ip6s_clat464_in_64transfail_drop++;
            goto cleanup;
        }

        /*
         * Translate protocol header, update checksum, checksum flags
         * and related fields.
         */
        error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
            (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
            NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

        if (error != 0) {
            ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
            goto cleanup;
        }

cleanup:
        if (ia4_clat_dst != NULL) {
            IFA_REMREF(&ia4_clat_dst->ia_ifa);
        }

        if (pbuf_is_valid(pbuf)) {
            *m = pbuf->pb_mbuf;
            pbuf->pb_mbuf = NULL;
            pbuf_destroy(pbuf);
        } else {
            error = -1;
            ip6stat.ip6s_clat464_in_invalpbuf_drop++;
        }

        if (error == 0) {
            *proto_family = PF_INET;
            ip6stat.ip6s_clat464_in_success++;
        }
    } /* CLAT traffic */

done:
    return error;
}
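/*
 * The inbound direction mirrors dlil_clat46(): only packets whose IPv6
 * destination equals the reserved CLAT46 address are rewritten to IPv4;
 * everything else (native v6 traffic) falls through untouched with
 * error == 0, which is why the caller treats PF_INET6-in/PF_INET6-out
 * as a no-op.
 */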
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
    struct ifnet_filter *filter;
    int retval = EOPNOTSUPP;
    int result = 0;

    if (ifp == NULL || ioctl_code == 0) {
        return EINVAL;
    }

    /* Get an io ref count if the interface is attached */
    if (!ifnet_is_attached(ifp, 1)) {
        return EOPNOTSUPP;
    }

    /*
     * Run the interface filters first.
     * We want to run all filters before calling the protocol,
     * interface family, or interface.
     */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    /* prevent filter list from changing in case we drop the lock */
    if_flt_monitor_busy(ifp);
    TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
        if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
            filter->filt_protocol == proto_fam)) {
            lck_mtx_unlock(&ifp->if_flt_lock);

            result = filter->filt_ioctl(filter->filt_cookie, ifp,
                proto_fam, ioctl_code, ioctl_arg);

            lck_mtx_lock_spin(&ifp->if_flt_lock);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP) {
                    result = EOPNOTSUPP;
                }
                retval = result;
                if (retval != 0 && retval != EOPNOTSUPP) {
                    /* we're done with the filter list */
                    if_flt_monitor_unbusy(ifp);
                    lck_mtx_unlock(&ifp->if_flt_lock);
                    goto cleanup;
                }
            }
        }
    }
    /* we're done with the filter list */
    if_flt_monitor_unbusy(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Allow the protocol to handle the ioctl */
    if (proto_fam != 0) {
        struct if_proto *proto;

        /* callee holds a proto refcnt upon success */
        ifnet_lock_shared(ifp);
        proto = find_attached_proto(ifp, proto_fam);
        ifnet_lock_done(ifp);
        if (proto != NULL) {
            proto_media_ioctl ioctlp =
                (proto->proto_kpi == kProtoKPI_v1 ?
                proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
            result = EOPNOTSUPP;
            if (ioctlp != NULL) {
                result = ioctlp(ifp, proto_fam, ioctl_code,
                    ioctl_arg);
            }
            if_proto_free(proto);

            /* Only update retval if no one has handled the ioctl */
            if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
                if (result == ENOTSUP) {
                    result = EOPNOTSUPP;
                }
                retval = result;
                if (retval && retval != EOPNOTSUPP) {
                    goto cleanup;
                }
            }
        }
    }

    /* retval is either 0 or EOPNOTSUPP */

    /*
     * Let the interface handle this ioctl.
     * If it returns EOPNOTSUPP, ignore that, we may have
     * already handled this in the protocol or family.
     */
    if (ifp->if_ioctl) {
        result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
    }

    /* Only update retval if no one has handled the ioctl */
    if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
        if (result == ENOTSUP) {
            result = EOPNOTSUPP;
        }
        retval = result;
        if (retval && retval != EOPNOTSUPP) {
            goto cleanup;
        }
    }

cleanup:
    if (retval == EJUSTRETURN) {
        retval = 0;
    }

    ifnet_decr_iorefcnt(ifp);

    return retval;
}
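/*
 * Precedence rule used throughout ifnet_ioctl(), restated for clarity:
 * retval starts as EOPNOTSUPP and is only overwritten while it is still
 * EOPNOTSUPP (or when a handler returns EJUSTRETURN). So the first
 * filter, protocol, or interface that actually handles the ioctl wins;
 * e.g. if a filter returns 0 and the interface later returns EOPNOTSUPP,
 * the call still reports 0.
 */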
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
    errno_t error = 0;

    if (ifp->if_set_bpf_tap) {
        /* Get an io reference on the interface if it is attached */
        if (!ifnet_is_attached(ifp, 1)) {
            return ENXIO;
        }
        error = ifp->if_set_bpf_tap(ifp, mode, callback);
        ifnet_decr_iorefcnt(ifp);
    }
    return error;
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
    errno_t result = EOPNOTSUPP;
    struct if_proto *proto;
    const struct sockaddr *verify;
    proto_media_resolve_multi resolvep;

    if (!ifnet_is_attached(ifp, 1)) {
        return result;
    }

    bzero(ll_addr, ll_len);

    /* Call the protocol first; callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, proto_addr->sa_family);
    ifnet_lock_done(ifp);
    if (proto != NULL) {
        resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
        if (resolvep != NULL) {
            result = resolvep(ifp, proto_addr,
                (struct sockaddr_dl *)(void *)ll_addr, ll_len);
        }
        if_proto_free(proto);
    }

    /* Let the interface verify the multicast address */
    if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
        if (result == 0) {
            verify = ll_addr;
        } else {
            verify = proto_addr;
        }
        result = ifp->if_check_multi(ifp, verify);
    }

    ifnet_decr_iorefcnt(ifp);
    return result;
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
    struct if_proto *proto;
    errno_t result = 0;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_shared(ifp);
    proto = find_attached_proto(ifp, target_proto->sa_family);
    ifnet_lock_done(ifp);
    if (proto == NULL) {
        result = ENOTSUP;
    } else {
        proto_media_send_arp arpp;
        arpp = (proto->proto_kpi == kProtoKPI_v1 ?
            proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
        if (arpp == NULL) {
            result = ENOTSUP;
        } else {
            switch (arpop) {
            case ARPOP_REQUEST:
                arpstat.txrequests++;
                if (target_hw != NULL) {
                    arpstat.txurequests++;
                }
                break;
            case ARPOP_REPLY:
                arpstat.txreplies++;
                break;
            }
            result = arpp(ifp, arpop, sender_hw, sender_proto,
                target_hw, target_proto);
        }
        if_proto_free(proto);
    }

    return result;
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t pop = 0;

    if (push != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());

        pop = push & ~uth->uu_network_marks;
        if (pop != 0) {
            uth->uu_network_marks |= pop;
        }
    }

    return (net_thread_marks_t)&base[pop];
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    u_int32_t unpop = 0;

    if (unpush != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());

        unpop = unpush & uth->uu_network_marks;
        if (unpop != 0) {
            uth->uu_network_marks &= ~unpop;
        }
    }

    return (net_thread_marks_t)&base[unpop];
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    const ptrdiff_t pop = (const char *)popx - (const char *)base;

    if (pop != 0) {
        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((pop & ones) == pop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
        uth->uu_network_marks &= ~pop;
    }
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
    static const char *const base = (const void*)&net_thread_marks_base;
    ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

    if (unpop != 0) {
        static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
        struct uthread *uth = get_bsdthread_info(current_thread());

        VERIFY((unpop & ones) == unpop);
        VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
        uth->uu_network_marks |= unpop;
    }
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
    if (check != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());
        return uth->uu_network_marks & check;
    } else {
        return 0;
    }
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
    if (check != 0) {
        struct uthread *uth = get_bsdthread_info(current_thread());
        return ~uth->uu_network_marks & check;
    } else {
        return 0;
    }
}
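/*
 * How the mark tokens work (explanatory sketch): the token returned by
 * net_thread_marks_push() is not a pointer to real storage but
 * &net_thread_marks_base offset by the bits that were newly set, so a
 * nested push/pop pair restores exactly the bits it added. An
 * illustrative pairing, assuming a mark bit such as NET_THREAD_HELD_PF
 * from dlil.h:
 *
 *     net_thread_marks_t marks = net_thread_marks_push(NET_THREAD_HELD_PF);
 *     ... code that observes net_thread_is_marked(NET_THREAD_HELD_PF) ...
 *     net_thread_marks_pop(marks);
 */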
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
    if (sender_sin == NULL) {
        return FALSE;
    }

    return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
    errno_t result = 0;
    const struct sockaddr_in * sender_sin;
    const struct sockaddr_in * target_sin;
    struct sockaddr_inarp target_proto_sinarp;
    struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

    if (target_proto == NULL || (sender_proto != NULL &&
        sender_proto->sa_family != target_proto->sa_family)) {
        return EINVAL;
    }

    /*
     * If the target is a (default) router, provide that
     * information to the send_arp callback routine.
     */
    if (rtflags & RTF_ROUTER) {
        bcopy(target_proto, &target_proto_sinarp,
            sizeof(struct sockaddr_in));
        target_proto_sinarp.sin_other |= SIN_ROUTER;
        target_proto = (struct sockaddr *)&target_proto_sinarp;
    }

    /*
     * If this is an ARP request and the target IP is IPv4LL,
     * send the request on all interfaces. The exception is
     * an announcement, which must only appear on the specific
     * interface.
     */
    sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
    target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
    if (target_proto->sa_family == AF_INET &&
        IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
        ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
        !_is_announcement(target_sin, sender_sin)) {
        ifnet_t *ifp_list;
        u_int32_t count;
        u_int32_t ifp_on;

        result = ENOTSUP;

        if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
            for (ifp_on = 0; ifp_on < count; ifp_on++) {
                errno_t new_result;
                ifaddr_t source_hw = NULL;
                ifaddr_t source_ip = NULL;
                struct sockaddr_in source_ip_copy;
                struct ifnet *cur_ifp = ifp_list[ifp_on];

                /*
                 * Only arp on interfaces marked for IPv4LL
                 * ARPing. This may mean that we don't ARP on
                 * the interface the subnet route points to.
                 */
                if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
                    continue;
                }

                /* Find the source IP address */
                ifnet_lock_shared(cur_ifp);
                source_hw = cur_ifp->if_lladdr;
                TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
                    ifa_link) {
                    IFA_LOCK(source_ip);
                    if (source_ip->ifa_addr != NULL &&
                        source_ip->ifa_addr->sa_family ==
                        AF_INET) {
                        /* Copy the source IP address */
                        source_ip_copy =
                            *(struct sockaddr_in *)
                            (void *)source_ip->ifa_addr;
                        IFA_UNLOCK(source_ip);
                        break;
                    }
                    IFA_UNLOCK(source_ip);
                }

                /* No IP Source, don't arp */
                if (source_ip == NULL) {
                    ifnet_lock_done(cur_ifp);
                    continue;
                }

                IFA_ADDREF(source_hw);
                ifnet_lock_done(cur_ifp);

                /* Send the ARP */
                new_result = dlil_send_arp_internal(cur_ifp,
                    arpop, (struct sockaddr_dl *)(void *)
                    source_hw->ifa_addr,
                    (struct sockaddr *)&source_ip_copy, NULL,
                    target_proto);

                IFA_REMREF(source_hw);
                if (result == ENOTSUP) {
                    result = new_result;
                }
            }
            ifnet_list_free(ifp_list);
        }
    } else {
        result = dlil_send_arp_internal(ifp, arpop, sender_hw,
            sender_proto, target_hw, target_proto);
    }

    return result;
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
    struct ifnet *_ifp;

    LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
    TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
        if (_ifp == ifp) {
            break;
        }
    }
    return _ifp != NULL;
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
    int ret;

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if ((ret = IF_FULLY_ATTACHED(ifp))) {
        if (refio > 0) {
            ifp->if_refio++;
        }
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    return ret;
}

/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(IF_FULLY_ATTACHED(ifp));
    VERIFY(ifp->if_refio > 0);
    ifp->if_refio++;
    lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    VERIFY(ifp->if_refio > 0);
    VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
    ifp->if_refio--;

    /*
     * if there are no more outstanding io references, wakeup the
     * ifnet_detach thread if detaching flag is set.
     */
    if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
        wakeup(&(ifp->if_refio));
    }

    lck_mtx_unlock(&ifp->if_ref_lock);
}
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
    struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
    ctrace_t *tr;
    u_int32_t idx;
    u_int16_t *cnt;

    if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
        panic("%s: dl_if %p has no debug structure", __func__, dl_if);
        /* NOTREACHED */
    }

    if (refhold) {
        cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
        tr = dl_if_dbg->dldbg_if_refhold;
    } else {
        cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
        tr = dl_if_dbg->dldbg_if_refrele;
    }

    idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
    ctrace_record(&tr[idx]);
}

errno_t
dlil_if_ref(struct ifnet *ifp)
{
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    if (dl_if == NULL) {
        return EINVAL;
    }

    lck_mtx_lock_spin(&dl_if->dl_if_lock);
    ++dl_if->dl_if_refcnt;
    if (dl_if->dl_if_refcnt == 0) {
        panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
        /* NOTREACHED */
    }
    if (dl_if->dl_if_trace != NULL) {
        (*dl_if->dl_if_trace)(dl_if, TRUE);
    }
    lck_mtx_unlock(&dl_if->dl_if_lock);

    return 0;
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    bool need_release = FALSE;

    if (dl_if == NULL) {
        return EINVAL;
    }

    lck_mtx_lock_spin(&dl_if->dl_if_lock);
    switch (dl_if->dl_if_refcnt) {
    case 0:
        panic("%s: negative refcnt for ifp=%p", __func__, ifp);
        /* NOTREACHED */
        break;
    case 1:
        if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
            need_release = TRUE;
        }
        break;
    default:
        break;
    }
    --dl_if->dl_if_refcnt;
    if (dl_if->dl_if_trace != NULL) {
        (*dl_if->dl_if_trace)(dl_if, FALSE);
    }
    lck_mtx_unlock(&dl_if->dl_if_lock);
    if (need_release) {
        dlil_if_release(ifp);
    }
    return 0;
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
    struct kev_dl_proto_data ev_pr_data;
    struct ifnet *ifp = proto->ifp;
    int retval = 0;
    u_int32_t hash_value = proto_hash_value(proto->protocol_family);
    struct if_proto *prev_proto;
    struct if_proto *_proto;

    /* callee holds a proto refcnt upon success */
    ifnet_lock_exclusive(ifp);
    _proto = find_attached_proto(ifp, proto->protocol_family);
    if (_proto != NULL) {
        ifnet_lock_done(ifp);
        if_proto_free(_proto);
        return EEXIST;
    }

    /*
     * Call family module add_proto routine so it can refine the
     * demux descriptors as it wishes.
     */
    retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
        demux_count);
    if (retval) {
        ifnet_lock_done(ifp);
        return retval;
    }

    /*
     * Insert the protocol in the hash
     */
    prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
    while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
        prev_proto = SLIST_NEXT(prev_proto, next_hash);
    }
    if (prev_proto) {
        SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
    } else {
        SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
            proto, next_hash);
    }

    /* hold a proto refcnt for attach */
    if_proto_ref(proto);

    /*
     * The reserved field carries the number of protocol still attached
     * (subject to change)
     */
    ev_pr_data.proto_family = proto->protocol_family;
    ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

    ifnet_lock_done(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
        (struct net_event_data *)&ev_pr_data,
        sizeof(struct kev_dl_proto_data));
    if (proto_count != NULL) {
        *proto_count = ev_pr_data.proto_remaining_count;
    }
    return retval;
}
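/*
 * Note: the new proto is appended at the tail of its hash chain (the
 * while loop above walks to the last element) rather than prepended, so
 * lookup order in find_attached_proto() matches attach order within a
 * hash bucket.
 */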
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {
        retval = EINVAL;
        goto end;
    }
    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {
        retval = ENXIO;
        goto end;
    }

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, dlif_proto_size);

    /* refcnt held above during lookup */
    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v1;
    ifproto->kpi.v1.input = proto_details->input;
    ifproto->kpi.v1.pre_output = proto_details->pre_output;
    ifproto->kpi.v1.event = proto_details->event;
    ifproto->kpi.v1.ioctl = proto_details->ioctl;
    ifproto->kpi.v1.detached = proto_details->detached;
    ifproto->kpi.v1.resolve_multi = proto_details->resolve;
    ifproto->kpi.v1.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,
        &proto_count);

end:
    if (retval != 0 && retval != EEXIST && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
            if_name(ifp), protocol, retval);
    } else {
        if (dlil_verbose) {
            printf("%s: attached v1 protocol %d (count = %d)\n",
                if_name(ifp),
                protocol, proto_count);
        }
    }
    ifnet_head_done();
    if (retval == 0) {
        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
    }
    return retval;
}
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
    int retval = 0;
    struct if_proto *ifproto = NULL;
    uint32_t proto_count = 0;

    ifnet_head_lock_shared();
    if (ifp == NULL || protocol == 0 || proto_details == NULL) {
        retval = EINVAL;
        goto end;
    }
    /* Check that the interface is in the global list */
    if (!ifnet_lookup(ifp)) {
        retval = ENXIO;
        goto end;
    }

    ifproto = zalloc(dlif_proto_zone);
    if (ifproto == NULL) {
        retval = ENOMEM;
        goto end;
    }
    bzero(ifproto, sizeof(*ifproto));

    /* refcnt held above during lookup */
    ifproto->ifp = ifp;
    ifproto->protocol_family = protocol;
    ifproto->proto_kpi = kProtoKPI_v2;
    ifproto->kpi.v2.input = proto_details->input;
    ifproto->kpi.v2.pre_output = proto_details->pre_output;
    ifproto->kpi.v2.event = proto_details->event;
    ifproto->kpi.v2.ioctl = proto_details->ioctl;
    ifproto->kpi.v2.detached = proto_details->detached;
    ifproto->kpi.v2.resolve_multi = proto_details->resolve;
    ifproto->kpi.v2.send_arp = proto_details->send_arp;

    retval = dlil_attach_protocol_internal(ifproto,
        proto_details->demux_list, proto_details->demux_count,
        &proto_count);

end:
    if (retval != 0 && retval != EEXIST && ifp != NULL) {
        DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
            if_name(ifp), protocol, retval);
    } else {
        if (dlil_verbose) {
            printf("%s: attached v2 protocol %d (count = %d)\n",
                if_name(ifp),
                protocol, proto_count);
        }
    }
    ifnet_head_done();
    if (retval == 0) {
        /*
         * A protocol has been attached, mark the interface up.
         * This used to be done by configd.KernelEventMonitor, but that
         * is inherently prone to races (rdar://problem/30810208).
         */
        (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
        (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
        dlil_post_sifflags_msg(ifp);
    } else if (ifproto != NULL) {
        zfree(dlif_proto_zone, ifproto);
    }
    return retval;
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
    struct if_proto *proto = NULL;
    int retval = 0;

    if (ifp == NULL || proto_family == 0) {
        retval = EINVAL;
        goto end;
    }

    ifnet_lock_exclusive(ifp);
    /* callee holds a proto refcnt upon success */
    proto = find_attached_proto(ifp, proto_family);
    if (proto == NULL) {
        retval = ENXIO;
        ifnet_lock_done(ifp);
        goto end;
    }

    /* call family module del_proto */
    if (ifp->if_del_proto) {
        ifp->if_del_proto(ifp, proto->protocol_family);
    }

    SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
        proto, if_proto, next_hash);

    if (proto->proto_kpi == kProtoKPI_v1) {
        proto->kpi.v1.input = ifproto_media_input_v1;
        proto->kpi.v1.pre_output = ifproto_media_preout;
        proto->kpi.v1.event = ifproto_media_event;
        proto->kpi.v1.ioctl = ifproto_media_ioctl;
        proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v1.send_arp = ifproto_media_send_arp;
    } else {
        proto->kpi.v2.input = ifproto_media_input_v2;
        proto->kpi.v2.pre_output = ifproto_media_preout;
        proto->kpi.v2.event = ifproto_media_event;
        proto->kpi.v2.ioctl = ifproto_media_ioctl;
        proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
        proto->kpi.v2.send_arp = ifproto_media_send_arp;
    }
    proto->detached = 1;
    ifnet_lock_done(ifp);

    if (dlil_verbose) {
        printf("%s: detached %s protocol %d\n", if_name(ifp),
            (proto->proto_kpi == kProtoKPI_v1) ?
            "v1" : "v2", proto_family);
    }

    /* release proto refcnt held during protocol attach */
    if_proto_free(proto);

    /*
     * Release proto refcnt held during lookup; the rest of
     * protocol detach steps will happen when the last proto
     * reference is released.
     */
    if_proto_free(proto);

end:
    return retval;
}
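/*
 * The two consecutive if_proto_free() calls above are intentional: one
 * drops the reference taken at attach time, the other drops the reference
 * taken by find_attached_proto() in this function. The final release is
 * what triggers the remaining detach work (including the detached
 * callback) once every outstanding reference is gone.
 */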
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
    return ENXIO;
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
    return ENXIO;
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
    return ENXIO;
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
    return ENXIO;
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
    return ENXIO;
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
    return ENXIO;
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifnet *tmp_if;
    struct ifaddr *ifa;
    struct if_data_internal if_data_saved;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
    struct dlil_threading_info *dl_inp;
    u_int32_t sflags = 0;
    int err;

    if (ifp == NULL) {
        return EINVAL;
    }

    /*
     * Serialize ifnet attach using dlil_ifnet_lock, in order to
     * prevent the interface from being configured while it is
     * embryonic, as ifnet_head_lock is dropped and reacquired
     * below prior to marking the ifnet with IFRF_ATTACHED.
     */
    dlil_if_lock();
    ifnet_head_lock_exclusive();
    /* Verify we aren't already on the list */
    TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
        if (tmp_if == ifp) {
            ifnet_head_done();
            dlil_if_unlock();
            return EEXIST;
        }
    }

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
        panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    ifnet_lock_exclusive(ifp);

    /* Sanity check */
    VERIFY(ifp->if_detaching_link.tqe_next == NULL);
    VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

    if (ll_addr != NULL) {
        if (ifp->if_addrlen == 0) {
            ifp->if_addrlen = ll_addr->sdl_alen;
        } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
            ifnet_lock_done(ifp);
            ifnet_head_done();
            dlil_if_unlock();
            return EINVAL;
        }
    }

    /*
     * Allow interfaces without protocol families to attach
     * only if they have the necessary fields filled out.
     */
    if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
        DLIL_PRINTF("%s: Attempt to attach interface without "
            "family module - %d\n", __func__, ifp->if_family);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return ENODEV;
    }

    /* Allocate protocol hash table */
    VERIFY(ifp->if_proto_hash == NULL);
    ifp->if_proto_hash = zalloc(dlif_phash_zone);
    if (ifp->if_proto_hash == NULL) {
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return ENOBUFS;
    }
    bzero(ifp->if_proto_hash, dlif_phash_size);

    lck_mtx_lock_spin(&ifp->if_flt_lock);
    VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
    TAILQ_INIT(&ifp->if_flt_head);
    VERIFY(ifp->if_flt_busy == 0);
    VERIFY(ifp->if_flt_waiters == 0);
    lck_mtx_unlock(&ifp->if_flt_lock);

    if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
        VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
        LIST_INIT(&ifp->if_multiaddrs);
    }

    VERIFY(ifp->if_allhostsinm == NULL);
    VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
    TAILQ_INIT(&ifp->if_addrhead);

    if (ifp->if_index == 0) {
        int idx = if_next_index();
        if (idx == -1) {
            ifp->if_index = 0;
            ifnet_lock_done(ifp);
            ifnet_head_done();
            dlil_if_unlock();
            return ENOBUFS;
        }
        ifp->if_index = idx;
    }
    /* There should not be anything occupying this slot */
    VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

    /* allocate (if needed) and initialize a link address */
    ifa = dlil_alloc_lladdr(ifp, ll_addr);
    if (ifa == NULL) {
        ifnet_lock_done(ifp);
        ifnet_head_done();
        dlil_if_unlock();
        return ENOBUFS;
    }

    VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
    ifnet_addrs[ifp->if_index - 1] = ifa;

    /* make this address the first on the list */
    IFA_LOCK(ifa);
    /* hold a reference for ifnet_addrs[] */
    IFA_ADDREF_LOCKED(ifa);
    /* if_attach_link_ifa() holds a reference for ifa_link */
    if_attach_link_ifa(ifp, ifa);
    IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
    mac_ifnet_label_associate(ifp);
#endif

    TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
    ifindex2ifnet[ifp->if_index] = ifp;

    /* Hold a reference to the underlying dlil_ifnet */
    ifnet_reference(ifp);

    /* Clear stats (save and restore other fields that we care) */
    if_data_saved = ifp->if_data;
    bzero(&ifp->if_data, sizeof(ifp->if_data));
    ifp->if_data.ifi_type = if_data_saved.ifi_type;
    ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
    ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
    ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
    ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
    ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
    ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
    ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
    ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
    ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
    ifnet_touch_lastchange(ifp);

    VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
        ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

    /* By default, use SFB and enable flow advisory */
    sflags = PKTSCHEDF_QALG_SFB;
    if (if_flowadv) {
        sflags |= PKTSCHEDF_QALG_FLOWCTL;
    }

    if (if_delaybased_queue) {
        sflags |= PKTSCHEDF_QALG_DELAYBASED;
    }

    if (ifp->if_output_sched_model ==
        IFNET_SCHED_MODEL_DRIVER_MANAGED) {
        sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
    }

    /* Initialize transmit queue(s) */
    err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
    if (err != 0) {
        panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
            "err=%d", __func__, ifp, err);
        /* NOTREACHED */
    }

    /* Sanity checks on the input thread storage */
    dl_inp = &dl_if->dl_if_inpstorage;
    bzero(&dl_inp->stats, sizeof(dl_inp->stats));
    VERIFY(dl_inp->input_waiting == 0);
    VERIFY(dl_inp->wtot == 0);
    VERIFY(dl_inp->ifp == NULL);
    VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
    VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
    VERIFY(!dl_inp->net_affinity);
    VERIFY(ifp->if_inp == NULL);
    VERIFY(dl_inp->input_thr == THREAD_NULL);
    VERIFY(dl_inp->wloop_thr == THREAD_NULL);
    VERIFY(dl_inp->poll_thr == THREAD_NULL);
    VERIFY(dl_inp->tag == 0);
    VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
    bzero(&dl_inp->tstats, sizeof(dl_inp->tstats));
    bzero(&dl_inp->pstats, sizeof(dl_inp->pstats));
    bzero(&dl_inp->sstats, sizeof(dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
    VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

    /*
     * A specific DLIL input thread is created per Ethernet/cellular
     * interface or for an interface which supports opportunistic
     * input polling. Pseudo interfaces or other types of interfaces
     * use the main input thread instead.
     */
    if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
        ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
        ifp->if_inp = dl_inp;
        err = dlil_create_input_thread(ifp, ifp->if_inp);
        if (err != 0) {
            panic_plain("%s: ifp=%p couldn't get an input thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
    }

    if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
        ifp->if_inp->input_mit_tcall =
            thread_call_allocate_with_priority(dlil_mit_tcall_fn,
            ifp, THREAD_CALL_PRIORITY_KERNEL);
    }

    /*
     * If the driver supports the new transmit model, calculate flow hash
     * and create a workloop starter thread to invoke the if_start callback
     * where the packets may be dequeued and transmitted.
     */
    if (ifp->if_eflags & IFEF_TXSTART) {
        ifp->if_flowhash = ifnet_calc_flowhash(ifp);
        VERIFY(ifp->if_flowhash != 0);
        VERIFY(ifp->if_start_thread == THREAD_NULL);

        ifnet_set_start_cycle(ifp, NULL);
        ifp->if_start_active = 0;
        ifp->if_start_req = 0;
        ifp->if_start_flags = 0;
        VERIFY(ifp->if_start != NULL);
        if ((err = kernel_thread_start(ifnet_start_thread_fn,
            ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
            panic_plain("%s: "
                "ifp=%p couldn't get a start thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
        ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
    } else {
        ifp->if_flowhash = 0;
    }

    /*
     * If the driver supports the new receive model, create a poller
     * thread to invoke if_input_poll callback where the packets may
     * be dequeued from the driver and processed for reception.
     */
    if (ifp->if_eflags & IFEF_RXPOLL) {
        VERIFY(ifp->if_input_poll != NULL);
        VERIFY(ifp->if_input_ctl != NULL);
        VERIFY(ifp->if_poll_thread == THREAD_NULL);

        ifnet_set_poll_cycle(ifp, NULL);
        ifp->if_poll_update = 0;
        ifp->if_poll_active = 0;
        ifp->if_poll_req = 0;
        if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
            &ifp->if_poll_thread)) != KERN_SUCCESS) {
            panic_plain("%s: ifp=%p couldn't get a poll thread; "
                "err=%d", __func__, ifp, err);
            /* NOTREACHED */
        }
        ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
            (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
    }

    VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
    VERIFY(ifp->if_desc.ifd_len == 0);
    VERIFY(ifp->if_desc.ifd_desc != NULL);

    /* Record attach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

    ifp->if_updatemcasts = 0;
    if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
        struct ifmultiaddr *ifma;
        LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
            IFMA_LOCK(ifma);
            if (ifma->ifma_addr->sa_family
== AF_LINK
||
6495 ifma
->ifma_addr
->sa_family
== AF_UNSPEC
) {
6496 ifp
->if_updatemcasts
++;
6501 printf("%s: attached with %d suspended link-layer multicast "
6502 "membership(s)\n", if_name(ifp
),
6503 ifp
->if_updatemcasts
);
6506 /* Clear logging parameters */
6507 bzero(&ifp
->if_log
, sizeof(ifp
->if_log
));
6509 /* Clear foreground/realtime activity timestamps */
6510 ifp
->if_fg_sendts
= 0;
6511 ifp
->if_rt_sendts
= 0;
6513 VERIFY(ifp
->if_delegated
.ifp
== NULL
);
6514 VERIFY(ifp
->if_delegated
.type
== 0);
6515 VERIFY(ifp
->if_delegated
.family
== 0);
6516 VERIFY(ifp
->if_delegated
.subfamily
== 0);
6517 VERIFY(ifp
->if_delegated
.expensive
== 0);
6519 VERIFY(ifp
->if_agentids
== NULL
);
6520 VERIFY(ifp
->if_agentcount
== 0);
6522 /* Reset interface state */
6523 bzero(&ifp
->if_interface_state
, sizeof(ifp
->if_interface_state
));
6524 ifp
->if_interface_state
.valid_bitmask
|=
6525 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID
;
6526 ifp
->if_interface_state
.interface_availability
=
6527 IF_INTERFACE_STATE_INTERFACE_AVAILABLE
;
6529 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
6530 if (ifp
== lo_ifp
) {
6531 ifp
->if_interface_state
.lqm_state
= IFNET_LQM_THRESH_GOOD
;
6532 ifp
->if_interface_state
.valid_bitmask
|=
6533 IF_INTERFACE_STATE_LQM_STATE_VALID
;
6535 ifp
->if_interface_state
.lqm_state
= IFNET_LQM_THRESH_UNKNOWN
;
6539 * Enable ECN capability on this interface depending on the
6540 * value of ECN global setting
6542 if (tcp_ecn_outbound
== 2 && !IFNET_IS_CELLULAR(ifp
)) {
6543 ifp
->if_eflags
|= IFEF_ECN_ENABLE
;
6544 ifp
->if_eflags
&= ~IFEF_ECN_DISABLE
;
6548 * Built-in Cyclops always on policy for WiFi infra
6550 if (IFNET_IS_WIFI_INFRA(ifp
) && net_qos_policy_wifi_enabled
!= 0) {
6553 error
= if_set_qosmarking_mode(ifp
,
6554 IFRTYPE_QOSMARKING_FASTLANE
);
6556 printf("%s if_set_qosmarking_mode(%s) error %d\n",
6557 __func__
, ifp
->if_xname
, error
);
6559 ifp
->if_eflags
|= IFEF_QOSMARKING_ENABLED
;
6560 #if (DEVELOPMENT || DEBUG)
6561 printf("%s fastlane enabled on %s\n",
6562 __func__
, ifp
->if_xname
);
6563 #endif /* (DEVELOPMENT || DEBUG) */
6567 ifnet_lock_done(ifp
);
6571 lck_mtx_lock(&ifp
->if_cached_route_lock
);
6572 /* Enable forwarding cached route */
6573 ifp
->if_fwd_cacheok
= 1;
6574 /* Clean up any existing cached routes */
6575 ROUTE_RELEASE(&ifp
->if_fwd_route
);
6576 bzero(&ifp
->if_fwd_route
, sizeof(ifp
->if_fwd_route
));
6577 ROUTE_RELEASE(&ifp
->if_src_route
);
6578 bzero(&ifp
->if_src_route
, sizeof(ifp
->if_src_route
));
6579 ROUTE_RELEASE(&ifp
->if_src_route6
);
6580 bzero(&ifp
->if_src_route6
, sizeof(ifp
->if_src_route6
));
6581 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
6583 ifnet_llreach_ifattach(ifp
, (dl_if
->dl_if_flags
& DLIF_REUSE
));
6586 * Allocate and attach IGMPv3/MLDv2 interface specific variables
6587 * and trees; do this before the ifnet is marked as attached.
6588 * The ifnet keeps the reference to the info structures even after
6589 * the ifnet is detached, since the network-layer records still
6590 * refer to the info structures even after that. This also
6591 * makes it possible for them to still function after the ifnet
6592 * is recycled or reattached.
6595 if (IGMP_IFINFO(ifp
) == NULL
) {
6596 IGMP_IFINFO(ifp
) = igmp_domifattach(ifp
, M_WAITOK
);
6597 VERIFY(IGMP_IFINFO(ifp
) != NULL
);
6599 VERIFY(IGMP_IFINFO(ifp
)->igi_ifp
== ifp
);
6600 igmp_domifreattach(IGMP_IFINFO(ifp
));
6604 if (MLD_IFINFO(ifp
) == NULL
) {
6605 MLD_IFINFO(ifp
) = mld_domifattach(ifp
, M_WAITOK
);
6606 VERIFY(MLD_IFINFO(ifp
) != NULL
);
6608 VERIFY(MLD_IFINFO(ifp
)->mli_ifp
== ifp
);
6609 mld_domifreattach(MLD_IFINFO(ifp
));
6613 VERIFY(ifp
->if_data_threshold
== 0);
6614 VERIFY(ifp
->if_dt_tcall
!= NULL
);
6617 * Finally, mark this ifnet as attached.
6619 lck_mtx_lock(rnh_lock
);
6620 ifnet_lock_exclusive(ifp
);
6621 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6622 ifp
->if_refflags
= IFRF_ATTACHED
; /* clears embryonic */
6623 lck_mtx_unlock(&ifp
->if_ref_lock
);
6625 /* boot-args override; enable idle notification */
6626 (void) ifnet_set_idle_flags_locked(ifp
, IFRF_IDLE_NOTIFY
,
6629 /* apply previous request(s) to set the idle flags, if any */
6630 (void) ifnet_set_idle_flags_locked(ifp
, ifp
->if_idle_new_flags
,
6631 ifp
->if_idle_new_flags_mask
);
6633 ifnet_lock_done(ifp
);
6634 lck_mtx_unlock(rnh_lock
);
6639 * Attach packet filter to this interface, if enabled.
6641 pf_ifnet_hook(ifp
, 1);
6644 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_ATTACHED
, NULL
, 0);
6647 printf("%s: attached%s\n", if_name(ifp
),
6648 (dl_if
->dl_if_flags
& DLIF_REUSE
) ? " (recycled)" : "");
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
    struct ifaddr *ifa, *oifa;
    struct sockaddr_dl *asdl, *msdl;
    char workbuf[IFNAMSIZ * 2];
    int namelen, masklen, socksize;
    struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
    VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

    namelen = snprintf(workbuf, sizeof(workbuf), "%s",
        if_name(ifp));
    masklen = offsetof(struct sockaddr_dl, sdl_data[0])
        + ((namelen > 0) ? namelen : 0);
    socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
    if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
        socksize = sizeof(struct sockaddr_dl);
    }
    socksize = ROUNDUP(socksize);
#undef ROUNDUP

    ifa = ifp->if_lladdr;
    if (socksize > DLIL_SDLMAXLEN ||
        (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
        /*
         * Rare, but in the event that the link address requires
         * more storage space than DLIL_SDLMAXLEN, allocate the
         * largest possible storages for address and mask, such
         * that we can reuse the same space when if_addrlen grows.
         * This same space will be used when if_addrlen shrinks.
         */
        if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
            int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
            ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
            if (ifa == NULL) {
                return NULL;
            }
            ifa_lock_init(ifa);
            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;
        }
        IFA_LOCK(ifa);
        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(ifa + 1);
        bzero(asdl, SOCK_MAXADDRLEN);
        msdl = (struct sockaddr_dl *)(void *)
            ((char *)asdl + SOCK_MAXADDRLEN);
        bzero(msdl, SOCK_MAXADDRLEN);
    } else {
        VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
        /*
         * Use the storage areas for address and mask within the
         * dlil_ifnet structure.  This is the most common case.
         */
        if (ifa == NULL) {
            ifa = &dl_if->dl_if_lladdr.ifa;
            ifa_lock_init(ifa);
            /* Don't set IFD_ALLOC, as this is permanent */
            ifa->ifa_debug = IFD_LINK;
        }
        IFA_LOCK(ifa);
        /* address and mask sockaddr_dl locations */
        asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
        bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
        msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
        bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
    }

    /* hold a permanent reference for the ifnet itself */
    IFA_ADDREF_LOCKED(ifa);
    oifa = ifp->if_lladdr;
    ifp->if_lladdr = ifa;

    VERIFY(ifa->ifa_debug == IFD_LINK);
    ifa->ifa_ifp = ifp;
    ifa->ifa_rtrequest = link_rtrequest;
    ifa->ifa_addr = (struct sockaddr *)asdl;
    asdl->sdl_len = socksize;
    asdl->sdl_family = AF_LINK;
    if (namelen > 0) {
        bcopy(workbuf, asdl->sdl_data, min(namelen,
            sizeof(asdl->sdl_data)));
        asdl->sdl_nlen = namelen;
    } else {
        asdl->sdl_nlen = 0;
    }
    asdl->sdl_index = ifp->if_index;
    asdl->sdl_type = ifp->if_type;
    if (ll_addr != NULL) {
        asdl->sdl_alen = ll_addr->sdl_alen;
        bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
    } else {
        asdl->sdl_alen = 0;
    }
    ifa->ifa_netmask = (struct sockaddr *)msdl;
    msdl->sdl_len = masklen;
    while (namelen > 0) {
        msdl->sdl_data[--namelen] = 0xff;
    }
    IFA_UNLOCK(ifa);

    if (oifa != NULL) {
        IFA_REMREF(oifa);
    }

    return ifa;
}
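/*
 * Worked example of the sizing arithmetic above (illustrative): for an
 * interface named "en0" with a 6-byte Ethernet address, namelen is 3,
 * masklen is offsetof(struct sockaddr_dl, sdl_data[0]) + 3, and socksize
 * is masklen + 6, clamped to at least sizeof(struct sockaddr_dl) and then
 * rounded up to a 4-byte boundary by ROUNDUP().  Since the result fits
 * within DLIL_SDLMAXLEN, the common in-structure storage path is taken.
 */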
void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
    in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
    in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
    struct ifnet *delegated_ifp;
    struct nd_ifinfo *ndi = NULL;

    if (ifp == NULL) {
        return EINVAL;
    }

    ndi = ND_IFINFO(ifp);
    if (ndi != NULL) {
        ndi->cga_initialized = FALSE;
    }

    lck_mtx_lock(rnh_lock);
    ifnet_head_lock_exclusive();
    ifnet_lock_exclusive(ifp);

    /*
     * Check to see if this interface has previously triggered
     * aggressive protocol draining; if so, decrement the global
     * refcnt and clear PR_AGGDRAIN on the route domain if
     * there are no more of such an interface around.
     */
    (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_ATTACHED)) {
        lck_mtx_unlock(&ifp->if_ref_lock);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
        return EINVAL;
    } else if (ifp->if_refflags & IFRF_DETACHING) {
        /* Interface has already been detached */
        lck_mtx_unlock(&ifp->if_ref_lock);
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
        return ENXIO;
    }
    VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
    /* Indicate this interface is being detached */
    ifp->if_refflags &= ~IFRF_ATTACHED;
    ifp->if_refflags |= IFRF_DETACHING;
    lck_mtx_unlock(&ifp->if_ref_lock);

    if (dlil_verbose) {
        printf("%s: detaching\n", if_name(ifp));
    }

    /* clean up flow control entry object if there's any */
    if (ifp->if_eflags & IFEF_TXSTART) {
        ifnet_flowadv(ifp->if_flowhash);
    }

    /* Reset ECN enable/disable flags */
    ifp->if_eflags &= ~IFEF_ECN_DISABLE;
    ifp->if_eflags &= ~IFEF_ECN_ENABLE;

    /* Reset CLAT46 flag */
    ifp->if_eflags &= ~IFEF_CLAT46;

    /*
     * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
     * no longer be visible during lookups from this point.
     */
    VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
    TAILQ_REMOVE(&ifnet_head, ifp, if_link);
    ifp->if_link.tqe_next = NULL;
    ifp->if_link.tqe_prev = NULL;
    if (ifp->if_ordered_link.tqe_next != NULL ||
        ifp->if_ordered_link.tqe_prev != NULL) {
        ifnet_remove_from_ordered_list(ifp);
    }
    ifindex2ifnet[ifp->if_index] = NULL;

    /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
    ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

    /* Record detach PC stacktrace */
    ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

    /* Clear logging parameters */
    bzero(&ifp->if_log, sizeof(ifp->if_log));

    /* Clear delegated interface info (reference released below) */
    delegated_ifp = ifp->if_delegated.ifp;
    bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

    /* Reset interface state */
    bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

    ifnet_lock_done(ifp);
    ifnet_head_done();
    lck_mtx_unlock(rnh_lock);

    /* Release reference held on the delegated interface */
    if (delegated_ifp != NULL) {
        ifnet_release(delegated_ifp);
    }

    /* Reset Link Quality Metric (unless loopback [lo0]) */
    if (ifp != lo_ifp) {
        if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
    }

    /* Reset TCP local statistics */
    if (ifp->if_tcp_stat != NULL) {
        bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
    }

    /* Reset UDP local statistics */
    if (ifp->if_udp_stat != NULL) {
        bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
    }

    /* Reset ifnet IPv4 stats */
    if (ifp->if_ipv4_stat != NULL) {
        bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
    }

    /* Reset ifnet IPv6 stats */
    if (ifp->if_ipv6_stat != NULL) {
        bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
    }

    /* Release memory held for interface link status report */
    if (ifp->if_link_status != NULL) {
        FREE(ifp->if_link_status, M_TEMP);
        ifp->if_link_status = NULL;
    }

    /* Clear agent IDs */
    if (ifp->if_agentids != NULL) {
        FREE(ifp->if_agentids, M_NETAGENT);
        ifp->if_agentids = NULL;
    }
    ifp->if_agentcount = 0;

    /* Let BPF know we're detaching */
    bpfdetach(ifp);

    /* Mark the interface as DOWN */
    if_down(ifp);

    /* Disable forwarding cached route */
    lck_mtx_lock(&ifp->if_cached_route_lock);
    ifp->if_fwd_cacheok = 0;
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    /* Disable data threshold and wait for any pending event posting */
    ifp->if_data_threshold = 0;
    VERIFY(ifp->if_dt_tcall != NULL);
    (void) thread_call_cancel_wait(ifp->if_dt_tcall);

    /*
     * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
     * references to the info structures and leave them attached to
     * this ifnet.
     */
#if INET
    igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
    mld_domifdetach(ifp);
#endif /* INET6 */

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

    /* Let worker thread take care of the rest, to avoid reentrancy */
    dlil_if_lock();
    ifnet_detaching_enqueue(ifp);
    dlil_if_unlock();

    return 0;
}
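/*
 * Illustrative usage sketch (assumption, not from this file): a driver
 * tears down its interface with the detach KPI and waits for the
 * KEV_DL_IF_DETACHED event (or its detached callback) before unloading,
 * since the actual teardown is finished asynchronously by the detacher
 * worker thread enqueued above.
 *
 *	errno_t err = ifnet_detach(ifp);	// runs the code above
 *	// EINVAL: never attached; ENXIO: detach already in progress
 */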
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
    dlil_if_lock_assert();

    ++ifnet_detaching_cnt;
    VERIFY(ifnet_detaching_cnt != 0);
    TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
    wakeup((caddr_t)&ifnet_delayed_run);
}
static struct ifnet *
ifnet_detaching_dequeue(void)
{
    struct ifnet *ifp;

    dlil_if_lock_assert();

    ifp = TAILQ_FIRST(&ifnet_detaching_head);
    VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
    if (ifp != NULL) {
        VERIFY(ifnet_detaching_cnt != 0);
        --ifnet_detaching_cnt;
        TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
        ifp->if_detaching_link.tqe_next = NULL;
        ifp->if_detaching_link.tqe_prev = NULL;
    }
    return ifp;
}
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
    struct ifnet *ifp;

    for (;;) {
        dlil_if_lock_assert();
        while (ifnet_detaching_cnt == 0) {
            (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
                (PZERO - 1), "ifnet_detacher_cont", 0,
                ifnet_detacher_thread_cont);
            /* NOTREACHED */
        }

        VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

        /* Take care of detaching ifnet */
        ifp = ifnet_detaching_dequeue();
        if (ifp != NULL) {
            dlil_if_unlock();
            ifnet_detach_final(ifp);
            dlil_if_lock();
        }
    }
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
    dlil_if_lock();
    (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
        (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
    /*
     * msleep0() shouldn't have returned as PCATCH was not set;
     * therefore assert in this case.
     */
    dlil_if_unlock();
    VERIFY(0);
    /* NOTREACHED */
}
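/*
 * Note on the pattern above (illustrative): msleep0() with a continuation
 * does not return to its caller; when the wakeup fires, the kernel calls
 * ifnet_detacher_thread_cont() on a fresh stack.  That is why the thread
 * function asserts if msleep0() ever returns, and why the continuation
 * re-enters its own loop instead of returning normally.
 */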
static void
ifnet_detach_final(struct ifnet *ifp)
{
    struct ifnet_filter *filter, *filter_next;
    struct ifnet_filter_head fhead;
    struct dlil_threading_info *inp;
    struct ifaddr *ifa;
    ifnet_detached_func if_free;
    int i;

    lck_mtx_lock(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_DETACHING)) {
        panic("%s: flags mismatch (detaching not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }

    /*
     * Wait until the existing IO references get released
     * before we proceed with ifnet_detach.  This is not a
     * common case, so block without using a continuation.
     */
    while (ifp->if_refio > 0) {
        printf("%s: Waiting for IO references on %s interface "
            "to be released\n", __func__, if_name(ifp));
        (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
            (PZERO - 1), "ifnet_ioref_wait", NULL);
    }
    lck_mtx_unlock(&ifp->if_ref_lock);

    /* Drain and destroy send queue */
    ifclassq_teardown(ifp);

    /* Detach interface filters */
    lck_mtx_lock(&ifp->if_flt_lock);
    if_flt_monitor_enter(ifp);

    LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
    fhead = ifp->if_flt_head;
    TAILQ_INIT(&ifp->if_flt_head);

    for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
        filter_next = TAILQ_NEXT(filter, filt_next);
        lck_mtx_unlock(&ifp->if_flt_lock);

        dlil_detach_filter_internal(filter, 1);
        lck_mtx_lock(&ifp->if_flt_lock);
    }
    if_flt_monitor_leave(ifp);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Tell upper layers to drop their network addresses */
    if_purgeaddrs(ifp);

    ifnet_lock_exclusive(ifp);

    /* Uplumb all protocols */
    for (i = 0; i < PROTO_HASH_SLOTS; i++) {
        struct if_proto *proto;

        proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
        while (proto != NULL) {
            protocol_family_t family = proto->protocol_family;
            ifnet_lock_done(ifp);
            proto_unplumb(family, ifp);
            ifnet_lock_exclusive(ifp);
            proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
        }
        /* There should not be any protocols left */
        VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
    }
    zfree(dlif_phash_zone, ifp->if_proto_hash);
    ifp->if_proto_hash = NULL;

    /* Detach (permanent) link address from if_addrhead */
    ifa = TAILQ_FIRST(&ifp->if_addrhead);
    VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
    IFA_LOCK(ifa);
    if_detach_link_ifa(ifp, ifa);
    IFA_UNLOCK(ifa);

    /* Remove (permanent) link address from ifnet_addrs[] */
    IFA_REMREF(ifa);
    ifnet_addrs[ifp->if_index - 1] = NULL;

    /* This interface should not be on {ifnet_head,detaching} */
    VERIFY(ifp->if_link.tqe_next == NULL);
    VERIFY(ifp->if_link.tqe_prev == NULL);
    VERIFY(ifp->if_detaching_link.tqe_next == NULL);
    VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
    VERIFY(ifp->if_ordered_link.tqe_next == NULL);
    VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

    /* The slot should have been emptied */
    VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

    /* There should not be any addresses left */
    VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

    /*
     * Signal the starter thread to terminate itself.
     */
    if (ifp->if_start_thread != THREAD_NULL) {
        lck_mtx_lock_spin(&ifp->if_start_lock);
        ifp->if_start_flags = 0;
        ifp->if_start_thread = THREAD_NULL;
        wakeup_one((caddr_t)&ifp->if_start_thread);
        lck_mtx_unlock(&ifp->if_start_lock);
    }

    /*
     * Signal the poller thread to terminate itself.
     */
    if (ifp->if_poll_thread != THREAD_NULL) {
        lck_mtx_lock_spin(&ifp->if_poll_lock);
        ifp->if_poll_thread = THREAD_NULL;
        wakeup_one((caddr_t)&ifp->if_poll_thread);
        lck_mtx_unlock(&ifp->if_poll_lock);
    }

    /*
     * If thread affinity was set for the workloop thread, we will need
     * to tear down the affinity and release the extra reference count
     * taken at attach time.  Does not apply to lo0 or other interfaces
     * without dedicated input threads.
     */
    if ((inp = ifp->if_inp) != NULL) {
        VERIFY(inp != dlil_main_input_thread);

        if (inp->net_affinity) {
            struct thread *tp, *wtp, *ptp;

            lck_mtx_lock_spin(&inp->input_lck);
            wtp = inp->wloop_thr;
            inp->wloop_thr = THREAD_NULL;
            ptp = inp->poll_thr;
            inp->poll_thr = THREAD_NULL;
            tp = inp->input_thr;    /* don't nullify now */
            inp->tag = 0;
            inp->net_affinity = FALSE;
            lck_mtx_unlock(&inp->input_lck);

            /* Tear down poll thread affinity */
            if (ptp != NULL) {
                VERIFY(ifp->if_eflags & IFEF_RXPOLL);
                (void) dlil_affinity_set(ptp,
                    THREAD_AFFINITY_TAG_NULL);
                thread_deallocate(ptp);
            }

            /* Tear down workloop thread affinity */
            if (wtp != NULL) {
                (void) dlil_affinity_set(wtp,
                    THREAD_AFFINITY_TAG_NULL);
                thread_deallocate(wtp);
            }

            /* Tear down DLIL input thread affinity */
            (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
            thread_deallocate(tp);
        }

        /* disassociate ifp DLIL input thread */
        ifp->if_inp = NULL;

        /* tell the input thread to terminate */
        lck_mtx_lock_spin(&inp->input_lck);
        inp->input_waiting |= DLIL_INPUT_TERMINATE;
        if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
            wakeup_one((caddr_t)&inp->input_waiting);
        }
        lck_mtx_unlock(&inp->input_lck);
        ifnet_lock_done(ifp);

        /* wait for the input thread to terminate */
        lck_mtx_lock_spin(&inp->input_lck);
        while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
            == 0) {
            (void) msleep(&inp->input_waiting, &inp->input_lck,
                (PZERO - 1) | PSPIN, inp->input_name, NULL);
        }
        lck_mtx_unlock(&inp->input_lck);
        ifnet_lock_exclusive(ifp);

        /* clean-up input thread state */
        dlil_clean_threading_info(inp);
    }

    /* The driver might unload, so point these to ourselves */
    if_free = ifp->if_free;
    ifp->if_output_dlil = ifp_if_output;
    ifp->if_output = ifp_if_output;
    ifp->if_pre_enqueue = ifp_if_output;
    ifp->if_start = ifp_if_start;
    ifp->if_output_ctl = ifp_if_ctl;
    ifp->if_input_dlil = ifp_if_input;
    ifp->if_input_poll = ifp_if_input_poll;
    ifp->if_input_ctl = ifp_if_ctl;
    ifp->if_ioctl = ifp_if_ioctl;
    ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
    ifp->if_free = ifp_if_free;
    ifp->if_demux = ifp_if_demux;
    ifp->if_event = ifp_if_event;
    ifp->if_framer_legacy = ifp_if_framer;
    ifp->if_framer = ifp_if_framer_extended;
    ifp->if_add_proto = ifp_if_add_proto;
    ifp->if_del_proto = ifp_if_del_proto;
    ifp->if_check_multi = ifp_if_check_multi;

    /* wipe out interface description */
    VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
    ifp->if_desc.ifd_len = 0;
    VERIFY(ifp->if_desc.ifd_desc != NULL);
    bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

    /* there shouldn't be any delegation by now */
    VERIFY(ifp->if_delegated.ifp == NULL);
    VERIFY(ifp->if_delegated.type == 0);
    VERIFY(ifp->if_delegated.family == 0);
    VERIFY(ifp->if_delegated.subfamily == 0);
    VERIFY(ifp->if_delegated.expensive == 0);

    /* QoS marking get cleared */
    ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
    if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

    ifnet_lock_done(ifp);

#if PF
    /*
     * Detach this interface from packet filter, if enabled.
     */
    pf_ifnet_hook(ifp, 0);
#endif /* PF */

    /* Filter list should be empty */
    lck_mtx_lock_spin(&ifp->if_flt_lock);
    VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
    VERIFY(ifp->if_flt_busy == 0);
    VERIFY(ifp->if_flt_waiters == 0);
    lck_mtx_unlock(&ifp->if_flt_lock);

    /* Last chance to drain send queue */
    if_qflush(ifp, 0);

    /* Last chance to cleanup any cached route */
    lck_mtx_lock(&ifp->if_cached_route_lock);
    VERIFY(!ifp->if_fwd_cacheok);
    ROUTE_RELEASE(&ifp->if_fwd_route);
    bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
    ROUTE_RELEASE(&ifp->if_src_route);
    bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
    ROUTE_RELEASE(&ifp->if_src_route6);
    bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
    lck_mtx_unlock(&ifp->if_cached_route_lock);

    VERIFY(ifp->if_data_threshold == 0);
    VERIFY(ifp->if_dt_tcall != NULL);
    VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

    ifnet_llreach_ifdetach(ifp);

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

    /*
     * Finally, mark this ifnet as detached.
     */
    lck_mtx_lock_spin(&ifp->if_ref_lock);
    if (!(ifp->if_refflags & IFRF_DETACHING)) {
        panic("%s: flags mismatch (detaching not set) ifp=%p",
            __func__, ifp);
        /* NOTREACHED */
    }
    ifp->if_refflags &= ~IFRF_DETACHING;
    lck_mtx_unlock(&ifp->if_ref_lock);
    if (if_free != NULL) {
        if_free(ifp);
    }

    if (dlil_verbose) {
        printf("%s: detached\n", if_name(ifp));
    }

    /* Release reference held during ifnet attach */
    ifnet_release(ifp);
}
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
    m_freem_list(m);
    return 0;
}

static void
ifp_if_start(struct ifnet *ifp)
{
    ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
    m_freem_list(m_head);
    return ENXIO;
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
    if (m_head != NULL) {
        *m_head = NULL;
    }
    if (m_tail != NULL) {
        *m_tail = NULL;
    }
    if (cnt != NULL) {
        *cnt = 0;
    }
    if (len != NULL) {
        *len = 0;
    }
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
    return EOPNOTSUPP;
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
    m_freem(m);
    return EJUSTRETURN;
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
    return EINVAL;
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
    return EINVAL;
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
    return EOPNOTSUPP;
}

#if CONFIG_EMBEDDED
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* !CONFIG_EMBEDDED */
{
#pragma unused(ifp, m, sa, ll, t)
#if CONFIG_EMBEDDED
    return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else
    return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* !CONFIG_EMBEDDED */
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
    m_freem(*m);
    *m = NULL;

    if (pre != NULL) {
        *pre = 0;
    }
    if (post != NULL) {
        *post = 0;
    }

    return EJUSTRETURN;
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
    return EOPNOTSUPP;
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
    /* XXX not sure what to do here */
    return 0;
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
int
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
    struct ifnet *ifp1 = NULL;
    struct dlil_ifnet *dlifp1 = NULL;
    void *buf, *base, **pbuf;
    int ret = 0;

    VERIFY(*ifp == NULL);
    dlil_if_lock();
    /*
     * We absolutely can't have an interface with the same name
     * in use by another dlil_ifnet.  To make sure of that, the
     * list has to be traversed completely.
     */
    TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
        ifp1 = (struct ifnet *)dlifp1;

        if (ifp1->if_family != family) {
            continue;
        }

        /*
         * If interface is in use, return EBUSY if either unique id
         * or interface extended names are the same
         */
        lck_mtx_lock(&dlifp1->dl_if_lock);
        if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
            if (dlifp1->dl_if_flags & DLIF_INUSE) {
                lck_mtx_unlock(&dlifp1->dl_if_lock);
                ret = EBUSY;
                goto end;
            }
        }

        if (uniqueid_len) {
            if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
                bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
                if (dlifp1->dl_if_flags & DLIF_INUSE) {
                    lck_mtx_unlock(&dlifp1->dl_if_lock);
                    ret = EBUSY;
                    goto end;
                } else {
                    dlifp1->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
                    /* Cache the first interface that can be recycled */
                    if (*ifp == NULL) {
                        *ifp = ifp1;
                    }
                    /*
                     * XXX Do not break or jump to end as we have to traverse
                     * the whole list to ensure there are no name collisions
                     */
                }
            }
        }
        lck_mtx_unlock(&dlifp1->dl_if_lock);
    }

    /* If there's an interface that can be recycled, use that */
    if (*ifp != NULL) {
        goto end;
    }

    /* no interface found, allocate a new one */
    buf = zalloc(dlif_zone);
    if (buf == NULL) {
        ret = ENOMEM;
        goto end;
    }
    bzero(buf, dlif_bufsize);

    /* Get the 64-bit aligned base address for this object */
    base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
        sizeof(u_int64_t));
    VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

    /*
     * Wind back a pointer size from the aligned base and
     * save the original address so we can free it later.
     */
    pbuf = (void **)((intptr_t)base - sizeof(void *));
    *pbuf = buf;
    dlifp1 = base;

    if (uniqueid_len) {
        MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
            M_NKE, M_WAITOK);
        if (dlifp1->dl_if_uniqueid == NULL) {
            zfree(dlif_zone, buf);
            ret = ENOMEM;
            goto end;
        }
        bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
        dlifp1->dl_if_uniqueid_len = uniqueid_len;
    }

    ifp1 = (struct ifnet *)dlifp1;
    dlifp1->dl_if_flags = DLIF_INUSE;
    if (ifnet_debug) {
        dlifp1->dl_if_flags |= DLIF_DEBUG;
        dlifp1->dl_if_trace = dlil_if_trace;
    }
    ifp1->if_name = dlifp1->dl_if_namestorage;
    ifp1->if_xname = dlifp1->dl_if_xnamestorage;

    /* initialize interface description */
    ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
    ifp1->if_desc.ifd_len = 0;
    ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
    mac_ifnet_label_init(ifp1);
#endif

    if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
        DLIL_PRINTF("%s: failed to allocate if local stats, "
            "error: %d\n", __func__, ret);
        /* This probably shouldn't be fatal */
        ret = 0;
    }

    lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
        ifnet_lock_attr);
    lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
    lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
        ifnet_lock_attr);
    ifp1->if_inetdata = NULL;
#endif
#if INET6
    lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
        ifnet_lock_attr);
    ifp1->if_inet6data = NULL;
#endif
    lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
        ifnet_lock_attr);
    ifp1->if_link_status = NULL;

    /* for send data paths */
    lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
        ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
        ifnet_lock_attr);
    lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
        ifnet_lock_attr);

    /* for receive data paths */
    lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
        ifnet_lock_attr);

    /* thread call allocation is done with sleeping zalloc */
    ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
        ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
    if (ifp1->if_dt_tcall == NULL) {
        panic_plain("%s: couldn't create if_dt_tcall", __func__);
        /* NOTREACHED */
    }

    TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

    *ifp = ifp1;

end:
    dlil_if_unlock();

    VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
        IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));

    return ret;
}
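/*
 * Worked example of the alignment dance above (illustrative numbers):
 * if zalloc() returns buf = 0x...f004, then base = P2ROUNDUP(buf + 8, 8)
 * = 0x...f010, which is 64-bit aligned.  The original zone pointer is
 * stashed at base - sizeof(void *) so that the free path can later
 * recover buf from the aligned object and return it to dlif_zone.
 */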
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
    struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

    VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
    if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
        VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
    }

    ifnet_lock_exclusive(ifp);
    lck_mtx_lock(&dlifp->dl_if_lock);
    dlifp->dl_if_flags &= ~DLIF_INUSE;
    strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
    ifp->if_name = dlifp->dl_if_namestorage;
    /* Reset external name (name + unit) */
    ifp->if_xname = dlifp->dl_if_xnamestorage;
    snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
        "%s?", ifp->if_name);
    lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
    /*
     * We can either recycle the MAC label here or in dlil_if_acquire().
     * It seems logical to do it here but this means that anything that
     * still has a handle on ifp will now see it as unlabeled.
     * Since the interface is "dead" that may be OK.  Revisit later.
     */
    mac_ifnet_label_recycle(ifp);
#endif
    ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
    lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
    lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
    LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
    /*
     * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
     * each bucket contains exactly one entry; PF_VLAN does not need an
     * explicit unplumb.
     *
     * if_proto_hash[3] is for other protocols; we expect anything
     * in this bucket to respond to the DETACHING event (which would
     * have happened by now) and do the unplumb then.
     */
    (void) proto_unplumb(PF_INET, ifp);
#if INET6
    (void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
    lck_mtx_lock_spin(&ifp->if_cached_route_lock);
    lck_mtx_convert_spin(&ifp->if_cached_route_lock);

    route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

    lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
    lck_mtx_lock_spin(&ifp->if_cached_route_lock);
    lck_mtx_convert_spin(&ifp->if_cached_route_lock);

    if (ifp->if_fwd_cacheok) {
        route_copyin(src, &ifp->if_src_route, sizeof(*src));
    } else {
        ROUTE_RELEASE(src);
    }
    lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
    lck_mtx_lock_spin(&ifp->if_cached_route_lock);
    lck_mtx_convert_spin(&ifp->if_cached_route_lock);

    route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
        sizeof(*dst));

    lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
    lck_mtx_lock_spin(&ifp->if_cached_route_lock);
    lck_mtx_convert_spin(&ifp->if_cached_route_lock);

    if (ifp->if_fwd_cacheok) {
        route_copyin((struct route *)src,
            (struct route *)&ifp->if_src_route6, sizeof(*src));
    } else {
        ROUTE_RELEASE(src);
    }
    lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
    struct route src_rt;
    struct sockaddr_in *dst;

    dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

    ifp_src_route_copyout(ifp, &src_rt);

    if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
        ROUTE_RELEASE(&src_rt);
        if (dst->sin_family != AF_INET) {
            bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
            dst->sin_len = sizeof(src_rt.ro_dst);
            dst->sin_family = AF_INET;
        }
        dst->sin_addr = src_ip;

        VERIFY(src_rt.ro_rt == NULL);
        src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
            0, 0, ifp->if_index);

        if (src_rt.ro_rt != NULL) {
            /* retain a ref, copyin consumes one */
            struct rtentry *rte = src_rt.ro_rt;
            RT_ADDREF(rte);
            ifp_src_route_copyin(ifp, &src_rt);
            src_rt.ro_rt = rte;
        }
    }

    return src_rt.ro_rt;
}
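/*
 * Illustrative usage (assumption, not from this file): the IP forwarding
 * path can consult the per-ifnet one-entry route cache above instead of
 * doing a full radix lookup when successive packets share a source, e.g.:
 *
 *	struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp, ip->ip_src);
 *	if (rt != NULL) {
 *		// ... use rt ...
 *		rtfree(rt);	// drop the reference retained above
 *	}
 */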
#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
    struct route_in6 src_rt;

    ifp_src_route6_copyout(ifp, &src_rt);

    if (ROUTE_UNUSABLE(&src_rt) ||
        !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
        ROUTE_RELEASE(&src_rt);
        if (src_rt.ro_dst.sin6_family != AF_INET6) {
            bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
            src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
            src_rt.ro_dst.sin6_family = AF_INET6;
        }
        src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
        bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
            sizeof(src_rt.ro_dst.sin6_addr));

        if (src_rt.ro_rt == NULL) {
            src_rt.ro_rt = rtalloc1_scoped(
                (struct sockaddr *)&src_rt.ro_dst, 0, 0,
                ifp->if_index);

            if (src_rt.ro_rt != NULL) {
                /* retain a ref, copyin consumes one */
                struct rtentry *rte = src_rt.ro_rt;
                RT_ADDREF(rte);
                ifp_src_route6_copyin(ifp, &src_rt);
                src_rt.ro_rt = rte;
            }
        }
    }

    return src_rt.ro_rt;
}
#endif /* INET6 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
    struct kev_dl_link_quality_metric_data ev_lqm_data;

    VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

    /* Normalize to edge */
    if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
        lqm = IFNET_LQM_THRESH_ABORT;
        atomic_bitset_32(&tcbinfo.ipi_flags,
            INPCBINFO_HANDLE_LQM_ABORT);
        inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
    } else if (lqm > IFNET_LQM_THRESH_ABORT &&
        lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
        lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
    } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
        lqm <= IFNET_LQM_THRESH_POOR) {
        lqm = IFNET_LQM_THRESH_POOR;
    } else if (lqm > IFNET_LQM_THRESH_POOR &&
        lqm <= IFNET_LQM_THRESH_GOOD) {
        lqm = IFNET_LQM_THRESH_GOOD;
    }

    /*
     * Take the lock if needed
     */
    if (!locked) {
        ifnet_lock_exclusive(ifp);
    }

    if (lqm == ifp->if_interface_state.lqm_state &&
        (ifp->if_interface_state.valid_bitmask &
        IF_INTERFACE_STATE_LQM_STATE_VALID)) {
        /*
         * Release the lock if it was not held by the caller
         */
        if (!locked) {
            ifnet_lock_done(ifp);
        }
        return;    /* nothing to update */
    }
    ifp->if_interface_state.valid_bitmask |=
        IF_INTERFACE_STATE_LQM_STATE_VALID;
    ifp->if_interface_state.lqm_state = lqm;

    /*
     * Don't want to hold the lock when issuing kernel events
     */
    ifnet_lock_done(ifp);

    bzero(&ev_lqm_data, sizeof(ev_lqm_data));
    ev_lqm_data.link_quality_metric = lqm;

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
        (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));

    /*
     * Reacquire the lock for the caller
     */
    if (locked) {
        ifnet_lock_exclusive(ifp);
    }
}
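/*
 * Worked example of the normalization above (illustrative): raw scores
 * are snapped to the nearest threshold edge, so a value just above
 * IFNET_LQM_THRESH_POOR but at or below IFNET_LQM_THRESH_GOOD reports
 * as IFNET_LQM_THRESH_GOOD, while anything from 0 down to
 * IFNET_LQM_THRESH_ABORT both reports as _ABORT and kicks the TCP fast
 * timer so connections on a dying link fail quickly.
 */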
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
    struct kev_dl_rrc_state kev;

    if (rrc_state == ifp->if_interface_state.rrc_state &&
        (ifp->if_interface_state.valid_bitmask &
        IF_INTERFACE_STATE_RRC_STATE_VALID)) {
        return;
    }

    ifp->if_interface_state.valid_bitmask |=
        IF_INTERFACE_STATE_RRC_STATE_VALID;

    ifp->if_interface_state.rrc_state = rrc_state;

    /*
     * Don't want to hold the lock when issuing kernel events
     */
    ifnet_lock_done(ifp);

    bzero(&kev, sizeof(struct kev_dl_rrc_state));
    kev.rrc_state = rrc_state;

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
        (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

    ifnet_lock_exclusive(ifp);
}
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
    u_short if_index_available = 0;

    ifnet_lock_exclusive(ifp);

    if ((ifp->if_type != IFT_CELLULAR) &&
        (if_interface_state->valid_bitmask &
        IF_INTERFACE_STATE_RRC_STATE_VALID)) {
        ifnet_lock_done(ifp);
        return ENOTSUP;
    }
    if ((if_interface_state->valid_bitmask &
        IF_INTERFACE_STATE_LQM_STATE_VALID) &&
        (if_interface_state->lqm_state < IFNET_LQM_MIN ||
        if_interface_state->lqm_state > IFNET_LQM_MAX)) {
        ifnet_lock_done(ifp);
        return EINVAL;
    }
    if ((if_interface_state->valid_bitmask &
        IF_INTERFACE_STATE_RRC_STATE_VALID) &&
        if_interface_state->rrc_state !=
        IF_INTERFACE_STATE_RRC_STATE_IDLE &&
        if_interface_state->rrc_state !=
        IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
        ifnet_lock_done(ifp);
        return EINVAL;
    }

    if (if_interface_state->valid_bitmask &
        IF_INTERFACE_STATE_LQM_STATE_VALID) {
        if_lqm_update(ifp, if_interface_state->lqm_state, 1);
    }
    if (if_interface_state->valid_bitmask &
        IF_INTERFACE_STATE_RRC_STATE_VALID) {
        if_rrc_state_update(ifp, if_interface_state->rrc_state);
    }
    if (if_interface_state->valid_bitmask &
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
        ifp->if_interface_state.valid_bitmask |=
            IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
        ifp->if_interface_state.interface_availability =
            if_interface_state->interface_availability;

        if (ifp->if_interface_state.interface_availability ==
            IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
            if_index_available = ifp->if_index;
        }
    }
    ifnet_lock_done(ifp);

    /*
     * Check if the TCP connections going on this interface should be
     * forced to send probe packets instead of waiting for TCP timers
     * to fire.  This will be done when there is an explicit
     * notification that the interface became available.
     */
    if (if_index_available > 0) {
        tcp_interface_send_probe(if_index_available);
    }

    return 0;
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
    ifnet_lock_shared(ifp);

    if_interface_state->valid_bitmask = 0;

    if (ifp->if_interface_state.valid_bitmask &
        IF_INTERFACE_STATE_RRC_STATE_VALID) {
        if_interface_state->valid_bitmask |=
            IF_INTERFACE_STATE_RRC_STATE_VALID;
        if_interface_state->rrc_state =
            ifp->if_interface_state.rrc_state;
    }
    if (ifp->if_interface_state.valid_bitmask &
        IF_INTERFACE_STATE_LQM_STATE_VALID) {
        if_interface_state->valid_bitmask |=
            IF_INTERFACE_STATE_LQM_STATE_VALID;
        if_interface_state->lqm_state =
            ifp->if_interface_state.lqm_state;
    }
    if (ifp->if_interface_state.valid_bitmask &
        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
        if_interface_state->valid_bitmask |=
            IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
        if_interface_state->interface_availability =
            ifp->if_interface_state.interface_availability;
    }

    ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
    ifnet_lock_exclusive(ifp);
    if (conn_probe > 1) {
        ifnet_lock_done(ifp);
        return EINVAL;
    }
    if (conn_probe == 0) {
        ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
    } else {
        ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
    }
    ifnet_lock_done(ifp);

#if NECP
    necp_update_all_clients();
#endif /* NECP */

    tcp_probe_connectivity(ifp, conn_probe);
    return 0;
}
int
uuid_get_ethernet(u_int8_t *node)
{
    struct ifnet *ifp;
    struct sockaddr_dl *sdl;

    ifnet_head_lock_shared();
    TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
        ifnet_lock_shared(ifp);
        IFA_LOCK_SPIN(ifp->if_lladdr);
        sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
        if (sdl->sdl_type == IFT_ETHER) {
            memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
            IFA_UNLOCK(ifp->if_lladdr);
            ifnet_lock_done(ifp);
            ifnet_head_done();
            return 0;
        }
        IFA_UNLOCK(ifp->if_lladdr);
        ifnet_lock_done(ifp);
    }
    ifnet_head_done();

    return -1;
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint32_t i;
    int err;

    i = if_rxpoll;

    err = sysctl_handle_int(oidp, &i, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (net_rxpoll == 0) {
        return ENXIO;
    }

    if_rxpoll = i;
    return err;
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint64_t q;
    int err;

    q = if_rxpoll_mode_holdtime;

    err = sysctl_handle_quad(oidp, &q, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
        q = IF_RXPOLL_MODE_HOLDTIME_MIN;
    }

    if_rxpoll_mode_holdtime = q;

    return err;
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint64_t q;
    int err;

    q = if_rxpoll_sample_holdtime;

    err = sysctl_handle_quad(oidp, &q, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (q < IF_RXPOLL_SAMPLETIME_MIN) {
        q = IF_RXPOLL_SAMPLETIME_MIN;
    }

    if_rxpoll_sample_holdtime = q;

    return err;
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint64_t q;
    int err;

    q = if_rxpoll_interval_time;

    err = sysctl_handle_quad(oidp, &q, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (q < IF_RXPOLL_INTERVALTIME_MIN) {
        q = IF_RXPOLL_INTERVALTIME_MIN;
    }

    if_rxpoll_interval_time = q;

    return err;
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint32_t i;
    int err;

    i = if_rxpoll_wlowat;

    err = sysctl_handle_int(oidp, &i, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (i == 0 || i >= if_rxpoll_whiwat) {
        return EINVAL;
    }

    if_rxpoll_wlowat = i;
    return err;
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint32_t i;
    int err;

    i = if_rxpoll_whiwat;

    err = sysctl_handle_int(oidp, &i, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (i <= if_rxpoll_wlowat) {
        return EINVAL;
    }

    if_rxpoll_whiwat = i;
    return err;
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int i, err;

    i = if_sndq_maxlen;

    err = sysctl_handle_int(oidp, &i, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (i < IF_SNDQ_MINLEN) {
        i = IF_SNDQ_MINLEN;
    }

    if_sndq_maxlen = i;
    return err;
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int i, err;

    i = if_rcvq_maxlen;

    err = sysctl_handle_int(oidp, &i, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (i < IF_RCVQ_MINLEN) {
        i = IF_RCVQ_MINLEN;
    }

    if_rcvq_maxlen = i;
    return err;
}
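/*
 * Illustrative userland usage (assumption about the exact MIB names,
 * which are declared elsewhere in this file): these handlers back
 * sysctls under net.link.generic.system, so a privileged process could
 * tune the receive-poll watermarks with sysctlbyname(3), e.g.:
 *
 *	int lo = 16;
 *	sysctlbyname("net.link.generic.system.rxpoll_wlowat",
 *	    NULL, NULL, &lo, sizeof(lo));
 *
 * Writes are rejected with EINVAL unless wlowat stays below whiwat,
 * mirroring the checks above.
 */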
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
    struct kev_dl_node_presence kev;
    struct sockaddr_dl *sdl;
    struct sockaddr_in6 *sin6;

    VERIFY(ifp);
    VERIFY(sa);
    VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

    bzero(&kev, sizeof(kev));
    sin6 = &kev.sin6_node_address;
    sdl = &kev.sdl_node_address;
    nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
    kev.rssi = rssi;
    kev.link_quality_metric = lqm;
    kev.node_proximity_metric = npm;
    bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

    nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
        &kev.link_data, sizeof(kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
    struct kev_dl_node_absence kev;
    struct sockaddr_in6 *sin6;
    struct sockaddr_dl *sdl;

    VERIFY(ifp);
    VERIFY(sa);
    VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

    bzero(&kev, sizeof(kev));
    sin6 = &kev.sin6_node_address;
    sdl = &kev.sdl_node_address;
    nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

    nd6_alt_node_absent(ifp, sin6);
    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
        &kev.link_data, sizeof(kev));
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
    const u_int8_t *bytes;
    size_t size;

    bytes = CONST_LLADDR(sdl);
    size = sdl->sdl_alen;

#if CONFIG_MACF
    if (dlil_lladdr_ckreq) {
        switch (sdl->sdl_type) {
        case IFT_ETHER:
        case IFT_IEEE1394:
            break;
        default:
            credp = NULL;
            break;
        };

        if (credp && mac_system_check_info(*credp, "net.link.addr")) {
            static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
                [0] = 2
            };

            bytes = unspec;
        }
    }
#else
#pragma unused(credp)
#endif /* !CONFIG_MACF */

    if (sizep != NULL) {
        *sizep = size;
    }
    return bytes;
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
    struct kev_dl_issues kev;
    struct timeval tv;

    VERIFY(ifp != NULL);
    VERIFY(modid != NULL);
    _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
    _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);

    bzero(&kev, sizeof(kev));

    microtime(&tv);
    kev.timestamp = tv.tv_sec;
    bcopy(modid, &kev.modid, DLIL_MODIDLEN);
    if (info != NULL) {
        bcopy(info, &kev.info, DLIL_MODARGLEN);
    }

    dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
        &kev.link_data, sizeof(kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
    u_int32_t level = IFNET_THROTTLE_OFF;
    errno_t result = 0;

    VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

    if (cmd == SIOCSIFOPPORTUNISTIC) {
        /*
         * XXX: Use priv_check_cred() instead of root check?
         */
        if ((result = proc_suser(p)) != 0) {
            return result;
        }

        if (ifr->ifr_opportunistic.ifo_flags ==
            IFRIFOF_BLOCK_OPPORTUNISTIC) {
            level = IFNET_THROTTLE_OPPORTUNISTIC;
        } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
            level = IFNET_THROTTLE_OFF;
        } else {
            result = EINVAL;
        }

        if (result == 0) {
            result = ifnet_set_throttle(ifp, level);
        }
    } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
        ifr->ifr_opportunistic.ifo_flags = 0;
        if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
            ifr->ifr_opportunistic.ifo_flags |=
                IFRIFOF_BLOCK_OPPORTUNISTIC;
        }
    }

    /*
     * Return the count of current opportunistic connections
     * over the interface.
     */
    if (result == 0) {
        uint32_t flags = 0;
        flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
            INPCB_OPPORTUNISTIC_SETCMD : 0;
        flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
            INPCB_OPPORTUNISTIC_THROTTLEON : 0;
        ifr->ifr_opportunistic.ifo_inuse =
            udp_count_opportunistic(ifp->if_index, flags) +
            tcp_count_opportunistic(ifp->if_index, flags);
    }

    if (result == EALREADY) {
        result = 0;
    }

    return result;
}
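/*
 * Illustrative userland usage (assumption, not from this file): the
 * opportunistic-throttling pair is driven through a socket ioctl on the
 * interface, roughly:
 *
 *	struct ifreq ifr;
 *	bzero(&ifr, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "en0", sizeof(ifr.ifr_name));
 *	ifr.ifr_opportunistic.ifo_flags = IFRIFOF_BLOCK_OPPORTUNISTIC;
 *	ioctl(s, SIOCSIFOPPORTUNISTIC, &ifr);	// requires root, per above
 *
 * On return, ifo_inuse carries the count of opportunistic TCP/UDP
 * connections computed above.
 */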
errno_t
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
    struct ifclassq *ifq;
    int err = 0;

    if (!(ifp->if_eflags & IFEF_TXSTART)) {
        return ENXIO;
    }

    *level = IFNET_THROTTLE_OFF;

    ifq = &ifp->if_snd;
    IFCQ_LOCK(ifq);
    /* Throttling works only for IFCQ, not ALTQ instances */
    if (IFCQ_IS_ENABLED(ifq)) {
        IFCQ_GET_THROTTLE(ifq, *level, err);
    }
    IFCQ_UNLOCK(ifq);

    return err;
}

errno_t
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
    struct ifclassq *ifq;
    int err = 0;

    if (!(ifp->if_eflags & IFEF_TXSTART)) {
        return ENXIO;
    }

    ifq = &ifp->if_snd;

    switch (level) {
    case IFNET_THROTTLE_OFF:
    case IFNET_THROTTLE_OPPORTUNISTIC:
        break;
    default:
        return EINVAL;
    }

    IFCQ_LOCK(ifq);
    if (IFCQ_IS_ENABLED(ifq)) {
        IFCQ_SET_THROTTLE(ifq, level, err);
    }
    IFCQ_UNLOCK(ifq);

    if (err == 0) {
        printf("%s: throttling level set to %d\n", if_name(ifp),
            level);
        if (level == IFNET_THROTTLE_OFF) {
            ifnet_start(ifp);
        }
    }

    return err;
}
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
    errno_t result = 0;
    uint32_t flags;
    int level, category, subcategory;

    VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

    if (cmd == SIOCSIFLOG) {
        if ((result = priv_check_cred(kauth_cred_get(),
            PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
            return result;
        }

        level = ifr->ifr_log.ifl_level;
        if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
            result = EINVAL;
        }

        flags = ifr->ifr_log.ifl_flags;
        if ((flags &= IFNET_LOGF_MASK) == 0) {
            result = EINVAL;
        }

        category = ifr->ifr_log.ifl_category;
        subcategory = ifr->ifr_log.ifl_subcategory;

        if (result == 0) {
            result = ifnet_set_log(ifp, level, flags,
                category, subcategory);
        }
    } else {
        result = ifnet_get_log(ifp, &level, &flags, &category,
            &subcategory);
        if (result == 0) {
            ifr->ifr_log.ifl_level = level;
            ifr->ifr_log.ifl_flags = flags;
            ifr->ifr_log.ifl_category = category;
            ifr->ifr_log.ifl_subcategory = subcategory;
        }
    }

    return result;
}
errno_t
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
    errno_t err = 0;

    VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
    VERIFY(flags & IFNET_LOGF_MASK);

    /*
     * The logging level applies to all facilities; make sure to
     * update them all with the most current level.
     */
    flags |= ifp->if_log.flags;

    if (ifp->if_output_ctl != NULL) {
        struct ifnet_log_params l;

        bzero(&l, sizeof(l));
        l.level = level;
        l.flags = flags;
        l.flags &= ~IFNET_LOGF_DLIL;
        l.category = category;
        l.subcategory = subcategory;

        /* Send this request to lower layers */
        if (l.flags != 0) {
            err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
                sizeof(l), &l);
        }
    } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
        /*
         * If targeted to the lower layers without an output
         * control callback registered on the interface, just
         * silently ignore facilities other than ours.
         */
        flags &= IFNET_LOGF_DLIL;
        if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
            level = 0;
        }
    }

    if (err == 0) {
        if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
            ifp->if_log.flags = 0;
        } else {
            ifp->if_log.flags |= flags;
        }

        log(LOG_INFO, "%s: logging level set to %d flags=%b "
            "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
            ifp->if_log.level, ifp->if_log.flags,
            IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
            category, subcategory);
    }

    return err;
}

errno_t
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
    if (level != NULL) {
        *level = ifp->if_log.level;
    }
    if (flags != NULL) {
        *flags = ifp->if_log.flags;
    }
    if (category != NULL) {
        *category = ifp->if_log.category;
    }
    if (subcategory != NULL) {
        *subcategory = ifp->if_log.subcategory;
    }

    return 0;
}
errno_t
ifnet_notify_address(struct ifnet *ifp, int af)
{
    struct ifnet_notify_address_params na;

#if PF
    (void) pf_ifaddr_hook(ifp);
#endif /* PF */

    if (ifp->if_output_ctl == NULL) {
        return EOPNOTSUPP;
    }

    bzero(&na, sizeof(na));
    na.address_family = af;

    return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
        sizeof(na), &na);
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
    if (ifp == NULL || flowid == NULL) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        !IF_FULLY_ATTACHED(ifp)) {
        return ENXIO;
    }

    *flowid = ifp->if_flowhash;

    return 0;
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
    int err;

    if (ifp == NULL) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        !IF_FULLY_ATTACHED(ifp)) {
        return ENXIO;
    }

    if ((err = ifnet_fc_add(ifp)) == 0) {
        lck_mtx_lock_spin(&ifp->if_start_lock);
        ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
        lck_mtx_unlock(&ifp->if_start_lock);
    }
    return err;
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
    if (ifp == NULL) {
        return EINVAL;
    } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
        !IF_FULLY_ATTACHED(ifp)) {
        return ENXIO;
    }

    ifnet_start_common(ifp, TRUE);
    return 0;
}
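/*
 * Illustrative driver-side pairing (assumption): a driver whose hardware
 * ring fills calls ifnet_disable_output(ifp) to set IFSF_FLOW_CONTROLLED,
 * and later, when descriptors free up, re-enables transmission either
 * directly with ifnet_enable_output(ifp) or indirectly via the
 * flow-advisory path, ifnet_flowadv(ifp->if_flowhash), which resolves
 * the hash back to the ifnet as shown below.
 */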
void
ifnet_flowadv(uint32_t flowhash)
{
    struct ifnet_fc_entry *ifce;
    struct ifnet *ifp;

    ifce = ifnet_fc_get(flowhash);
    if (ifce == NULL) {
        return;
    }

    VERIFY(ifce->ifce_ifp != NULL);
    ifp = ifce->ifce_ifp;

    /* flow hash gets recalculated per attach, so check */
    if (ifnet_is_attached(ifp, 1)) {
        if (ifp->if_flowhash == flowhash) {
            (void) ifnet_enable_output(ifp);
        }
        ifnet_decr_iorefcnt(ifp);
    }
    ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
    return fc1->ifce_flowhash - fc2->ifce_flowhash;
}
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return ENOMEM;
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;
}
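
/*
 * The same "reseed and retry until non-zero" idiom, shown standalone:
 * zero is reserved to mean "no flow hash", so a zero digest is never
 * handed out.  A minimal sketch, assuming a hypothetical key structure
 * (my_flow_key) and a caller-managed seed; kept under #if 0.
 */
#if 0
struct my_flow_key {
	uint32_t src;
	uint32_t dst;
	uint16_t sport;
	uint16_t dport;
};

static uint32_t
my_calc_flowhash(const struct my_flow_key *key, uint32_t *seed)
{
	uint32_t hash;

	if (*seed == 0) {
		*seed = RandomULong();
	}
	do {
		hash = net_flowhash(key, sizeof(*key), *seed);
		if (hash == 0) {
			/* reseed and retry to obtain a non-zero hash */
			*seed = RandomULong();
		}
	} while (hash == 0);

	return hash;
}
#endif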
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
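
/*
 * Calling-convention sketch for ifnet_get_netsignature(): *len is
 * in/out -- on input the caller's buffer capacity, on output the
 * actual signature length.  A minimal sketch, assuming the
 * IFNET_SIGNATURELEN buffer-size constant; my_dump_netsig is a
 * hypothetical name, kept under #if 0.
 */
#if 0
static void
my_dump_netsig(struct ifnet *ifp)
{
	uint8_t sig[IFNET_SIGNATURELEN];	/* caller-supplied buffer */
	uint8_t len = sizeof(sig);		/* in: capacity, out: length */
	uint16_t flags;

	if (ifnet_get_netsignature(ifp, AF_INET6, &len, &flags, sig) == 0 &&
	    len > 0) {
		/* sig[0..len-1] now holds the IPv6 network signature */
	}
}
#endif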
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the prefix */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));
			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
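
/*
 * A minimal sketch of filling the prefixes array for the setter above:
 * slot 0 carries the well-known NAT64 prefix 64:ff9b::/96 and the
 * remaining slots are left with prefix_len 0, which the loop treats as
 * a purge.  my_install_wk_nat64 is a hypothetical name, kept under
 * #if 0.
 */
#if 0
static int
my_install_wk_nat64(struct ifnet *ifp)
{
	struct ipv6_prefix prefixes[NAT64_MAX_NUM_PREFIXES];
	struct in6_addr wk = {{{ 0x00, 0x64, 0xff, 0x9b }}};

	bzero(prefixes, sizeof(prefixes));	/* slots 1..N-1 get purged */
	prefixes[0].ipv6_prefix = wk;
	prefixes[0].prefix_len = NAT64_PREFIX_LEN_96;

	return ifnet_set_nat64prefix(ifp, prefixes);
}
#endif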
int
ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	default:
		return;
	}
}
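
/*
 * A plain reference sketch of the 16-bit 1's complement sum that the
 * debug hooks above and below exercise; the real b_sum16()/m_sum16()
 * are optimized equivalents that also handle mbuf chains.  Bytes are
 * paired in native little-endian order, which reproduces the
 * precomputed sumtbl[] values further below.  ref_sum16 is a
 * hypothetical name, kept under #if 0.
 */
#if 0
static uint16_t
ref_sum16(const uint8_t *buf, int len)
{
	uint32_t sum = 0;

	/* accumulate 16-bit words */
	while (len > 1) {
		sum += (uint32_t)buf[0] | ((uint32_t)buf[1] << 8);
		buf += 2;
		len -= 2;
	}
	if (len > 0) {
		sum += buf[0];		/* odd trailing byte */
	}
	/* fold the carries back into 16 bits */
	while (sum > 0xffff) {
		sum = (sum & 0xffff) + (sum >> 16);
	}
	return (uint16_t)sum;
}
#endif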
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len) {
			return;
		}

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware
		 * implementations that perform the 16-bit 1's complement
		 * sum beginning at various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
				return;
			}

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
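
/*
 * What the verification block above expects a partial-checksum-capable
 * receive path to hand up, shown as a minimal sketch: the raw 16-bit
 * 1's complement sum in csum_rx_val and the frame-relative offset it
 * was computed from in csum_rx_start, with CSUM_DATA_VALID|CSUM_PARTIAL
 * set (and CSUM_PSEUDO_HDR clear).  my_drv_rx_csum is a hypothetical
 * name, kept under #if 0.
 */
#if 0
static void
my_drv_rx_csum(struct mbuf *m, uint16_t hw_sum, uint16_t hw_sum_start)
{
	m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
	m->m_pkthdr.csum_rx_val = hw_sum;
	m->m_pkthdr.csum_rx_start = hw_sum_start;
}
#endif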
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (hwcksum_dbg == 0) {
		return ENODEV;
	}

	if ((i & ~HWCKSUM_DBG_MASK) != 0) {
		return EINVAL;
	}

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return err;
}
static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_forced = i;

	return err;
}
static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
		return ENODEV;
	}

	hwcksum_dbg_partial_rxoff_adj = i;

	return err;
}
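
/*
 * The three handlers above share one pattern: sysctl_handle_int()
 * copies the current value out and, when req->newptr is set, copies
 * the proposed value in; the handler then validates and commits it.
 * A minimal sketch of that pattern with hypothetical names
 * (my_tunable, MY_TUNABLE_MAX), kept under #if 0:
 */
#if 0
static u_int32_t my_tunable = 0;
#define MY_TUNABLE_MAX	16

static int
sysctl_my_tunable SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = my_tunable;				/* current value */

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;			/* read-only access or error */
	}

	if (i > MY_TUNABLE_MAX) {		/* validate the new value */
		return EINVAL;
	}

	my_tunable = i;				/* commit */
	return err;
}
#endif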
static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {
	}
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return err;
}
#if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;		/* reference */
	uint16_t sumrp;		/* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 }
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
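
/*
 * Worked example for the table above: each entry is the 1's complement
 * sum over the first `len' bytes of sumdata[], with bytes paired in
 * native little-endian order.  For len = 3 the bytes are 0x1f, 0x8b,
 * 0x08, so:
 *
 *	0x8b1f (word 0x1f | 0x8b << 8) + 0x0008 (odd byte) = 0x8b27
 *
 * which matches the { FALSE, 3, 0, 0x8b27 } entry.
 */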
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			if (!sumtbl[n].init) {
				sumr = in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
#endif /* DEBUG || DEVELOPMENT */
#define CASE_STRINGIFY(x) case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
		CASE_STRINGIFY(KEV_DL_SIFMTU);
		CASE_STRINGIFY(KEV_DL_SIFPHYS);
		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
		CASE_STRINGIFY(KEV_DL_ADDMULTI);
		CASE_STRINGIFY(KEV_DL_DELMULTI);
		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_OFF);
		CASE_STRINGIFY(KEV_DL_LINK_ON);
		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
		CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
		CASE_STRINGIFY(KEV_DL_ISSUES);
		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
static void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
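
/*
 * The rate-limiting arithmetic above, pulled out as a minimal sketch:
 * the deadline snaps to the next multiple of the interval at or after
 * "now", so entering the thread call with it coalesces notifications
 * to at most one per interval.  my_schedule_rate_limited is a
 * hypothetical name, kept under #if 0.
 */
#if 0
static void
my_schedule_rate_limited(thread_call_t tcall, uint64_t interval_nsec)
{
	uint64_t ival, now, deadline;

	nanoseconds_to_absolutetime(interval_nsec, &ival);
	now = mach_absolute_time();
	deadline = now;
	clock_deadline_for_periodic_event(ival, now, &deadline);
	(void) thread_call_enter_delayed(tcall, deadline);
}
#endif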
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *	ifp (interface index): name[0]
 *	frames_array_count: name[1]
 *	frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		_FREE(frames_array, M_TEMP);
	}
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
static void
dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	struct ifnet *ifp = (struct ifnet *)arg0;
	struct dlil_threading_info *inp = ifp->if_inp;

	ifnet_lock_shared(ifp);
	if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
		ifnet_lock_done(ifp);
		return;
	}

	lck_mtx_lock_spin(&inp->input_lck);
	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
	    !qempty(&inp->rcvq_pkts)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);
	ifnet_lock_done(ifp);
}