/*
 * Copyright (c) 1999-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>

#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>

#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>

#include <net/pfvar.h>
#include <net/altq/altq.h>
#include <net/pktsched/pktsched.h>
#include <net/necp.h>
#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define	MAX_LINKADDR		4	/* LONGWORDS */
#define	M_NKE			M_IFADDR
#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif
#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
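/*
 * Illustrative sketch (not part of the original source): _CASSERT is
 * XNU's compile-time assertion macro.  The same alignment check can be
 * written in standalone C11 with _Static_assert; the struct and field
 * names below are hypothetical.
 */
#if 0	/* example only */
#include <assert.h>	/* static_assert */
#include <stddef.h>	/* offsetof */
#include <stdint.h>

struct example_stats {
	uint32_t	pad1;		/* 4 bytes ... */
	uint32_t	pad2;		/* ... pad to an 8-byte boundary */
	uint64_t	ex_ipackets;	/* updated with 64-bit atomics */
};

/* rejects the build if ex_ipackets ever lands on a misaligned offset */
static_assert(offsetof(struct example_stats, ex_ipackets) %
    sizeof (uint64_t) == 0, "ex_ipackets must be 64-bit aligned");
#endif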
/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	proto_kpi_t		proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
#define	DLIL_SDLMAXLEN	64
#define	DLIL_SDLDATALEN	\
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};
/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */

__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};
#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
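/*
 * Illustrative sketch (not part of the original source): the two macros
 * above are safe only because dl_if is the *first* member of struct
 * dlil_ifnet, so the container and its embedded ifnet share an address.
 * The structs below are hypothetical stand-ins for that idiom.
 */
#if 0	/* example only */
struct inner { int i; };
struct outer {
	struct inner	in;	/* first member: &out == (void *)&out.in */
	int		extra;	/* container-private state */
};

static struct inner *
outer_to_inner(struct outer *o)
{
	return (&o->in);			/* cf. DLIL_TO_IFP() */
}

static struct outer *
inner_to_outer(struct inner *i)
{
	return ((struct outer *)(void *)i);	/* cf. IFP_TO_DLIL() */
}
#endif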
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter) filt_next;
	u_int32_t	filt_skip;
	u_int32_t	filt_flags;
	ifnet_t		filt_ifp;
	const char	*filt_name;
	void		*filt_cookie;
	protocol_family_t filt_protocol;
	iff_input_func	filt_input;
	iff_output_func	filt_output;
	iff_event_func	filt_event;
	iff_ioctl_func	filt_ioctl;
	iff_detached_func filt_detached;
};
struct proto_input_entry;

static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	  /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	  /* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	  /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	  /* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */
static u_int32_t net_rtref;

static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg,
    bool update_generation);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);
static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
static void ifp_if_start(struct ifnet *);
static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static void dlil_verify_sum16(void);
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);
static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);

static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;
struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};
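/*
 * Illustrative sketch (not part of the original source): the key above
 * is hashed by ifnet_calc_flowhash() (declared below, defined outside
 * this excerpt) to yield a non-zero per-interface flow hash.  A
 * plausible shape of that computation, assuming net_flowhash(buf, len,
 * seed) and RandomULong() as provided elsewhere in xnu:
 */
#if 0	/* example only */
static uint32_t
example_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh;
	uint32_t flowhash;

	bzero(&fh, sizeof (fh));
	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s",
	    ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	/* ... remaining ifk_ fields filled from the ifnet ... */

	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	while (flowhash == 0) {
		/* insist on a non-zero hash; reseed and retry */
		ifnet_flowhash_seed = RandomULong();
		flowhash = net_flowhash(&fh, sizeof (fh),
		    ifnet_flowhash_seed);
	}
	return (flowhash);
}
#endif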
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};
static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
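/*
 * Illustrative sketch (not part of the original source): RB_GENERATE
 * requires a total-order comparator.  A minimal comparator keyed on
 * the flow hash would look like the following; this is an assumption
 * about ifce_cmp, whose definition lies outside this excerpt.
 */
#if 0	/* example only */
static inline int
example_ifce_cmp(const struct ifnet_fc_entry *fc1,
    const struct ifnet_fc_entry *fc2)
{
	/* negative, zero, or positive, as RB_GENERATE expects */
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
#endif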
static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);
int dlil_lladdr_ckreq = 0;

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");
#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");
#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");
static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
u_int32_t if_bw_smoothing_val = 3;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");

u_int32_t if_bw_measure_size = 10;
SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");

static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");
#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");
static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");

uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");
u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");
#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)
static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");
static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");
uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");
uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t	inject_buckets;

static lck_grp_attr_t	*dlil_grp_attributes = NULL;
static lck_attr_t	*dlil_lck_attributes = NULL;
#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}
#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
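/*
 * Illustrative sketch (not part of the original source): with decay d,
 * DLIL_EWMA computes avg = (avg * (2^d - 1) + new) / 2^d using only
 * shifts.  For d = 2 (the IF_RXPOLL_DECAY default), avg = 100 and
 * new = 20 gives ((100 << 2) - 100 + 20) >> 2 = 320 >> 2 = 80.
 */
#if 0	/* example only */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t avg = 100;
	const uint32_t decay = 2;		/* weight new sample by 1/4 */
	uint32_t samples[] = { 20, 20, 20 };

	for (unsigned int i = 0; i < 3; i++) {
		uint32_t new = samples[i];
		DLIL_EWMA(avg, new, decay);	/* macro defined above */
		printf("avg = %u\n", avg);	/* prints 80, 65, 53 */
	}
	return (0);
}
#endif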
#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)
struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
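/*
 * Illustrative sketch (not part of the original source): polling
 * parameters for a link are plausibly picked by scanning rxpoll_tbl
 * for the highest entry whose speed does not exceed the measured
 * downlink rate, roughly as follows (the helper name is hypothetical):
 */
#if 0	/* example only */
static void
example_rxpoll_watermarks(u_int64_t inbw, u_int32_t *plowat,
    u_int32_t *phiwat)
{
	unsigned int n, i;

	for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
		if (inbw < rxpoll_tbl[i].speed)
			break;
		n = i;		/* remember the best match so far */
	}
	*plowat = rxpoll_tbl[n].plowat;
	*phiwat = rxpoll_tbl[n].phiwat;
}
#endif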
static inline u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change).
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		lck_rw_assert(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post.
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	/* Don't update interface generation for quality and RRC state changes */
	bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
	    (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
	    event_code != KEV_DL_RRC_STATE_CHANGED));

	dlil_event_internal(ifp, &ev_msg, update_generation);
}
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}
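/*
 * Illustrative sketch (not part of the original source): the
 * allocate-align-stash idiom used above, in standalone form.  The
 * buffer is oversized by a pointer plus an alignment quantum; the
 * original pointer is stored in the slot just before the aligned base
 * so the free path can recover it.  Function names are hypothetical.
 */
#if 0	/* example only */
#include <stdint.h>
#include <stdlib.h>

static void *
alloc_aligned64(size_t objsize)
{
	size_t bufsize = objsize + sizeof (void *) + sizeof (uint64_t);
	void *buf, *base, **pbuf;

	if ((buf = calloc(1, bufsize)) == NULL)
		return (NULL);
	/* round up past the stash slot to the next 8-byte boundary */
	base = (void *)(((uintptr_t)buf + sizeof (void *) +
	    (sizeof (uint64_t) - 1)) & ~(uintptr_t)(sizeof (uint64_t) - 1));
	pbuf = (void **)((uintptr_t)base - sizeof (void *));
	*pbuf = buf;			/* remember what to free */
	return (base);
}

static void
free_aligned64(void *base)
{
	void **pbuf = (void **)((uintptr_t)base - sizeof (void *));
	free(*pbuf);
}
#endif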
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_random(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	ifp = inp->ifp;
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_*
	 * counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof (net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
	dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
	    sizeof (struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
	ifnet_llreach_init();

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

	/* Initialize the packet filter */
	pf_init();

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Run self-tests */
	dlil_verify_sum16();

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
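/*
 * Illustrative sketch (not part of the original source): the
 * busy/waiters pair above is a classic sleep-based monitor.  A
 * userland analogue with pthreads makes the shape explicit; all names
 * below are hypothetical.
 */
#if 0	/* userland example only */
#include <pthread.h>

struct monitor {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	unsigned int	busy;		/* cf. if_flt_busy */
	unsigned int	waiters;	/* cf. if_flt_waiters */
};

static void
monitor_enter(struct monitor *mon)
{
	pthread_mutex_lock(&mon->lock);
	while (mon->busy) {		/* cf. msleep() on if_flt_head */
		mon->waiters++;
		pthread_cond_wait(&mon->cv, &mon->lock);
	}
	mon->busy++;
	pthread_mutex_unlock(&mon->lock);
}

static void
monitor_leave(struct monitor *mon)
{
	pthread_mutex_lock(&mon->lock);
	if (--mon->busy == 0 && mon->waiters > 0) {
		mon->waiters = 0;
		pthread_cond_broadcast(&mon->cv);	/* cf. wakeup() */
	}
	pthread_mutex_unlock(&mon->lock);
}
#endif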
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
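/*
 * Example (illustrative sketch): a kext normally reaches this code
 * through the kpi_interfacefilter KPI rather than calling
 * dlil_attach_filter() directly.  The callback names and context
 * below are hypothetical; iflt_attach() and struct iff_filter are
 * the real KPI.
 *
 *	static struct iff_filter my_filt = {
 *		.iff_cookie   = &my_ctx,
 *		.iff_name     = "com.example.filter",
 *		.iff_protocol = 0,		// match all protocols
 *		.iff_input    = my_input_fn,
 *		.iff_output   = my_output_fn,
 *		.iff_detached = my_detached_fn,
 *	};
 *	interface_filter_t ref;
 *	errno_t err = iflt_attach(ifp, &my_filt, &ref);
 */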
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				lck_mtx_assert(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return (retval);
}

__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "dlil_input_%s",
	    ifp->if_xname);
	thread_set_thread_name(inp->input_thr, thread_name);

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread on which the packet
		 * arrived, but that requires our knowing the interface in
		 * advance (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread on which the packet
		 * arrived, but that requires our knowing the interface in
		 * advance (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			_flushq(&inp->rcvq_pkts);
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime))
			*(&inp->sample_lasttime) = *(&now);

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <))
				goto skip;

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
				inp->rxpoll_bmin = btot;
			if (btot > inp->rxpoll_bmax)
				inp->rxpoll_bmax = btot;

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
				inp->rxpoll_pmin = ptot;
			if (ptot > inp->rxpoll_pmax)
				inp->rxpoll_pmax = ptot;

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot,
			    if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime))
					*(&inp->dbg_lasttime) = *(&now);
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime))
				*(&inp->mode_lasttime) = *(&now);

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <))
				goto skip;

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };
			errno_t err;

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof (p),
			    &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0)
					inp->rxpoll_offerr++;
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0)
					inp->rxpoll_onerr++;
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat,
		    inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}
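/*
 * Example (illustrative sketch): an RXPOLL-capable driver can override
 * the auto-tuned thresholds through the public KPI wrapper
 * ifnet_set_poll_params(), which lands here; passing NULL reverts to
 * auto-tuning.  The watermark values below are arbitrary.
 *
 *	struct ifnet_poll_params p;
 *	bzero(&p, sizeof (p));
 *	p.packets_lowat = 8;	// fall back to POLL_OFF below this rate
 *	p.packets_hiwat = 64;	// transition to POLL_ON above this rate
 *	errno_t err = ifnet_set_poll_params(ifp, &p);
 */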
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}
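/*
 * Example (illustrative sketch): a driver's receive path hands a
 * packet chain to DLIL; the stat increments are mandatory for the
 * extended variant.  "head", "tail", "cnt" and "len" are assumed to
 * describe the chain the driver just assembled.
 *
 *	struct ifnet_stat_increment_param s;
 *	bzero(&s, sizeof (s));
 *	s.packets_in = cnt;
 *	s.bytes_in = len;
 *	(void) ifnet_input_extended(ifp, head, tail, &s);
 */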
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	ifnet_input_handler_func handler_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	handler_func = ifp->if_input_handler;
	VERIFY(handler_func != NULL);

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if (s == NULL) {
		bzero(&_s, sizeof (_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	err = (*handler_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (err);
}
errno_t
ifnet_set_input_handler(struct ifnet *ifp, ifnet_input_handler_func fn)
{
	return (atomic_test_set_ptr(&ifp->if_input_handler,
	    dlil_input_handler, fn) ? 0 : EBUSY);
}

void
ifnet_reset_input_handler(struct ifnet *ifp)
{
	atomic_set_ptr(&ifp->if_input_handler, dlil_input_handler);
}

errno_t
ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_handler_func fn)
{
	return (atomic_test_set_ptr(&ifp->if_output_handler,
	    dlil_output_handler, fn) ? 0 : EBUSY);
}

void
ifnet_reset_output_handler(struct ifnet *ifp)
{
	atomic_set_ptr(&ifp->if_output_handler, dlil_output_handler);
}

errno_t
dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
{
	return (ifp->if_output(ifp, m));
}
errno_t
dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread)
		dlil_input_stats_sync(ifp, inp);

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
static void
ifnet_start_common(struct ifnet *ifp, int resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it.
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, 0);
}
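/*
 * Example (illustrative sketch): drivers opt into this starter-thread
 * model by supplying a start callback at allocation time; the stack
 * then wakes the per-interface starter via ifnet_start() whenever new
 * packets are enqueued.  "my_start" is hypothetical.
 *
 *	struct ifnet_init_eparams ep;
 *	bzero(&ep, sizeof (ep));
 *	ep.ver = IFNET_INIT_CURRENT_VERSION;
 *	ep.len = sizeof (ep);
 *	ep.start = my_start;	// drains if_snd via ifnet_dequeue()
 *	// ... other required fields ...
 *	err = ifnet_allocate_extended(&ep, &ifp);
 */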
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	char thread_name[MAXTHREADNAMESIZE];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
	struct timespec delay_start_ts;

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	snprintf(thread_name, sizeof(thread_name), "ifnet_start_%s",
	    ifp->if_xname);
	thread_set_thread_name(ifp->if_start_thread, thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		if (ifp->if_start_thread != NULL)
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);

		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;

		for (;;) {
			u_int32_t req = ifp->if_start_req;
			if (!IFCQ_IS_EMPTY(ifq) &&
			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
			    ifp->if_start_delayed == 0 &&
			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
			    (ifp->if_eflags & IFEF_DELAY_START)) {
				ifp->if_start_delayed = 1;
				ifnet_start_delayed++;
				break;
			} else {
				ifp->if_start_delayed = 0;
			}
			lck_mtx_unlock(&ifp->if_start_lock);

			/*
			 * If no longer attached, don't call start because ifp
			 * is being destroyed; else hold an IO refcnt to
			 * prevent the interface from being detached (will be
			 * released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_start_lock);
				break;
			}

			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));

			/*
			 * Release the io ref count taken by ifnet_is_attached.
			 */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_start_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_start_req)
				break;
		}

		ifp->if_start_req = 0;
		ifp->if_start_active = 0;
		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller", if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req)
				break;
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}

void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}

errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	int error;
	struct timespec now;
	u_int64_t now_nsec;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL)
			m_freem_list(m);
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		/* flag tested without lock for performance */
		m_freem(m);
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		return (ENETDOWN);
	}

	nanouptime(&now);
	net_timernsec(&now, &now_nsec);
	m->m_pkthdr.pkt_timestamp = now_nsec;
	m->m_pkthdr.pkt_flags &= ~PKTF_DRV_TS_VALID;

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows.  This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback.  This
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &=
						    ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

	/* enqueue the packet */
	error = ifclassq_enqueue(&ifp->if_snd, m);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    (error == 0 || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
}
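/*
 * Usage sketch (illustrative): the normal transmit flow lands here
 * from dlil_output(); a classifier-tagged packet is enqueued on
 * if_snd and, unless multi-packet coalescing is active, the starter
 * thread is kicked immediately:
 *
 *	errno_t err = ifnet_enqueue(ifp, m);	// stamp + classq enqueue
 *	// EQFULL/EQSUSPENDED still wake the starter thread, which later
 *	// invokes the driver's if_start to drain via ifnet_dequeue().
 */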
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || pkt_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || byte_limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail,
    u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit, head,
	    tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
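/*
 * Example (illustrative sketch): a driver's start callback typically
 * drains if_snd in batches.  "MYDRV_TX_BATCH" and the transmit helper
 * are hypothetical; ifnet_dequeue_multi() is the real KPI above.
 *
 *	static void
 *	my_start(ifnet_t ifp)
 *	{
 *		struct mbuf *head, *tail;
 *		u_int32_t cnt, len;
 *
 *		while (ifnet_dequeue_multi(ifp, MYDRV_TX_BATCH,
 *		    &head, &tail, &cnt, &len) == 0)
 *			mydrv_tx_chain(ifp, head);	// hand to hardware
 *	}
 */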
static errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr,
	    frame_type));
}
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}

static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
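/*
 * Example (illustrative sketch): a minimal iff_input callback.  A
 * non-zero return stops the walk above and is propagated to the
 * caller; by convention EJUSTRETURN tells DLIL the filter took
 * ownership of the mbuf.  "PF_INET check" is just a sample policy.
 *
 *	static errno_t
 *	my_input_fn(void *cookie, ifnet_t ifp, protocol_family_t pf,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		if (pf != PF_INET)
 *			return (0);	// pass along unmodified
 *		// ... inspect or modify *data here ...
 *		return (0);
 *	}
 */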
static void
dlil_ifproto_input(struct if_proto *ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *frame_header;
			mbuf_t next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 */
	if (ifp->if_data_threshold &&
	    (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
	    ifp->if_data_threshold) {
		ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;

		lck_mtx_convert_spin(&inp->input_lck);
		nstat_ifnet_threshold_reached(ifp->if_index);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header;
	struct if_proto *last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t *pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			pktf_mask = 0;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
static int
if_mcasts_update(struct ifnet *ifp)
{
	int err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL)
		ifnet_increment_generation(ifp);

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return (kev_post_msg(event));
}
#define	TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event,
    bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_proto_count(ifp);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg, TRUE);

	return (result);
}
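/*
 * Example (illustrative sketch): a driver reports a link-status
 * change by filling in a kern_event_msg and handing it to
 * ifnet_event(), which fans it out to the filters and protocols
 * above.  Constant names are from the networking kevent headers.
 *
 *	struct kern_event_msg ev;
 *	bzero(&ev, sizeof (ev));
 *	ev.vendor_code = KEV_VENDOR_APPLE;
 *	ev.kev_class = KEV_NETWORK_CLASS;
 *	ev.kev_subclass = KEV_DL_SUBCLASS;
 *	ev.event_code = KEV_DL_LINK_ON;
 *	ev.total_size = KEV_MSG_HEADER_SIZE;
 *	(void) ifnet_event(ifp, &ev);
 */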
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
3958 * This is mostly called from the context of the DLIL input thread;
3959 * because of that there is no need for atomic operations.
3961 static __inline
void
3962 ifp_inc_traffic_class_in(struct ifnet
*ifp
, struct mbuf
*m
)
3964 if (!(m
->m_flags
& M_PKTHDR
))
3967 switch (m_get_traffic_class(m
)) {
3969 ifp
->if_tc
.ifi_ibepackets
++;
3970 ifp
->if_tc
.ifi_ibebytes
+= m
->m_pkthdr
.len
;
3973 ifp
->if_tc
.ifi_ibkpackets
++;
3974 ifp
->if_tc
.ifi_ibkbytes
+= m
->m_pkthdr
.len
;
3977 ifp
->if_tc
.ifi_ivipackets
++;
3978 ifp
->if_tc
.ifi_ivibytes
+= m
->m_pkthdr
.len
;
3981 ifp
->if_tc
.ifi_ivopackets
++;
3982 ifp
->if_tc
.ifi_ivobytes
+= m
->m_pkthdr
.len
;
3988 if (mbuf_is_traffic_class_privileged(m
)) {
3989 ifp
->if_tc
.ifi_ipvpackets
++;
3990 ifp
->if_tc
.ifi_ipvbytes
+= m
->m_pkthdr
.len
;
/*
 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics.  We trade off accuracy for performance
 * by not using atomic operations here.
 */
static __inline void
ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_obepackets++;
		ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_obkpackets++;
		ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ovipackets++;
		ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ovopackets++;
		ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_opvpackets++;
		ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
	}
}
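
/*
 * Sketch (not in the original source): the counters above are driven by
 * the mbuf's traffic class, which a sender can set through the exported
 * mbuf KPI before handing the packet to DLIL; the value selects one of
 * the MBUF_TC_{BE,BK,VI,VO} cases counted above.
 */
#if 0	/* example only */
static errno_t
example_mark_voice(mbuf_t m)
{
	/* account this packet under the "voice" (VO) counters on output */
	return (mbuf_set_traffic_class(m, MBUF_TC_VO));
}
#endif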
__private_extern__ void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
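
/*
 * Sketch (not in the original source): dlil_count_chain_len() buckets the
 * number of mbufs linked through m_next.  A payload carved into two
 * clusters, for example, lands in cls_two.
 */
#if 0	/* example only */
static void
example_chain_len(void)
{
	mbuf_t m1 = NULL, m2 = NULL;

	if (mbuf_getcluster(MBUF_WAITOK, MBUF_TYPE_DATA, 2048, &m1) != 0)
		return;
	if (mbuf_getcluster(MBUF_WAITOK, MBUF_TYPE_DATA, 2048, &m2) != 0) {
		mbuf_freem(m1);
		return;
	}
	mbuf_setnext(m1, m2);			/* two-element chain */
	dlil_count_chain_len(m1, &tx_chain_len_stats);	/* bumps cls_two */
	mbuf_freem(m1);				/* frees the whole chain */
}
#endif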
/*
 * dlil_output
 *
 * Caller should have a lock on the protocol domain if the protocol
 * doesn't support finer grained locking.  In most cases, the lock
 * will be held from the socket layer and won't be released until
 * we return back to the socket layer.
 *
 * This does mean that we must take a protocol lock before we take
 * an interface lock if we're going to take both.  This makes sense
 * because a protocol is likely to interact with an ifp while it
 * is under the protocol lock.
 *
 * An advisory code will be returned if adv is not null.  This
 * can be used to provide feedback about interface queues to the
 * application.
 */
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	ifnet_output_handler_func handler_func;
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	handler_func = ifp->if_output_handler;
	VERIFY(handler_func != NULL);

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp.  This will allow
			 * us to determine that it is a legitimate packet
			 * for the system.  Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context.  If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary.  If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertises TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
				ifp->if_fg_sendts = net_uptime();

			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
				ifp->if_rt_sendts = net_uptime();
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*handler_func)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*handler_func)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;
			int enq_cnt = 0;
			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*handler_func)(ifp, send_m);
				if (retval == EQFULL ||
				    retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0) {
					enq_cnt++;
					if (flen > 0)
						fpkts++;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
			if (enq_cnt > 0) {
				fbytes += flen;
				ifnet_start(ifp);
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
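
/*
 * Sketch (not in the original source): how a caller of dlil_output() can
 * consume the advisory code.  When the driver returns EQFULL/EQSUSPENDED
 * the error is absorbed above and surfaced through adv->code instead, so
 * the transport can apply flow control rather than drop.
 */
#if 0	/* example only */
static errno_t
example_output_with_advisory(ifnet_t ifp, protocol_family_t pf, mbuf_t m,
    const struct sockaddr *dest)
{
	struct flowadv adv;
	errno_t err;

	adv.code = FADV_SUCCESS;
	err = dlil_output(ifp, pf, m, NULL, dest, 0, &adv);
	if (err == 0 && adv.code == FADV_FLOW_CONTROLLED) {
		/* queue is full: back off before sending more */
	}
	return (err);
}
#endif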
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
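
/*
 * Sketch (not in the original source): ifnet_ioctl() gives filters, the
 * attached protocol, and finally the driver a chance at the request, in
 * that order.  SIOCGIFMEDIA is just an illustrative request that drivers
 * commonly implement; passing proto_fam 0 skips the protocol hook while
 * still running the filters and the driver.
 */
#if 0	/* example only */
static errno_t
example_get_media(ifnet_t ifp)
{
	struct ifmediareq ifmr;

	bzero(&ifmr, sizeof (ifmr));
	return (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr));
}
#endif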
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
	&net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
}
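
/*
 * Sketch (not in the original source): the marks API is used bracket-style.
 * The token returned by net_thread_marks_push() records which bits this
 * caller newly set, and net_thread_marks_pop() clears exactly those bits,
 * so a nested push of an already-set mark is a no-op.  NET_THREAD_HELD_PCB
 * stands for one of the mark bits defined alongside this facility; treat
 * the exact constant name as an assumption.
 */
#if 0	/* example only */
static void
example_marked_section(void)
{
	net_thread_marks_t marks;

	marks = net_thread_marks_push(NET_THREAD_HELD_PCB);
	/* ... code that must see the PCB-held mark on this thread ... */
	net_thread_marks_pop(marks);
}
#endif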
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_attached() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED);
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}

void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
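
/*
 * Sketch (not in the original source): the canonical usage of the IO
 * refcount pair above.  ifnet_is_attached(ifp, 1) atomically checks
 * IFRF_ATTACHED and takes an IO reference; every successful call must be
 * balanced by ifnet_decr_iorefcnt(), whose wakeup lets a pending
 * ifnet_detach_final() proceed once the last reference drops.
 */
#if 0	/* example only */
static errno_t
example_with_ioref(struct ifnet *ifp)
{
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);	/* detaching, or never attached */

	/* ... safe to use ifp here: detach will wait for us ... */

	ifnet_decr_iorefcnt(ifp);
	return (0);
}
#endif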
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	return (retval);
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v1 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
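
/*
 * Sketch (not in the original source): a protocol module attaching itself
 * over Ethernet with the v1 KPI.  The field names follow the
 * ifnet_attach_proto_param usage above; example_proto_input and the
 * 0x86dd EtherType are illustrative assumptions, and any callbacks left
 * NULL simply go unused.
 */
#if 0	/* example only */
static errno_t example_proto_input(ifnet_t, protocol_family_t,
    mbuf_t, char *);			/* hypothetical v1 input handler */

static errno_t
example_attach_my_proto(ifnet_t ifp, protocol_family_t my_family)
{
	struct ifnet_demux_desc desc;
	struct ifnet_attach_proto_param param;
	u_int16_t ethertype = htons(0x86dd);	/* illustrative EtherType */

	bzero(&desc, sizeof (desc));
	desc.type = DLIL_DESC_ETYPE2;	/* demux Ethernet frames by type */
	desc.data = &ethertype;
	desc.datalen = sizeof (ethertype);

	bzero(&param, sizeof (param));
	param.demux_list = &desc;
	param.demux_count = 1;
	param.input = example_proto_input;

	/* lands in dlil_attach_protocol_internal() above as a v1 KPI */
	return (ifnet_attach_protocol(ifp, my_family, &param));
}
#endif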
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v2 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
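
/*
 * Sketch (not in the original source): the matching teardown for the
 * attach example above.  After ifnet_detach_protocol() returns, the KPI
 * callbacks have been redirected to the ifproto_media_* stubs below, so a
 * straggling caller that still holds a proto reference hits a harmless
 * stub rather than unloaded module code.
 */
#if 0	/* example only */
static void
example_detach_my_proto(ifnet_t ifp, protocol_family_t my_family)
{
	errno_t err = ifnet_detach_protocol(ifp, my_family);

	if (err != 0 && err != ENXIO)
		printf("%s: detach of %u failed (err=%d)\n", __func__,
		    my_family, err);
}
#endif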
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;

errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			printf("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
#if (DEVELOPMENT || DEBUG)
			printf("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
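
/*
 * Sketch (not in the original source): a driver attaching a freshly
 * allocated ifnet with its permanent MAC address.  Only sdl_alen and the
 * link-layer bytes are consulted here, matching the ll_addr handling
 * above and in dlil_alloc_lladdr() below.
 */
#if 0	/* example only */
static errno_t
example_attach(ifnet_t ifp, const uint8_t mac[6])
{
	struct sockaddr_dl sdl;

	bzero(&sdl, sizeof (sdl));
	sdl.sdl_len = sizeof (sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_alen = 6;
	bcopy(mac, LLADDR(&sdl), 6);

	return (ifnet_attach(ifp, &sdl));
}
#endif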
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof (asdl->sdl_data)));
		asdl->sdl_nlen = namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen > 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
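
/*
 * Worked example (not in the original source) of the ROUNDUP() macro used
 * above: it rounds up to the next multiple of sizeof (u_int32_t).  For
 * a == 13:  1 + ((13 - 1) | 3) = 1 + (12 | 3) = 1 + 15 = 16.  Values that
 * are already multiples of 4 are unchanged: 1 + ((16 - 1) | 3) = 16.
 */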
void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL)
		return (EINVAL);

	ndi = ND_IFINFO(ifp);
	if (NULL != ndi)
		ndi->cga_initialized = FALSE;

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		FREE(ifp->if_agentids, M_NETAGENT);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifp->if_data_threshold = 0;
	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}

static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_handler = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_handler = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	/* QoS marking get cleared */
	ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
6311 lck_mtx_lock(&ifp
->if_cached_route_lock
);
6312 VERIFY(!ifp
->if_fwd_cacheok
);
6313 ROUTE_RELEASE(&ifp
->if_fwd_route
);
6314 bzero(&ifp
->if_fwd_route
, sizeof (ifp
->if_fwd_route
));
6315 ROUTE_RELEASE(&ifp
->if_src_route
);
6316 bzero(&ifp
->if_src_route
, sizeof (ifp
->if_src_route
));
6317 ROUTE_RELEASE(&ifp
->if_src_route6
);
6318 bzero(&ifp
->if_src_route6
, sizeof (ifp
->if_src_route6
));
6319 lck_mtx_unlock(&ifp
->if_cached_route_lock
);
6321 VERIFY(ifp
->if_data_threshold
== 0);
6323 ifnet_llreach_ifdetach(ifp
);
6325 dlil_post_msg(ifp
, KEV_DL_SUBCLASS
, KEV_DL_IF_DETACHED
, NULL
, 0);
6328 * Finally, mark this ifnet as detached.
6330 lck_mtx_lock_spin(&ifp
->if_ref_lock
);
6331 if (!(ifp
->if_refflags
& IFRF_DETACHING
)) {
6332 panic("%s: flags mismatch (detaching not set) ifp=%p",
6336 ifp
->if_refflags
&= ~IFRF_DETACHING
;
6337 lck_mtx_unlock(&ifp
->if_ref_lock
);
6338 if (if_free
!= NULL
)
6342 printf("%s: detached\n", if_name(ifp
));
6344 /* Release reference held during ifnet attach */
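
/*
 * Note on the teardown above: ifnet_detach_final() runs only once the
 * detaching flag is set and all outstanding I/O references (if_refio)
 * have drained, so no data path can still be inside the driver at this
 * point.  Replacing the driver callbacks with the ifp_if_* stubs below
 * is what makes it safe for the driver's kext to unload afterwards.
 */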
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return (0);
}

static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return (ENXIO);
}

static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
{
#pragma unused(ifp, m, sa, ll, t)
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
}

static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

static errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
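
/*
 * The stubs above intentionally do as little as possible: the output
 * and input paths free any mbufs handed to them, demux/framer return
 * EJUSTRETURN so callers simply drop the packet, and the control entry
 * points report EOPNOTSUPP or EINVAL.  Any late caller holding a stale
 * ifnet pointer therefore fails safely instead of calling into an
 * unloaded driver.
 */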
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, dlifp1);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

	mac_ifnet_label_init(ifp1);

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
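
/*
 * Sketch of the allocation layout built above (not to scale): the zone
 * element is dlif_bufsize bytes, sized so that a 64-bit aligned
 * dlil_ifnet plus a hidden back-pointer always fit.
 *
 *   buf                     base = P2ROUNDUP(buf + 8, 8)
 *    |                        |
 *    v                        v
 *    +-----------+-----------+---------------------------+
 *    | (padding) | void *pbuf| struct dlil_ifnet (dlif)  |
 *    +-----------+-----------+---------------------------+
 *                 ^
 *                 pbuf = base - sizeof (void *) stores the original
 *                 buf so the allocation can be recovered and freed
 *                 later, per the comment in the function body.
 */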
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
	ifnet_lock_done(ifp);
}

__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
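
/*
 * A brief restatement of the bucket convention assumed above: slots
 * 0-2 of if_proto_hash are dedicated to PF_INET, PF_INET6 and PF_VLAN
 * respectively, so unplumbing the two inet families empties everything
 * except slot 3, whose tenants are expected to have detached already in
 * response to the earlier KEV_DL_IF_DETACHING event.
 */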
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
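
/*
 * Locking pattern used by the four helpers above: the cached-route
 * mutex is taken in spin mode for the common short path, then promoted
 * to a full mutex via lck_mtx_convert_spin() before the copy, which
 * may block.  The copyin side deliberately re-checks if_fwd_cacheok
 * under the lock, so a cache invalidated mid-lookup is released rather
 * than written back.
 */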
static struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
			    0, 0, ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}

static struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
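
/*
 * Reference-count note for the two lookup routines above: when a fresh
 * route is installed, an extra reference is taken on the rtentry (via
 * RT_ADDREF) before ifp_src_route*_copyin(), because copyin consumes
 * one reference while the caller still needs to use the returned
 * ro_rt.
 */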
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
		lqm = IFNET_LQM_THRESH_BAD;
	else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
		lqm = IFNET_LQM_THRESH_POOR;
	else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
		lqm = IFNET_LQM_THRESH_GOOD;

	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return (ENOTSUP);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire.  This will be done when there is an explicit
	 * notification that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);

	return (0);
}
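
/*
 * Note: tcp_interface_send_probe() is deliberately invoked only after
 * ifnet_lock_done(), mirroring the "don't hold the lock when issuing
 * kernel events" convention followed by if_lqm_update() and
 * if_rrc_state_update() above, since TCP may take its own locks while
 * scanning connections on this interface.
 */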
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}
static int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
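
/*
 * Behavioral note: the scan above returns the link-layer address of
 * the first IFT_ETHER interface found (0 on success), or -1 when no
 * Ethernet interface exists, in which case the caller is expected to
 * substitute random bytes for the UUID node component.
 */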
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
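
/*
 * The handlers above all follow the same read-modify-validate sysctl
 * pattern: copy the current value into a local, let
 * sysctl_handle_int()/sysctl_handle_quad() round-trip it to user
 * space, then reject or clamp the new value before publishing it to
 * the global.  For example, a write such as
 *
 *	sysctl -w net.link.generic.system.rcvq_maxlen=512
 *
 * (OID name illustrative; the SYSCTL_PROC registrations that bind
 * these handlers live elsewhere in this file) is clamped to
 * IF_RCVQ_MINLEN before it takes effect.
 */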
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			switch (sdl->sdl_type) {
			case IFT_ETHER:
				VERIFY(size == ETHER_ADDR_LEN);
				bytes = unspec;
				break;
			case IFT_IEEE1394:
				VERIFY(size == FIREWIRE_EUI64_LEN);
				bytes = unspec;
				break;
			default:
				VERIFY(FALSE);
				break;
			};
		}
	}
#else
#pragma unused(credp)
#endif /* CONFIG_MACF */

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
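
/*
 * Privacy behavior of dlil_ifaddr_bytes(), in brief: when link-layer
 * address checking is enabled and the MAC framework denies
 * "net.link.addr" to the caller's credential, the real bytes are
 * replaced with a constant "unspecified" pattern of the same length,
 * so unprivileged observers see a well-formed but meaningless address.
 */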
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
int
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
*ifp
, u_int32_t
*level
)
7373 struct ifclassq
*ifq
;
7376 if (!(ifp
->if_eflags
& IFEF_TXSTART
))
7379 *level
= IFNET_THROTTLE_OFF
;
7383 /* Throttling works only for IFCQ, not ALTQ instances */
7384 if (IFCQ_IS_ENABLED(ifq
))
7385 IFCQ_GET_THROTTLE(ifq
, *level
, err
);
7392 ifnet_set_throttle(struct ifnet
*ifp
, u_int32_t level
)
7394 struct ifclassq
*ifq
;
7397 if (!(ifp
->if_eflags
& IFEF_TXSTART
))
7403 case IFNET_THROTTLE_OFF
:
7404 case IFNET_THROTTLE_OPPORTUNISTIC
:
7406 /* Throttling works only for IFCQ, not ALTQ instances */
7407 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq
)))
7409 #endif /* PF_ALTQ */
7416 if (IFCQ_IS_ENABLED(ifq
))
7417 IFCQ_SET_THROTTLE(ifq
, level
, err
);
7421 printf("%s: throttling level set to %d\n", if_name(ifp
),
7423 if (level
== IFNET_THROTTLE_OFF
)
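
/*
 * Usage note: IFNET_THROTTLE_OPPORTUNISTIC asks the classq scheduler
 * to suppress service of lower-priority (opportunistic) traffic on
 * this interface; switching back to IFNET_THROTTLE_OFF kicks the
 * transmit thread via ifnet_start() so packets that queued up while
 * throttled are flushed promptly.
 */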
int
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	int result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}

int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}

int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

	(void) pf_ifaddr_hook(ifp);

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}

errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}
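
/*
 * Flow-control handshake implemented by the routines above and below:
 * a driver that runs out of ring space calls ifnet_disable_output(),
 * which records the interface's flow hash in ifnet_fc_tree and sets
 * IFSF_FLOW_CONTROLLED.  When the hardware drains, a flow advisory
 * carrying the same hash reaches ifnet_flowadv(), which looks the
 * entry up, re-enables output and kicks the starter thread.
 */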
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}

static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}

static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
u_int32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
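
/*
 * The retry loop above only guards against the degenerate case of a
 * zero hash: zero is reserved to mean "no flow hash" (note the
 * VERIFY(ifp->if_flowhash != 0) in ifnet_fc_add()), so the seed is
 * re-rolled until net_flowhash() yields a non-zero value.
 */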
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	if (ifp == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee should it skip such case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
	default:
		return;
	}
}

static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			printf("%s: partial cksum start offset %d "
			    "is less than frame header length %d for "
			    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
			    (uint64_t)VM_KERNEL_ADDRPERM(m));
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				printf("%s: bad partial cksum value "
				    "0x%x (expected 0x%x) for mbuf "
				    "0x%llx [rx_start %d]\n",
				    if_name(ifp),
				    m->m_pkthdr.csum_rx_val, sum,
				    (uint64_t)VM_KERNEL_ADDRPERM(m),
				    m->m_pkthdr.csum_rx_start);
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
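
/*
 * A worked example of the rx-offset adjustment above: given a 16-bit
 * 1's complement sum S computed from byte offset rxoff, moving the
 * nominal start to a different offset aoff amounts to folding in (or
 * out) the sum of the bytes between the two offsets, which is what
 * m_adj_sum16() computes.  Emulated hardware start offsets can thus be
 * validated without recomputing the sum over the whole packet.
 */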
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	int		len;
	uint16_t	sum;
} sumtbl[] = {
	/* { span length, expected sum } entries elided in this excerpt */
};

#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d offset=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken b_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	printf("DLIL: SUM16 self-tests PASSED\n");
}
#define	CASE_STRINGIFY(x)	case x: return #x

__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
/*
 * Mirror the arguments of ifnet_get_local_ports_extended()
 *  ifindex
 *  protocol
 *  flags
 */
static int
sysctl_get_ports_used SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	int namelen = arg2;
	int error = 0;
	int idx;
	protocol_family_t protocol;
	u_int32_t flags;
	ifnet_t ifp = NULL;
	u_int8_t *bitfield = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = ENOENT;
		goto done;
	}

	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = bitstr_size(65536);
		goto done;
	}
	if (req->oldlen < bitstr_size(65536)) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	protocol = name[1];
	flags = name[2];

	ifnet_head_lock_shared();
	if (idx > if_index) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
	if (bitfield == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
	if (error != 0) {
		printf("%s: ifnet_get_local_ports_extended() error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
done:
	if (bitfield != NULL)
		_FREE(bitfield, M_TEMP);
	return (error);
}
#if (DEVELOPMENT || DEBUG)
/*
 * The sysctl variable name contains the input parameters of
 * ifnet_get_keepalive_offload_frames()
 *  ifp (interface index): name[0]
 *  frames_array_count: name[1]
 *  frame_data_offset: name[2]
 * The return length gives used_frames_count
 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	u_int namelen = arg2;
	int idx;
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;
	size_t frame_data_offset;
	u_int32_t used_frames_count = 0;
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can look at other people's TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0)
		goto done;
	/*
	 * Validate the input parameters
	 */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (idx > if_index) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = _MALLOC(frames_array_count *
	    sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0)
			goto done;
	}
done:
	if (frames_array != NULL)
		_FREE(frames_array, M_TEMP);
	return (error);
}
#endif /* DEVELOPMENT || DEBUG */