/*
 * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/random.h>
#include <sys/socketvar.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/iptap.h>
#include <net/pktap.h>
#include <sys/kern_event.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/assert.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/kpi_protocol.h>
#include <net/if_types.h>
#include <net/if_llreach.h>
#include <net/kpi_interfacefilter.h>
#include <net/classq/classq.h>
#include <net/classq/classq_sfb.h>
#include <net/flowhash.h>
#include <net/ntstat.h>

#include <netinet/in_var.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/if_ether.h>
#include <netinet/in_pcb.h>

#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/scope6_var.h>

#include <libkern/OSAtomic.h>
#include <libkern/tree.h>

#include <dev/random/randomdev.h>
#include <machine/machine_routines.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <sys/kauth.h>
#include <security/mac_framework.h>
#include <net/ethernet.h>
#include <net/firewire.h>

#include <net/pfvar.h>
#include <net/altq/altq.h>
#include <net/pktsched/pktsched.h>
#define	DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
#define	DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
#define	DBG_FNC_DLIL_INPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
#define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
#define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))

#define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
#define	MAX_LINKADDR		4	/* LONGWORDS */
#define	M_NKE			M_IFADDR

#if 1
#define	DLIL_PRINTF	printf
#else
#define	DLIL_PRINTF	kprintf
#endif

#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))

#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
enum {
	kProtoKPI_v1	= 1,
	kProtoKPI_v2	= 2
};

/*
 * List of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The rest of the fields are initialized at protocol
 * attach time and never change, thus no lock required as long as
 * a reference to it is valid, via if_proto_ref().
 */
struct if_proto {
	SLIST_ENTRY(if_proto)	next_hash;
	u_int32_t		refcount;
	u_int32_t		detached;
	struct ifnet		*ifp;
	protocol_family_t	protocol_family;
	int			proto_kpi;
	union {
		struct {
			proto_media_input	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v1;
		struct {
			proto_media_input_v2	input;
			proto_media_preout	pre_output;
			proto_media_event	event;
			proto_media_ioctl	ioctl;
			proto_media_detached	detached;
			proto_media_resolve_multi resolve_multi;
			proto_media_send_arp	send_arp;
		} v2;
	} kpi;
};

SLIST_HEAD(proto_hash_entry, if_proto);
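/*
 * Note on the union above: a protocol may register against either
 * generation of the protocol KPI; proto_kpi records which arm (v1 or
 * v2) is live, and callers such as if_proto_free() dispatch through
 * the matching set of callbacks.  The two arms differ only in the
 * type of the input callback (proto_media_input vs.
 * proto_media_input_v2).
 */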
#define	DLIL_SDLMAXLEN	64
#define	DLIL_SDLDATALEN	\
	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
struct dlil_ifnet {
	struct ifnet	dl_if;			/* public ifnet */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
	u_int32_t dl_if_flags;			/* flags (below) */
	u_int32_t dl_if_refcnt;			/* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void	*dl_if_uniqueid;		/* unique interface id */
	size_t	dl_if_uniqueid_len;		/* length of the unique id */
	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
	char	dl_if_xnamestorage[IFXNAMSIZ];	/* external name storage */
	struct {
		struct ifaddr	ifa;		/* lladdr ifa */
		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
};

/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycles, ifnet is not new */
#define	DLIF_DEBUG	0x4	/* has debugging info */

#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */
__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;

struct dlil_ifnet_dbg {
	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};

#define	DLIL_TO_IFP(s)	(&s->dl_if)
#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)s)
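/*
 * The two macros above are valid only because dl_if is the first
 * member of struct dlil_ifnet: the address of a dlil_ifnet and the
 * address of its embedded public ifnet are identical, so each can be
 * recovered from the other with a plain cast.
 */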
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter) filt_next;
	u_int32_t		filt_skip;
	u_int32_t		filt_flags;
	ifnet_t			filt_ifp;
	const char		*filt_name;
	void			*filt_cookie;
	protocol_family_t	filt_protocol;
	iff_input_func		filt_input;
	iff_output_func		filt_output;
	iff_event_func		filt_event;
	iff_ioctl_func		filt_ioctl;
	iff_detached_func	filt_detached;
};

struct proto_input_entry;
static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
static lck_grp_t *dlil_lock_group;
lck_grp_t *ifnet_lock_group;
static lck_grp_t *ifnet_head_lock_group;
static lck_grp_t *ifnet_snd_lock_group;
static lck_grp_t *ifnet_rcv_lock_group;
lck_attr_t *ifnet_lock_attr;
decl_lck_rw_data(static, ifnet_head_lock);
decl_lck_mtx_data(static, dlil_ifnet_lock);
u_int32_t dlil_filter_disable_tso_count = 0;
#if DEBUG
static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
#else
static unsigned int ifnet_debug;	/* debugging (disabled) */
#endif /* !DEBUG */
static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
static struct zone *dlif_zone;		/* zone for dlil_ifnet */

#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */

static unsigned int dlif_filt_size;	/* size of ifnet_filter */
static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */

#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */

static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */

#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */

static unsigned int dlif_proto_size;	/* size of if_proto */
static struct zone *dlif_proto_zone;	/* zone for if_proto */

#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2)	/* maximum elements in zone */
#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */

static unsigned int dlif_tcpstat_size;	/* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
static struct zone *dlif_tcpstat_zone;	/* zone for tcpstat_local */

#define	DLIF_TCPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_TCPSTAT_ZONE_NAME	"ifnet_tcpstat"	/* zone name */

static unsigned int dlif_udpstat_size;	/* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
static struct zone *dlif_udpstat_zone;	/* zone for udpstat_local */

#define	DLIF_UDPSTAT_ZONE_MAX	1		/* maximum elements in zone */
#define	DLIF_UDPSTAT_ZONE_NAME	"ifnet_udpstat"	/* zone name */
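/*
 * For the dlil_ifnet, tcpstat_local and udpstat_local zones, the
 * element size is padded with one pointer plus one u_int64_t of
 * headroom (see the *_bufsize computations in dlil_init()).  The
 * extra room lets a 64-bit aligned object be carved out of each
 * element while the original zalloc() address is stashed immediately
 * before it for use at free time.
 */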
/*
 * Updating this variable should be done by first acquiring the global
 * radix node head (rnh_lock), in tandem with setting/clearing the
 * PR_AGGDRAIN for routedomain.
 */
u_int32_t ifnet_aggressive_drainers;
static u_int32_t net_rtref;
static struct dlil_main_threading_info dlil_main_input_thread_info;
__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
    (struct dlil_threading_info *)&dlil_main_input_thread_info;
static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
static int dlil_detach_filter_internal(interface_filter_t filter,
    int detached);
static void dlil_if_trace(struct dlil_ifnet *, int);
static void if_proto_ref(struct if_proto *);
static void if_proto_free(struct if_proto *);
static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
static int dlil_ifp_proto_count(struct ifnet *);
static void if_flt_monitor_busy(struct ifnet *);
static void if_flt_monitor_unbusy(struct ifnet *);
static void if_flt_monitor_enter(struct ifnet *);
static void if_flt_monitor_leave(struct ifnet *);
static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
    char **, protocol_family_t);
static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
    protocol_family_t);
static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
    const struct sockaddr_dl *);
static int ifnet_lookup(struct ifnet *);
static void if_purgeaddrs(struct ifnet *);

static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
    struct mbuf *, char *);
static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
    struct mbuf *);
static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
    mbuf_t *, const struct sockaddr *, void *, char *, char *);
static void ifproto_media_event(struct ifnet *, protocol_family_t,
    const struct kev_msg *);
static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
    unsigned long, void *);
static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
    struct sockaddr_dl *, size_t);
static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);

static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
static void ifp_if_start(struct ifnet *);
static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
    protocol_family_t *);
static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *);
static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
    const struct sockaddr *, const char *, const char *,
    u_int32_t *, u_int32_t *);
static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode,
    bpf_packet_func);
static void ifp_if_free(struct ifnet *);
static void ifp_if_event(struct ifnet *, const struct kev_msg *);
static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);

static void dlil_main_input_thread_func(void *, wait_result_t);
static void dlil_input_thread_func(void *, wait_result_t);
static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
static void dlil_terminate_input_thread(struct dlil_threading_info *);
static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
    struct dlil_threading_info *, boolean_t);
static void dlil_input_stats_sync(struct ifnet *,
    struct dlil_threading_info *);
static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t, boolean_t);
static errno_t ifnet_input_common(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, boolean_t);

#if DEBUG
static void dlil_verify_sum16(void);
#endif /* DEBUG */
static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
    protocol_family_t);
static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
    protocol_family_t);

static void ifnet_detacher_thread_func(void *, wait_result_t);
static int ifnet_detacher_thread_cont(int);
static void ifnet_detach_final(struct ifnet *);
static void ifnet_detaching_enqueue(struct ifnet *);
static struct ifnet *ifnet_detaching_dequeue(void);

static void ifnet_start_thread_fn(void *, wait_result_t);
static void ifnet_poll_thread_fn(void *, wait_result_t);
static void ifnet_poll(struct ifnet *);

static void ifp_src_route_copyout(struct ifnet *, struct route *);
static void ifp_src_route_copyin(struct ifnet *, struct route *);
static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);

static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;

struct chain_len_stats tx_chain_len_stats;
static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static void *ifnet_delayed_run;	/* wait channel for detaching thread */

decl_lck_mtx_data(static, ifnet_fc_lock);

static uint32_t ifnet_flowhash_seed;
struct ifnet_flowhash_key {
	char		ifk_name[IFNAMSIZ];
	uint32_t	ifk_unit;
	uint32_t	ifk_flags;
	uint32_t	ifk_eflags;
	uint32_t	ifk_capabilities;
	uint32_t	ifk_capenable;
	uint32_t	ifk_output_sched_model;
	uint32_t	ifk_rand1;
	uint32_t	ifk_rand2;
};
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t	ifce_flowhash;
	struct ifnet	*ifce_ifp;
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */

#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
#define	IFNET_FC_ZONE_MAX	32
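/*
 * Flow control entries live in an RB tree keyed by ifce_flowhash, a
 * 32-bit hash computed by ifnet_calc_flowhash() over the fields of
 * struct ifnet_flowhash_key above; ifnet_fc_get() therefore needs
 * only the hash value, not an ifnet pointer, to locate an entry.
 */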
extern void bpfdetach(struct ifnet *);
extern void proto_input_run(void);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

__private_extern__ void link_rtrequest(int, struct rtentry *,
    struct sockaddr *);
#if CONFIG_MACF
int dlil_lladdr_ckreq = 0;
#endif

#if DEBUG
int dlil_verbose = 1;
#else
int dlil_verbose = 0;
#endif /* DEBUG */
#if IFNET_INPUT_SANITY_CHK
/* sanity checking of input packet lists received */
static u_int32_t dlil_input_sanity_check = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
/* rate limit debug messages */
struct timespec dlil_dbgrate = { 1, 0 };
SYSCTL_DECL(_net_link_generic_system);

#if CONFIG_MACF
SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
    "Require MACF system info check to expose link-layer address");
#endif

SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
#define	IF_SNDQ_MINLEN	32
u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");

#define	IF_RCVQ_MINLEN	32
#define	IF_RCVQ_MAXLEN	256
u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
    "ilog2 of EWMA decay rate of avg inbound packets");

#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
    "Q", "input poll mode freeze time");

#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
    "Q", "input poll sampling time");

#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
    "Q", "input poll interval (time)");

#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");

#define	IF_RXPOLL_WLOWAT	10
static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
    "I", "input poll wakeup low watermark");

#define	IF_RXPOLL_WHIWAT	100
static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
    "I", "input poll wakeup high watermark");

static u_int32_t if_rxpoll_max = 0;	/* 0 (automatic) */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
    "max packets per poll call");

static u_int32_t if_rxpoll = 1;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
    sysctl_rxpoll, "I", "enable opportunistic input polling");
u_int32_t if_bw_smoothing_val = 3;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");

u_int32_t if_bw_measure_size = 10;
SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
static u_int32_t cur_dlil_input_threads = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
    "Current number of DLIL input threads");

#if IFNET_INPUT_SANITY_CHK
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
    "Turn on sanity checking in DLIL input");
#endif /* IFNET_INPUT_SANITY_CHK */
static u_int32_t if_flowadv = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
    "enable flow-advisory mechanism");

static u_int32_t if_delaybased_queue = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
    "enable delay based dynamic queue sizing");
static uint64_t hwcksum_in_invalidated = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
uint32_t hwcksum_dbg = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
    "enable hardware cksum debugging");

u_int32_t ifnet_start_delayed = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
    "number of times start was delayed");

u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "number of times delayed start was disabled");
#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
#define	HWCKSUM_DBG_MASK \
	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
	HWCKSUM_DBG_FINALIZE_FORCED)

static uint32_t hwcksum_dbg_mode = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");

static uint64_t hwcksum_dbg_partial_forced = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");

static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");

static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &hwcksum_dbg_partial_rxoff_forced, 0,
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");

static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
    "adjusted partial cksum rx offset");
static uint64_t hwcksum_dbg_verified = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_verified, "packets verified for having good checksum");

static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

static lck_grp_attr_t *dlil_grp_attributes = NULL;
static lck_attr_t *dlil_lck_attributes = NULL;
#define	DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

#define	DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
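/*
 * DLIL_EWMA maintains a decaying average using shifts only.  For
 * example, with decay = 2 (the IF_RXPOLL_DECAY default), a prior
 * average of 12 and a new sample of 4:
 *
 *	((12 << 2) - 12 + 4) >> 2 = (48 - 12 + 4) >> 2 = 10
 *
 * which is (3*avg + new) / 4, i.e. an EWMA giving the new sample a
 * weight of 1/2^decay.
 */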
#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ 10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};
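/*
 * Rows are keyed by downlink speed; the row matching an interface's
 * speed supplies its polling watermarks when rx-poll parameters are
 * computed.  Note that every row at or above 100 Mbps currently
 * carries identical thresholds.
 */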
static inline u_int32_t
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch (protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}
/*
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change).
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		return;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	lck_rw_assert(&ifp->if_lock, type);
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}

__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}
/*
 * Caller must already be holding ifnet lock.
 */
static int
dlil_ifp_proto_count(struct ifnet *ifp)
{
	int i, count = 0;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL)
		goto done;

	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			count++;
		}
	}
done:
	return (count);
}
__private_extern__ void
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof (ev_msg));
	bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure,
	 * but the caller can generate a simple net event or provide a
	 * longer event structure to post.
	 */
	ev_msg.vendor_code	= KEV_VENDOR_APPLE;
	ev_msg.kev_class	= KEV_NETWORK_CLASS;
	ev_msg.kev_subclass	= event_subclass;
	ev_msg.event_code	= event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof (struct net_event_data);
	}

	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	dlil_event_internal(ifp, &ev_msg);
}
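/*
 * dlil_alloc_local_stats() below relies on the headroom built into
 * the tcpstat/udpstat zone elements (see dlif_*_bufsize).  Each
 * element is laid out as:
 *
 *	buf (from zalloc)           base (64-bit aligned)
 *	 |                           |
 *	 v                           v
 *	 +----------+---------------+----------------------+
 *	 | headroom | saved buf ptr | aligned stats object |
 *	 +----------+---------------+----------------------+
 *
 * The original zalloc() address is stored one pointer-size below the
 * aligned base, so the object can later be freed without any extra
 * bookkeeping.
 */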
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL)
		goto end;

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc(dlif_tcpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_tcpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc(dlif_udpstat_zone);
		if (buf == NULL) {
			ret = ENOMEM;
			goto end;
		}
		bzero(buf, dlif_udpstat_bufsize);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
		    sizeof (u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof (void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv4_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}

	if (ifp->if_ipv6_stat == NULL) {
		MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
		    sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
		if (ifp->if_ipv6_stat == NULL) {
			ret = ENOMEM;
			goto end;
		}
	}
end:
	if (ret != 0 && ifp != NULL) {
		if (ifp->if_tcp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof (void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		if (ifp->if_ipv4_stat != NULL) {
			FREE(ifp->if_ipv4_stat, M_TEMP);
			ifp->if_ipv4_stat = NULL;
		}
		if (ifp->if_ipv6_stat != NULL) {
			FREE(ifp->if_ipv6_stat, M_TEMP);
			ifp->if_ipv6_stat = NULL;
		}
	}

	return (ret);
}
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
{
	thread_continue_t func;
	u_int32_t limit;
	int error;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->input_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else {
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	}
	VERIFY(inp->input_thr == THREAD_NULL);

	inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
	lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	inp->ifp = ifp;		/* NULL for main input thread */

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_random(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}
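/*
 * Tear-down counterpart of dlil_create_input_thread().  Note that it
 * executes on the input thread being destroyed: after dropping the
 * extra reference taken by kernel_thread_start() it terminates the
 * current thread, so control never returns to the caller.
 */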
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	ifp = inp->ifp;
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}
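/*
 * dlil_init() runs once at network-stack bring-up.  The block of
 * compile-time asserts below pins down ABI assumptions (64-bit
 * alignment of the atomically updated if_data counters, and numeric
 * equality of the IF_HWASSIST_*, IFNET_* and CSUM_* flag namespaces)
 * before any zones, locks or threads are created.
 */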
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_*
	 * counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1416 PE_parse_boot_argn("net_affinity", &net_affinity
,
1417 sizeof (net_affinity
));
1419 PE_parse_boot_argn("net_rxpoll", &net_rxpoll
, sizeof (net_rxpoll
));
1421 PE_parse_boot_argn("net_rtref", &net_rtref
, sizeof (net_rtref
));
1423 PE_parse_boot_argn("ifnet_debug", &ifnet_debug
, sizeof (ifnet_debug
));
1425 dlif_size
= (ifnet_debug
== 0) ? sizeof (struct dlil_ifnet
) :
1426 sizeof (struct dlil_ifnet_dbg
);
1427 /* Enforce 64-bit alignment for dlil_ifnet structure */
1428 dlif_bufsize
= dlif_size
+ sizeof (void *) + sizeof (u_int64_t
);
1429 dlif_bufsize
= P2ROUNDUP(dlif_bufsize
, sizeof (u_int64_t
));
1430 dlif_zone
= zinit(dlif_bufsize
, DLIF_ZONE_MAX
* dlif_bufsize
,
1432 if (dlif_zone
== NULL
) {
1433 panic_plain("%s: failed allocating %s", __func__
,
1437 zone_change(dlif_zone
, Z_EXPAND
, TRUE
);
1438 zone_change(dlif_zone
, Z_CALLERACCT
, FALSE
);
1440 dlif_filt_size
= sizeof (struct ifnet_filter
);
1441 dlif_filt_zone
= zinit(dlif_filt_size
,
1442 DLIF_FILT_ZONE_MAX
* dlif_filt_size
, 0, DLIF_FILT_ZONE_NAME
);
1443 if (dlif_filt_zone
== NULL
) {
1444 panic_plain("%s: failed allocating %s", __func__
,
1445 DLIF_FILT_ZONE_NAME
);
1448 zone_change(dlif_filt_zone
, Z_EXPAND
, TRUE
);
1449 zone_change(dlif_filt_zone
, Z_CALLERACCT
, FALSE
);
1451 dlif_phash_size
= sizeof (struct proto_hash_entry
) * PROTO_HASH_SLOTS
;
1452 dlif_phash_zone
= zinit(dlif_phash_size
,
1453 DLIF_PHASH_ZONE_MAX
* dlif_phash_size
, 0, DLIF_PHASH_ZONE_NAME
);
1454 if (dlif_phash_zone
== NULL
) {
1455 panic_plain("%s: failed allocating %s", __func__
,
1456 DLIF_PHASH_ZONE_NAME
);
1459 zone_change(dlif_phash_zone
, Z_EXPAND
, TRUE
);
1460 zone_change(dlif_phash_zone
, Z_CALLERACCT
, FALSE
);
1462 dlif_proto_size
= sizeof (struct if_proto
);
1463 dlif_proto_zone
= zinit(dlif_proto_size
,
1464 DLIF_PROTO_ZONE_MAX
* dlif_proto_size
, 0, DLIF_PROTO_ZONE_NAME
);
1465 if (dlif_proto_zone
== NULL
) {
1466 panic_plain("%s: failed allocating %s", __func__
,
1467 DLIF_PROTO_ZONE_NAME
);
1470 zone_change(dlif_proto_zone
, Z_EXPAND
, TRUE
);
1471 zone_change(dlif_proto_zone
, Z_CALLERACCT
, FALSE
);
1473 dlif_tcpstat_size
= sizeof (struct tcpstat_local
);
1474 /* Enforce 64-bit alignment for tcpstat_local structure */
1475 dlif_tcpstat_bufsize
=
1476 dlif_tcpstat_size
+ sizeof (void *) + sizeof (u_int64_t
);
1477 dlif_tcpstat_bufsize
=
1478 P2ROUNDUP(dlif_tcpstat_bufsize
, sizeof (u_int64_t
));
1479 dlif_tcpstat_zone
= zinit(dlif_tcpstat_bufsize
,
1480 DLIF_TCPSTAT_ZONE_MAX
* dlif_tcpstat_bufsize
, 0,
1481 DLIF_TCPSTAT_ZONE_NAME
);
1482 if (dlif_tcpstat_zone
== NULL
) {
1483 panic_plain("%s: failed allocating %s", __func__
,
1484 DLIF_TCPSTAT_ZONE_NAME
);
1487 zone_change(dlif_tcpstat_zone
, Z_EXPAND
, TRUE
);
1488 zone_change(dlif_tcpstat_zone
, Z_CALLERACCT
, FALSE
);
1490 dlif_udpstat_size
= sizeof (struct udpstat_local
);
1491 /* Enforce 64-bit alignment for udpstat_local structure */
1492 dlif_udpstat_bufsize
=
1493 dlif_udpstat_size
+ sizeof (void *) + sizeof (u_int64_t
);
1494 dlif_udpstat_bufsize
=
1495 P2ROUNDUP(dlif_udpstat_bufsize
, sizeof (u_int64_t
));
1496 dlif_udpstat_zone
= zinit(dlif_udpstat_bufsize
,
1497 DLIF_TCPSTAT_ZONE_MAX
* dlif_udpstat_bufsize
, 0,
1498 DLIF_UDPSTAT_ZONE_NAME
);
1499 if (dlif_udpstat_zone
== NULL
) {
1500 panic_plain("%s: failed allocating %s", __func__
,
1501 DLIF_UDPSTAT_ZONE_NAME
);
1504 zone_change(dlif_udpstat_zone
, Z_EXPAND
, TRUE
);
1505 zone_change(dlif_udpstat_zone
, Z_CALLERACCT
, FALSE
);
	ifnet_llreach_init();

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pf_init();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

#if DEBUG
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);
}
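/*
 * Interface filter "monitor" protocol: if_flt_lock guards the filter
 * list, and a busy count tracks walkers that may block while holding
 * references into it.  if_flt_monitor_enter() sleeps until the list
 * is idle and then marks it busy; if_flt_monitor_leave() drops the
 * busy count and wakes any waiters once it reaches zero;
 * if_flt_monitor_busy()/_unbusy() adjust the count for callers that
 * already hold the mutex.
 */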
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	filter->filt_input = if_filter->iff_input;
	filter->filt_output = if_filter->iff_output;
	filter->filt_event = if_filter->iff_event;
	filter->filt_ioctl = if_filter->iff_ioctl;
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection.
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}
	if (dlil_verbose) {
		printf("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL)
		zfree(dlif_filt_zone, filter);

	return (retval);
}
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip)
					continue;
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;	/* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				lck_mtx_assert(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				if (dlil_verbose) {
					printf("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	}

	if (dlil_verbose)
		printf("%s filter detached\n", filter->filt_name);

destroy:
	/* Call the detached function if there is one */
	if (filter->filt_detached)
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);

	/*
	 * Decrease filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not.  This must read
	 * filt_flags before the filter is freed below.
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
		routegenid_update();
	}

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
done:
	if (retval != 0) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}
	return (retval);
}
__private_extern__ void
dlil_detach_filter(interface_filter_t filter)
{
	if (filter == NULL)
		return;
	dlil_detach_filter_internal(filter, 0);
}
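/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * in-kernel client attaching and detaching a filter through the KPIs
 * above.  The callback and all example names are placeholders.
 */
#if 0
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
	/* return 0 to pass the packet up, EJUSTRETURN to swallow it */
	return (0);
}

static void
example_filter_lifecycle(struct ifnet *ifp)
{
	interface_filter_t ref;
	struct iff_filter flt = {
		.iff_cookie	= NULL,
		.iff_name	= "com.example.filter",
		.iff_protocol	= 0,		/* 0 matches all protocols */
		.iff_input	= example_iff_input,
	};

	/* DLIL_IFF_TSO not passed in flags: TSO is disabled while attached */
	if (dlil_attach_filter(ifp, &flt, &ref, 0) == 0)
		dlil_detach_filter(ref);
}
#endif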
/*
 * Main input thread:
 *
 *   a) handles all inbound packets for lo0
 *   b) handles all inbound packets for interfaces with no dedicated
 *	input thread (e.g. anything but Ethernet/PDP or those that support
 *	opportunistic polling.)
 *   c) protocol registrations
 *   d) packet injections
 */
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts);

		inp->wtot = 0;

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = inp->mode;

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			/* Free up pending packets */
			_flushq(&inp->rcvq_pkts);
			lck_mtx_unlock(&inp->input_lck);

			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		/* Total count of all packets */
		m_cnt = qlen(&inp->rcvq_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->rcvq_pkts);

		/* Packets for this interface */
		m = _getq_all(&inp->rcvq_pkts);
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&inp->sample_lasttime))
			*(&inp->sample_lasttime) = *(&now);

		net_timersub(&now, &inp->sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);

			if (net_timercmp(&delta, &inp->sample_holdtime, <))
				goto skip;

			*(&inp->sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)inp->sstats.bytes;
			if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
				inp->rxpoll_bmin = btot;
			if (btot > inp->rxpoll_bmax)
				inp->rxpoll_bmax = btot;

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)inp->sstats.packets;
			if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
				inp->rxpoll_pmin = ptot;
			if (ptot > inp->rxpoll_pmax)
				inp->rxpoll_pmax = ptot;

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&inp->sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
			inp->wtot = 0;

			if (dlil_verbose) {
				if (!net_timerisset(&inp->dbg_lasttime))
					*(&inp->dbg_lasttime) = *(&now);
				net_timersub(&now, &inp->dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&inp->dbg_lasttime) = *(&now);
					printf("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (inp->mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", inp->rxpoll_pavg,
					    inp->rxpoll_pmax,
					    inp->rxpoll_plowat,
					    inp->rxpoll_phiwat,
					    inp->rxpoll_wavg,
					    inp->rxpoll_wlowat,
					    inp->rxpoll_whiwat,
					    inp->rxpoll_bavg,
					    inp->rxpoll_blowat,
					    inp->rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&inp->mode_lasttime))
				*(&inp->mode_lasttime) = *(&now);

			net_timersub(&now, &inp->mode_lasttime, &delta);
			if (net_timercmp(&delta, &inp->mode_holdtime, <))
				goto skip;

			if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
			    inp->rxpoll_bavg <= inp->rxpoll_blowat &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
			    (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
			    inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
			    inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != inp->mode) {
				inp->mode = mode;
				*(&inp->mode_lasttime) = *(&now);
				poll_req++;
			}
		}
skip:
		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = { mode, { 0 } };
			errno_t err;

			if (dlil_verbose) {
				printf("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", inp->rxpoll_pavg,
				    inp->rxpoll_pmax, inp->rxpoll_plowat,
				    inp->rxpoll_phiwat, inp->rxpoll_wavg,
				    inp->rxpoll_wlowat, inp->rxpoll_whiwat,
				    inp->rxpoll_bavg, inp->rxpoll_blowat,
				    inp->rxpoll_bhiwat);
			}

			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
				printf("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				inp->rxpoll_offreq++;
				if (err != 0)
					inp->rxpoll_offerr++;
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				inp->rxpoll_onreq++;
				if (err != 0)
					inp->rxpoll_onerr++;
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}
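/*
 * Worked example for the averaging above (illustrative; assumes DLIL_EWMA,
 * defined earlier in this file, computes
 * avg = (avg * (2^decay - 1) + sample) / 2^decay).  With
 * if_rxpoll_decay == 2, a steady sample of 400 pkts pulls an average of
 * 100 toward it as 100 -> 175 -> 231 -> 273 -> ..., i.e. roughly 1/4 of
 * each new sample is folded in per hold period, which is what lets the
 * poller ride out short bursts before committing to a mode transition.
 */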
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}

		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
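/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * RXPOLL-capable driver overriding the auto-tuned polling parameters.
 * The threshold values are arbitrary examples; zero-valued fields keep
 * their auto-tuned defaults, per dlil_rxpoll_set_params() above.
 */
#if 0
static void
example_tune_rxpoll(struct ifnet *ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof (p));
	p.packets_lowat = 10;		/* fall back to interrupts below this */
	p.packets_hiwat = 100;		/* switch to polling above this */
	p.interval_time = 1000 * 1000;	/* 1 msec; clamped to the minimum */

	/* FALSE: caller does not hold the input thread's input_lck */
	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);
}
#endif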
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}
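/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * driver receive path handing a chain to DLIL via the extended variant,
 * which requires an exact packet count (the byte count may include
 * link-layer headers, as noted in ifnet_input_common() below).
 */
#if 0
static void
example_rx_deliver(struct ifnet *ifp, mbuf_t head, mbuf_t tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = cnt;	/* must match the chain exactly */
	s.bytes_in = bytes;

	(void) ifnet_input_extended(ifp, head, tail, &s);
}
#endif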
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	struct thread *tp = current_thread();
	struct mbuf *last;
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = 0, m_size = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	if (m_tail == NULL) {
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	if (s != NULL)
		dlil_input_stats_add(s, inp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context.  All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread)
		dlil_input_stats_sync(ifp, inp);

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (0);
}
static void
ifnet_start_common(struct ifnet *ifp, int resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it.
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen
	    || ifp->if_start_delayed == 0)) {
		wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, 0);
}
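/*
 * Illustrative sketch (not part of the original file): the shape of a
 * driver's if_start callback under the normal output model.  The starter
 * thread below invokes it; it drains if_snd via ifnet_dequeue() until the
 * scheduler runs dry.  example_hw_tx is a placeholder for the hardware
 * transmit routine.
 */
#if 0
static void
example_if_start(struct ifnet *ifp)
{
	struct mbuf *m;

	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0)
			break;			/* empty, or ifp detaching */
		example_hw_tx(ifp, m);		/* one packet to the hardware */
	}
}
#endif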
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;
	struct timespec delay_start_ts;

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	snprintf(ifname, sizeof (ifname), "%s_starter",
	    if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		if (ifp->if_start_thread != NULL)
			(void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);

		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;

		for (;;) {
			u_int32_t req = ifp->if_start_req;
			if (!IFCQ_IS_EMPTY(ifq) &&
			    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
			    ifp->if_start_delayed == 0 &&
			    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
			    (ifp->if_eflags & IFEF_DELAY_START)) {
				ifp->if_start_delayed = 1;
				ifnet_start_delayed++;
				break;
			} else {
				ifp->if_start_delayed = 0;
			}
			lck_mtx_unlock(&ifp->if_start_lock);

			/*
			 * If no longer attached, don't call start because ifp
			 * is being destroyed; else hold an IO refcnt to
			 * prevent the interface from being detached (will be
			 * released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_start_lock);
				break;
			}

			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));

			/*
			 * Release the io ref count taken by ifnet_is_attached.
			 */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_start_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_start_req)
				break;
		}

		ifp->if_start_req = 0;
		ifp->if_start_active = 0;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}
static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller",
	    if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req)
				break;
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
	    model != IFNET_SCHED_MODEL_NORMAL))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}
errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}

errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}

errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}

errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	int error;
	struct timespec now;
	u_int64_t now_nsec;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL)
			m_freem_list(m);
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		/* flag tested without lock for performance */
		m_freem(m);
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		return (ENETDOWN);
	}

	nanouptime(&now);
	net_timernsec(&now, &now_nsec);
	m->m_pkthdr.pkt_enqueue_ts = now_nsec;

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows.  This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback.  This
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					ifp->if_eflags &=
					    ~(IFEF_DELAY_START);
					ifnet_delay_start_disabled++;
				}
			} else {
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					ifp->if_eflags |= IFEF_DELAY_START;
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						ifp->if_eflags &= ~(IFEF_DELAY_START);
						ifnet_delay_start_disabled++;
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			ifp->if_eflags &= ~(IFEF_DELAY_START);
		}
	} else {
		ifp->if_eflags &= ~(IFEF_DELAY_START);
	}

	/* enqueue the packet */
	error = ifclassq_enqueue(&ifp->if_snd, m);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    (error == 0 || error == EQFULL || error == EQSUSPENDED))
		ifnet_start(ifp);

	return (error);
}
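/*
 * Worked example for the delay-start heuristic above (illustrative):
 * with if_start_delay_timeout = 2 msec and if_start_delay_qlen = 10, the
 * sampling window dwin is 4 msec.  Ten or more enqueues inside one window
 * set IFEF_DELAY_START, after which ifnet_start_common() defers waking the
 * starter thread until the queue reaches the delay qlen.  Ten consecutive
 * sparse windows, or a gap of 200 msec or more between enqueues, clear the
 * flag so that interactive traffic goes back to being started immediately.
 */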
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
	rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;

	if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
	    tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL)
		(*m_p)->m_flags &= ~M_PROTO1;

	return (0);
}
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return (0);
}
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char *	frame_header;
			mbuf_t	next_packet;

			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN)
				m_freem(m);
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN)
			m_freem_list(m);
	}
}
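/*
 * Illustrative sketch (not part of the original file): a hypothetical v2
 * protocol input handler of the kind dispatched above.  v2 handlers get
 * the whole packet list in one call; v1 handlers see one packet (plus its
 * frame header) at a time.
 */
#if 0
static errno_t
example_proto_input_v2(ifnet_t ifp, protocol_family_t protocol,
    mbuf_t packet_list)
{
	mbuf_t m, next;

	for (m = packet_list; m != NULL; m = next) {
		next = m->m_nextpkt;
		m->m_nextpkt = NULL;
		/* ... classify and process one packet ... */
		m_freem(m);
	}
	return (0);
}
#endif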
static void
dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
    struct dlil_threading_info *inp, boolean_t poll)
{
	struct ifnet_stat_increment_param *d = &inp->stats;

	if (s->packets_in != 0)
		d->packets_in += s->packets_in;
	if (s->bytes_in != 0)
		d->bytes_in += s->bytes_in;
	if (s->errors_in != 0)
		d->errors_in += s->errors_in;

	if (s->packets_out != 0)
		d->packets_out += s->packets_out;
	if (s->bytes_out != 0)
		d->bytes_out += s->bytes_out;
	if (s->errors_out != 0)
		d->errors_out += s->errors_out;

	if (s->collisions != 0)
		d->collisions += s->collisions;
	if (s->dropped != 0)
		d->dropped += s->dropped;

	if (poll)
		PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
}
static void
dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
{
	struct ifnet_stat_increment_param *s = &inp->stats;

	/*
	 * Use of atomic operations is unavoidable here because
	 * these stats may also be incremented elsewhere via KPIs.
	 */
	if (s->packets_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
		s->packets_in = 0;
	}
	if (s->bytes_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
		s->bytes_in = 0;
	}
	if (s->errors_in != 0) {
		atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
		s->errors_in = 0;
	}

	if (s->packets_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
		s->packets_out = 0;
	}
	if (s->bytes_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
		s->bytes_out = 0;
	}
	if (s->errors_out != 0) {
		atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
		s->errors_out = 0;
	}

	if (s->collisions != 0) {
		atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
		s->collisions = 0;
	}
	if (s->dropped != 0) {
		atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
		s->dropped = 0;
	}
	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 */
	if (ifp->if_data_threshold &&
	    (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes >
	    ifp->if_data_threshold) {
		ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes;
		nstat_ifnet_threshold_reached(ifp->if_index);
	}

	/*
	 * No need for atomic operations as they are modified here
	 * only from within the DLIL input thread context.
	 */
	if (inp->tstats.packets != 0) {
		inp->pstats.ifi_poll_packets += inp->tstats.packets;
		inp->tstats.packets = 0;
	}
	if (inp->tstats.bytes != 0) {
		inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
		inp->tstats.bytes = 0;
	}
}
__private_extern__ void
dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
{
	return (dlil_input_packet_list_common(ifp, m, 0,
	    IFNET_MODEL_INPUT_POLL_OFF, FALSE));
}

__private_extern__ void
dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode)
{
	return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
}
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char * frame_header;
	struct if_proto * last_ifproto = NULL;
	mbuf_t pkt_first = NULL;
	mbuf_t * pkt_next = NULL;
	u_int32_t poll_thresh = 0, poll_ival = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);

	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0)
		poll_thresh = cnt;

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		int iorefcnt = 0;
		uint32_t pktf_mask;	/* pkt flags to preserve */

		if (ifp_param == NULL)
			ifp = m->m_pkthdr.rcvif;

		if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
			ifnet_poll(ifp);

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			if (!ifnet_is_attached(ifp, 1)) {
				m_freem(m);
				goto next;
			}
			iorefcnt = 1;
			pktf_mask = 0;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
		}

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN)
				goto next;
			protocol_family = 0;
		}

		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);

		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Don't adjust if the interface is a bridge member, as
		 * the adjustment will occur from the context of the
		 * bridge interface during input.
		 */
		if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;

			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		if (m->m_flags & (M_BCAST|M_MCAST))
			atomic_add_64(&ifp->if_imcasts, 1);

		/* run interface filters, exclude VLAN packets PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			error = dlil_interface_filters_input(ifp, &m,
			    &frame_header, protocol_family);
			if (error != 0) {
				if (error != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
			ifp->if_updatemcasts = 0;
		if (iorefcnt == 1)
			ifnet_decr_iorefcnt(ifp);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
}
errno_t
if_mcasts_update(struct ifnet *ifp)
{
	errno_t err;

	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
	if (err == EAFNOSUPPORT)
		err = 0;
	printf("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return (0);
}
#define	TMP_IF_PROTO_ARR_SIZE	10
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto **tmp_ifproto_arr = NULL;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	int tmp_ifproto_arr_idx = 0;
	bool tmp_malloc = false;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		goto done;

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_proto_count(ifp);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			MALLOC(tmp_ifproto_arr, struct if_proto **,
			    sizeof (*tmp_ifproto_arr) * if_proto_count,
			    M_TEMP, M_WAITOK | M_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
			tmp_malloc = true;
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_malloc) {
		FREE(tmp_ifproto_arr, M_TEMP);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL)
		ifp->if_event(ifp, event);

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);

done:
	return (kev_post_msg(event));
}
errno_t
ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
{
	struct kev_msg kev_msg;
	int result = 0;

	if (ifp == NULL || event == NULL)
		return (EINVAL);

	bzero(&kev_msg, sizeof (kev_msg));
	kev_msg.vendor_code = event->vendor_code;
	kev_msg.kev_class = event->kev_class;
	kev_msg.kev_subclass = event->kev_subclass;
	kev_msg.event_code = event->event_code;
	kev_msg.dv[0].data_ptr = &event->event_data[0];
	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
	kev_msg.dv[1].data_length = 0;

	result = dlil_event_internal(ifp, &kev_msg);

	return (result);
}
#if CONFIG_MACF_NET
#include <netinet/ip6.h>
#include <netinet/ip.h>
static int
dlil_get_socket_type(struct mbuf **mp, int family, int raw)
{
	struct mbuf *m;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int type = SOCK_RAW;

	if (!raw) {
		switch (family) {
		case PF_INET:
			m = m_pullup(*mp, sizeof(struct ip));
			if (m == NULL)
				break;
			*mp = m;
			ip = mtod(m, struct ip *);
			if (ip->ip_p == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip->ip_p == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		case PF_INET6:
			m = m_pullup(*mp, sizeof(struct ip6_hdr));
			if (m == NULL)
				break;
			*mp = m;
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_nxt == IPPROTO_TCP)
				type = SOCK_STREAM;
			else if (ip6->ip6_nxt == IPPROTO_UDP)
				type = SOCK_DGRAM;
			break;
		}
	}

	return (type);
}
#endif /* CONFIG_MACF_NET */
/*
 * This is mostly called from the context of the DLIL input thread;
 * because of that there is no need for atomic operations.
 */
static __inline void
ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_ibepackets++;
		ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_ibkpackets++;
		ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ivipackets++;
		ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ivopackets++;
		ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_ipvpackets++;
		ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
	}
}
/*
 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics.  We trade off accuracy for performance
 * by not using atomic operations here.
 */
static __inline void
ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
{
	if (!(m->m_flags & M_PKTHDR))
		return;

	switch (m_get_traffic_class(m)) {
	case MBUF_TC_BE:
		ifp->if_tc.ifi_obepackets++;
		ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_BK:
		ifp->if_tc.ifi_obkpackets++;
		ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VI:
		ifp->if_tc.ifi_ovipackets++;
		ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
		break;
	case MBUF_TC_VO:
		ifp->if_tc.ifi_ovopackets++;
		ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
		break;
	default:
		break;
	}

	if (mbuf_is_traffic_class_privileged(m)) {
		ifp->if_tc.ifi_opvpackets++;
		ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
	}
}
static void
dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
{
	mbuf_t	n = m;
	int chainlen = 0;

	while (n != NULL) {
		chainlen++;
		n = n->m_next;
	}
	switch (chainlen) {
	case 0:
		break;
	case 1:
		atomic_add_64(&cls->cls_one, 1);
		break;
	case 2:
		atomic_add_64(&cls->cls_two, 1);
		break;
	case 3:
		atomic_add_64(&cls->cls_three, 1);
		break;
	case 4:
		atomic_add_64(&cls->cls_four, 1);
		break;
	case 5:
	default:
		atomic_add_64(&cls->cls_five_or_more, 1);
		break;
	}
}
3929 * Caller should have a lock on the protocol domain if the protocol
3930 * doesn't support finer grained locking. In most cases, the lock
3931 * will be held from the socket layer and won't be released until
3932 * we return back to the socket layer.
3934 * This does mean that we must take a protocol lock before we take
3935 * an interface lock if we're going to take both. This makes sense
3936 * because a protocol is likely to interact with an ifp while it
3937 * is under the protocol lock.
3939 * An advisory code will be returned if adv is not null. This
3940 * can be used to provide feedback about interface queues to the
errno_t
dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
{
	char *frame_type = NULL;
	char *dst_linkaddr = NULL;
	int retval = 0;
	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
	struct if_proto *proto = NULL;
	mbuf_t m;
	mbuf_t send_head = NULL;
	mbuf_t *send_tail = &send_head;
	int iorefcnt = 0;
	u_int32_t pre = 0, post = 0;
	u_int32_t fpkts = 0, fbytes = 0;
	int32_t flen = 0;

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/* Get an io refcnt if the interface is attached to prevent ifnet_detach
	 * from happening while this operation is in progress */
	if (!ifnet_is_attached(ifp, 1)) {
		retval = ENXIO;
		goto cleanup;
	}
	iorefcnt = 1;

	/* update the driver's multicast filter, if needed */
	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
		ifp->if_updatemcasts = 0;

	frame_type = frame_type_buffer;
	dst_linkaddr = dst_linkaddr_buffer;

	if (raw == 0) {
		ifnet_lock_shared(ifp);
		/* callee holds a proto refcnt upon success */
		proto = find_attached_proto(ifp, proto_family);
		if (proto == NULL) {
			ifnet_lock_done(ifp);
			retval = ENXIO;
			goto cleanup;
		}
		ifnet_lock_done(ifp);
	}

preout_again:
	if (packetlist == NULL)
		goto cleanup;

	m = packetlist;
	packetlist = packetlist->m_nextpkt;
	m->m_nextpkt = NULL;

	if (raw == 0) {
		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
		retval = 0;
		if (preoutp != NULL) {
			retval = preoutp(ifp, proto_family, &m, dest, route,
			    frame_type, dst_linkaddr);

			if (retval != 0) {
				if (retval == EJUSTRETURN)
					goto preout_again;
				m_freem(m);
				goto cleanup;
			}
		}
	}

#if CONFIG_MACF_NET
	retval = mac_ifnet_check_transmit(ifp, m, proto_family,
	    dlil_get_socket_type(&m, proto_family, raw));
	if (retval != 0) {
		m_freem(m);
		goto cleanup;
	}
#endif /* CONFIG_MACF_NET */

	do {
#if CONFIG_DTRACE
		if (!raw && proto_family == PF_INET) {
			struct ip *ip = mtod(m, struct ip *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip *, ip, struct ifnet *, ifp,
			    struct ip *, ip, struct ip6_hdr *, NULL);

		} else if (!raw && proto_family == PF_INET6) {
			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
			    struct ip6_hdr *, ip6, struct ifnet *, ifp,
			    struct ip *, NULL, struct ip6_hdr *, ip6);
		}
#endif /* CONFIG_DTRACE */

		if (raw == 0 && ifp->if_framer != NULL) {
			int rcvif_set = 0;

			/*
			 * If this is a broadcast packet that needs to be
			 * looped back into the system, set the inbound ifp
			 * to that of the outbound ifp.  This will allow
			 * us to determine that it is a legitimate packet
			 * for the system.  Only set the ifp if it's not
			 * already set, just to be safe.
			 */
			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
			    m->m_pkthdr.rcvif == NULL) {
				m->m_pkthdr.rcvif = ifp;
				rcvif_set = 1;
			}

			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
			    frame_type, &pre, &post);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}

			/*
			 * For partial checksum offload, adjust the start
			 * and stuff offsets based on the prepended header.
			 */
			if ((m->m_pkthdr.csum_flags &
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
				m->m_pkthdr.csum_tx_stuff += pre;
				m->m_pkthdr.csum_tx_start += pre;
			}

			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
				dlil_output_cksum_dbg(ifp, m, pre,
				    proto_family);

			/*
			 * Clear the ifp if it was set above, and to be
			 * safe, only if it is still the same as the
			 * outbound ifp we have in context.  If it was
			 * looped back, then a copy of it was sent to the
			 * loopback interface with the rcvif set, and we
			 * are clearing the one that will go down to the
			 * layer below.
			 */
			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
				m->m_pkthdr.rcvif = NULL;
		}

		/*
		 * Let interface filters (if any) do their thing ...
		 */
		/* Do not pass VLAN tagged packets to filters PR-3586856 */
		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
			retval = dlil_interface_filters_output(ifp,
			    &m, proto_family);
			if (retval != 0) {
				if (retval != EJUSTRETURN)
					m_freem(m);
				goto next;
			}
		}
		/*
		 * Strip away M_PROTO1 bit prior to sending packet
		 * to the driver as this field may be used by the driver
		 */
		m->m_flags &= ~M_PROTO1;

		/*
		 * If the underlying interface is not capable of handling a
		 * packet whose data portion spans across physically disjoint
		 * pages, we need to "normalize" the packet so that we pass
		 * down a chain of mbufs where each mbuf points to a span that
		 * resides in the system page boundary.  If the packet does
		 * not cross page(s), the following is a no-op.
		 */
		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
			if ((m = m_normalize(m)) == NULL)
				goto next;
		}

		/*
		 * If this is a TSO packet, make sure the interface still
		 * advertises TSO capability.
		 */
		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
			retval = EMSGSIZE;
			m_freem(m);
			goto cleanup;
		}

		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
				ifp->if_fg_sendts = net_uptime();

			if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
				ifp->if_rt_sendts = net_uptime();
		}

		ifp_inc_traffic_class_out(ifp, m);
		pktap_output(ifp, proto_family, m, pre, post);

		/*
		 * Count the number of elements in the mbuf chain
		 */
		if (tx_chain_len_count) {
			dlil_count_chain_len(m, &tx_chain_len_stats);
		}

		/*
		 * Finally, call the driver.
		 */
		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen += (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			}
			*send_tail = m;
			send_tail = &m->m_nextpkt;
		} else {
			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
				flen = (m_pktlen(m) - (pre + post));
				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
			} else {
				flen = 0;
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
			    0, 0, 0, 0, 0);
			retval = (*ifp->if_output)(ifp, m);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL && adv->code == FADV_SUCCESS) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp),
				    retval);
			}
			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

next:
		m = packetlist;
		if (m != NULL) {
			packetlist = packetlist->m_nextpkt;
			m->m_nextpkt = NULL;
		}
	} while (m != NULL);

	if (send_head != NULL) {
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
		    0, 0, 0, 0, 0);
		if (ifp->if_eflags & IFEF_SENDLIST) {
			retval = (*ifp->if_output)(ifp, send_head);
			if (retval == EQFULL || retval == EQSUSPENDED) {
				if (adv != NULL) {
					adv->code = (retval == EQFULL ?
					    FADV_FLOW_CONTROLLED :
					    FADV_SUSPENDED);
				}
				retval = 0;
			}
			if (retval == 0 && flen > 0) {
				fbytes += flen;
				fpkts++;
			}
			if (retval != 0 && dlil_verbose) {
				printf("%s: output error on %s retval = %d\n",
				    __func__, if_name(ifp), retval);
			}
		} else {
			struct mbuf *send_m;

			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
			while (send_head != NULL) {
				send_m = send_head;
				send_head = send_m->m_nextpkt;
				send_m->m_nextpkt = NULL;
				retval = (*ifp->if_output)(ifp, send_m);
				if (retval == EQFULL ||
				    retval == EQSUSPENDED) {
					if (adv != NULL) {
						adv->code = (retval == EQFULL ?
						    FADV_FLOW_CONTROLLED :
						    FADV_SUSPENDED);
					}
					retval = 0;
				}
				if (retval == 0 && flen > 0) {
					fbytes += flen;
					fpkts++;
				}
				if (retval != 0 && dlil_verbose) {
					printf("%s: output error on %s "
					    "retval = %d\n",
					    __func__, if_name(ifp), retval);
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);

cleanup:
	if (fbytes > 0)
		ifp->if_fbytes += fbytes;
	if (fpkts > 0)
		ifp->if_fpackets += fpkts;
	if (proto != NULL)
		if_proto_free(proto);
	if (packetlist) /* if any packets are left, clean up */
		mbuf_freem_list(packetlist);
	if (retval == EJUSTRETURN)
		retval = 0;
	if (iorefcnt == 1)
		ifnet_decr_iorefcnt(ifp);

	return (retval);
}
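/*
 * Usage sketch (illustrative, not from the original source): a caller
 * interested in flow advisories passes a struct flowadv initialized to
 * FADV_SUCCESS and inspects it afterwards; `m` and `dest` here stand
 * in for a real packet chain and destination.
 *
 *	struct flowadv adv = { .code = FADV_SUCCESS };
 *	errno_t err = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
 *	if (err == 0 && adv.code == FADV_FLOW_CONTROLLED) {
 *		// driver queue hit its limit: pause the sending flow
 *	}
 */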
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/* Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

	if (retval == EJUSTRETURN)
		retval = 0;

cleanup:
	ifnet_decr_iorefcnt(ifp);

	return (retval);
}
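/*
 * Note on the error folding above (descriptive, added for clarity):
 * `retval` starts as EOPNOTSUPP and is only overwritten while it still
 * holds that value, so the first stage (filter, then protocol, then
 * driver) to genuinely handle the ioctl wins; EJUSTRETURN lets a stage
 * claim the ioctl and suppress later stages, and ENOTSUP is normalized
 * to EOPNOTSUPP so the two spellings of "unsupported" compare equal.
 */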
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;

	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return (ENXIO);
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return (result);

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl *)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL)
					arpstat.txurequests++;
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}

	return (result);
}
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;

__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		pop = push & ~uth->uu_network_marks;
		if (pop != 0)
			uth->uu_network_marks |= pop;
	}

	return ((net_thread_marks_t)&base[pop]);
}

__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());

		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0)
			uth->uu_network_marks &= ~unpop;
	}

	return ((net_thread_marks_t)&base[unpop]);
}

__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}

__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = get_bsdthread_info(current_thread());

		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}

__private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (uth->uu_network_marks & check);
	} else {
		return (0);
	}
}

__private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)
{
	if (check != 0) {
		struct uthread *uth = get_bsdthread_info(current_thread());
		return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
}
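/*
 * Illustrative note (not part of the original source): the token
 * returned by the push routines is just &base[bits], i.e. the set of
 * bits that this particular push actually changed, encoded as a byte
 * offset from net_thread_marks_base; popping recovers those bits by
 * pointer subtraction, so the mechanism needs no allocation and nested
 * pushes of the same bit are harmless.  A sketch, assuming the
 * NET_THREAD_HELD_PF mark bit declared alongside this KPI:
 *
 *	net_thread_marks_t t = net_thread_marks_push(NET_THREAD_HELD_PF);
 *	// ... code that must see the mark on this thread ...
 *	net_thread_marks_pop(t);  // clears only the bits this push set
 */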
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL) {
		return (FALSE);
	}
	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
}
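/*
 * Example (illustrative): a gratuitous ARP announcement for
 * 169.254.1.5 carries that address as both sender and target, so the
 * check above returns true and dlil_send_arp() below keeps the request
 * on the originating interface instead of fanning it out.
 */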
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl * sender_hw,
    const struct sockaddr * sender_proto, const struct sockaddr_dl * target_hw,
    const struct sockaddr * target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || (sender_proto != NULL &&
	    sender_proto->sa_family != target_proto->sa_family))
		return (EINVAL);

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof (struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(target_sin, sender_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}
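/*
 * Note (descriptive, added for clarity): in the IPv4LL branch above,
 * `result` starts as ENOTSUP and is replaced by the first
 * per-interface send status, so ENOTSUP comes back only when no
 * IFEF_ARPLL interface with an IPv4 source address could transmit the
 * request.
 */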
/*
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count.  This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

	/* if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
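/*
 * Usage sketch (illustrative, not from the original source): the
 * attach check and the IO refcount above are taken under a single lock
 * hold, and every successful ifnet_is_attached(ifp, 1) must be paired
 * with ifnet_decr_iorefcnt(ifp):
 *
 *	if (!ifnet_is_attached(ifp, 1))
 *		return (ENXIO);	// detach already under way
 *	// ... ifp stays usable; ifnet_detach_final() waits for us ...
 *	ifnet_decr_iorefcnt(ifp);
 */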
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}
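/*
 * Note (descriptive, added for clarity): atomic_add_16_ov() returns
 * the counter's previous value, so the modulo turns the monotonically
 * increasing hold/release counts into a circular index; the trace
 * arrays therefore always hold the IF_REF_TRACE_HIST_SIZE most recent
 * backtraces.
 */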
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	return (retval);
}
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v1 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
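/*
 * Usage sketch (illustrative, with hypothetical callback names): a
 * protocol module fills out the v1 parameter block and attaches; only
 * the callbacks it implements need to be set.
 *
 *	struct ifnet_attach_proto_param pr;
 *	bzero(&pr, sizeof (pr));
 *	pr.input = myproto_input;		// hypothetical
 *	pr.pre_output = myproto_pre_output;	// hypothetical
 *	errno_t err = ifnet_attach_protocol(ifp, PF_INET, &pr);
 *	// EEXIST means a protocol with that family is already attached
 */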
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v2 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}
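/*
 * Note (descriptive, added for clarity): the detached protocol's
 * callbacks are repointed at the ifproto_media_* stubs below rather
 * than cleared, so any thread still holding a proto reference can keep
 * calling through the KPI harmlessly until the last reference is
 * released by if_proto_free().
 */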
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}

static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}
extern int if_next_index(void);
extern int tcp_ecn_outbound;
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
	TAILQ_INIT(&ifp->if_prefixhead);

	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling.  Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	bzero(&ifp->if_agentids, sizeof(ifp->if_agentids));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		ifp->if_eflags |= IFEF_ECN_ENABLE;
		ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}
/*
 * Prepare the storage for the first/permanent link address, which must
 * have the same lifetime as the ifnet itself.  Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define	ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	bcopy(workbuf, asdl->sdl_data, namelen);
	asdl->sdl_nlen = namelen;
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = masklen;
	while (namelen != 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}
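/*
 * Worked example (illustrative): for an interface named "en0" with a
 * 6-byte link-layer address, namelen is 3, masklen is
 * offsetof(struct sockaddr_dl, sdl_data[0]) + 3, and socksize is
 * masklen + 6 rounded up to a u_int32_t boundary by ROUNDUP; the
 * mask's first namelen data bytes are set to 0xff above so that the
 * name portion compares as significant in routing lookups.
 */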
void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;

	if (ifp == NULL)
		return (EINVAL);

	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/* Reset ECN enable/disable flags */
	ifp->if_eflags &= ~IFEF_ECN_DISABLE;
	ifp->if_eflags &= ~IFEF_ECN_ENABLE;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
	ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL)
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL)
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		FREE(ifp->if_link_status, M_TEMP);
		ifp->if_link_status = NULL;
	}

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifp->if_data_threshold = 0;
	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
}
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}
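/*
 * Note (descriptive, added for clarity): msleep0() with a continuation
 * never returns to its caller; when ifnet_delayed_run is woken, the
 * scheduler re-enters ifnet_detacher_thread_cont() from the top with
 * dlil_ifnet_lock held, which is why the detacher relies on the
 * continuation rather than on stack state across the sleep.
 */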
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Uplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	/* Prefix list should be empty by now */
	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	if (if_free != NULL)
		if_free(ifp);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
6141 ifp_if_output(struct ifnet
*ifp
, struct mbuf
*m
)
6149 ifp_if_start(struct ifnet
*ifp
)
6155 ifp_if_input_poll(struct ifnet
*ifp
, u_int32_t flags
, u_int32_t max_cnt
,
6156 struct mbuf
**m_head
, struct mbuf
**m_tail
, u_int32_t
*cnt
, u_int32_t
*len
)
6158 #pragma unused(ifp, flags, max_cnt)
6170 ifp_if_ctl(struct ifnet
*ifp
, ifnet_ctl_cmd_t cmd
, u_int32_t arglen
, void *arg
)
6172 #pragma unused(ifp, cmd, arglen, arg)
6173 return (EOPNOTSUPP
);
6177 ifp_if_demux(struct ifnet
*ifp
, struct mbuf
*m
, char *fh
, protocol_family_t
*pf
)
6179 #pragma unused(ifp, fh, pf)
6181 return (EJUSTRETURN
);
6185 ifp_if_add_proto(struct ifnet
*ifp
, protocol_family_t pf
,
6186 const struct ifnet_demux_desc
*da
, u_int32_t dc
)
6188 #pragma unused(ifp, pf, da, dc)
6193 ifp_if_del_proto(struct ifnet
*ifp
, protocol_family_t pf
)
6195 #pragma unused(ifp, pf)
6200 ifp_if_check_multi(struct ifnet
*ifp
, const struct sockaddr
*sa
)
6202 #pragma unused(ifp, sa)
6203 return (EOPNOTSUPP
);
6207 ifp_if_framer(struct ifnet
*ifp
, struct mbuf
**m
,
6208 const struct sockaddr
*sa
, const char *ll
, const char *t
)
6210 #pragma unused(ifp, m, sa, ll, t)
6211 return (ifp_if_framer_extended(ifp
, m
, sa
, ll
, t
, NULL
, NULL
));
6215 ifp_if_framer_extended(struct ifnet
*ifp
, struct mbuf
**m
,
6216 const struct sockaddr
*sa
, const char *ll
, const char *t
,
6217 u_int32_t
*pre
, u_int32_t
*post
)
6219 #pragma unused(ifp, sa, ll, t)
6228 return (EJUSTRETURN
);
6232 ifp_if_ioctl(struct ifnet
*ifp
, unsigned long cmd
, void *arg
)
6234 #pragma unused(ifp, cmd, arg)
6235 return (EOPNOTSUPP
);
6239 ifp_if_set_bpf_tap(struct ifnet
*ifp
, bpf_tap_mode tm
, bpf_packet_func f
)
6241 #pragma unused(ifp, tm, f)
6242 /* XXX not sure what to do here */
6247 ifp_if_free(struct ifnet
*ifp
)
6253 ifp_if_event(struct ifnet
*ifp
, const struct kev_msg
*e
)
6255 #pragma unused(ifp, e)
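/*
 * dlil_if_acquire: recycle a detached dlil_ifnet of the same family (and
 * uniqueid, when one is supplied), or carve a fresh 64-bit aligned
 * instance out of dlif_zone.
 */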
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}
	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;
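	/*
	 * Layout sketch (exact padding depends on what zalloc() returns):
	 *
	 *	buf                      base = P2ROUNDUP(buf + 8, 8)
	 *	 |                        |
	 *	 v                        v
	 *	 +----...----+-----------+----------------------------+
	 *	 |  padding  | *pbuf ==  | struct dlil_ifnet, 8-byte  |
	 *	 |           |    buf    | aligned, dlif_size bytes   |
	 *	 +----...----+-----------+----------------------------+
	 *
	 * The hidden pointer stashed just below the aligned base is what
	 * lets the free path recover the original zalloc() address.
	 */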
	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, dlifp1);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}
	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif
	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}
	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif
	lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here, but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

__private_extern__ void
dlil_if_lock_assert(void)
{
	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */
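/*
 * Note on the locking pattern above: the cached-route lock is taken at
 * spin level for the short common path and converted to a full mutex
 * before route_copyout()/route_copyin() run, since those helpers
 * manipulate rtentry reference counts and are not safe at spin level.
 */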
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route		src_rt;
	struct sockaddr_in	*dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
			    0, 0, ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry	*rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#if INET6
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry	*rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */
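/*
 * Usage sketch (an assumed caller, not shown in this file): a forwarding
 * path that repeatedly emits packets with the same source address would
 * call ifnet_cached_rtlookup_inet(ifp, ip_src) and only pay for a full
 * rtalloc1_scoped() lookup when the cached entry is unusable or the
 * source address has changed.
 */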
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
		lqm = IFNET_LQM_THRESH_BAD;
	else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
		lqm = IFNET_LQM_THRESH_POOR;
	else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
		lqm = IFNET_LQM_THRESH_GOOD;
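	/*
	 * Example of the edge normalization: with the usual ordering
	 * THRESH_BAD < THRESH_POOR < THRESH_GOOD, a raw metric falling
	 * strictly between THRESH_BAD and THRESH_POOR is reported as
	 * THRESH_POOR, so listeners only ever observe the threshold edges;
	 * negative values (the "unknown" range) pass through untouched.
	 */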
	/*
	 * Take the lock if needed
	 */
	if (!locked)
		ifnet_lock_exclusive(ifp);

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if it was not held by the caller
		 */
		if (!locked)
			ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked)
		ifnet_lock_exclusive(ifp);
}
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID))
		return;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));

	ifnet_lock_exclusive(ifp);
}
void
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return;
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			if_index_available = ifp->if_index;
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going over this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire.  This is done when there is an explicit notification
	 * that the interface became available.
	 */
	if (if_index_available > 0)
		tcp_interface_send_probe(if_index_available);
}
void
if_get_state(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	ifnet_lock_shared(ifp);

	if_interface_state->valid_bitmask = 0;

	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_RRC_STATE_VALID;
		if_interface_state->rrc_state =
		    ifp->if_interface_state.rrc_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
		if_interface_state->lqm_state =
		    ifp->if_interface_state.lqm_state;
	}
	if (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		if_interface_state->valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		if_interface_state->interface_availability =
		    ifp->if_interface_state.interface_availability;
	}

	ifnet_lock_done(ifp);
}
errno_t
if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
{
	ifnet_lock_exclusive(ifp);
	if (conn_probe > 1) {
		ifnet_lock_done(ifp);
		return (EINVAL);
	}
	if (conn_probe == 0)
		ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
	else
		ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
	ifnet_lock_done(ifp);

	tcp_probe_connectivity(ifp, conn_probe);
	return (0);
}
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}
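/*
 * These handlers are wired up elsewhere in this file via SYSCTL_PROC; a
 * representative registration (a sketch -- the flag combination and
 * description string here are assumed) looks like:
 *
 *	SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
 *	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, 0,
 *	    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
 */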
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);

	nd6_alt_node_absent(ifp, sin6);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
	    &kev.link_data, sizeof (kev));
}
static const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		};

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			switch (sdl->sdl_type) {
			case IFT_ETHER:
				VERIFY(size == ETHER_ADDR_LEN);
				bytes = unspec;
				break;
			case IFT_IEEE1394:
				VERIFY(size == FIREWIRE_EUI64_LEN);
				bytes = unspec;
				break;
			default:
				VERIFY(FALSE);
				break;
			};
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) *sizep = size;
	return (bytes);
}
void
dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
    u_int8_t info[DLIL_MODARGLEN])
{
	struct kev_dl_issues kev;
	struct timeval tv;

	VERIFY(ifp != NULL);
	VERIFY(modid != NULL);
	_CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
	_CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);

	bzero(&kev, sizeof (kev));

	microtime(&tv);
	kev.timestamp = tv.tv_sec;
	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
	if (info != NULL)
		bcopy(info, &kev.info, DLIL_MODARGLEN);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
	    &kev.link_data, sizeof (kev));
}
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY)
		result = 0;

	return (result);
}
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}
int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
#if PF_ALTQ
		/* Throttling works only for IFCQ, not ALTQ instances */
		if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
			return (ENXIO);
#endif /* PF_ALTQ */
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
			return (result);

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
			result = EINVAL;

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0)
			result = EINVAL;

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0)
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return (result);
}
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof (l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof (l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
			level = 0;
	}

	if (err == 0) {
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
			ifp->if_log.flags = 0;
		else
			ifp->if_log.flags |= flags;

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return (err);
}
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL)
		*level = ifp->if_log.level;
	if (flags != NULL)
		*flags = ifp->if_log.flags;
	if (category != NULL)
		*category = ifp->if_log.category;
	if (subcategory != NULL)
		*subcategory = ifp->if_log.subcategory;

	return (0);
}
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL)
		return (EOPNOTSUPP);

	bzero(&na, sizeof (na));
	na.address_family = af;

	return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	    sizeof (na), &na));
}
errno_t
ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
{
	if (ifp == NULL || flowid == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	*flowid = ifp->if_flowhash;

	return (0);
}
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}
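/*
 * RB_FIND/RB_INSERT use this comparator's sign (negative, zero or
 * positive) to order entries by flow hash; lookups in this tree are by
 * exact flow hash only.
 */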
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}
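/*
 * Flow-control flow, pieced together from the routines above (a sketch):
 * a driver that wants the stack to stop feeding it calls
 * ifnet_disable_output(), which records the ifnet in ifnet_fc_tree keyed
 * by its non-zero if_flowhash; when the hardware drains, a flow advisory
 * carrying that same hash reaches ifnet_flowadv(), which looks the ifnet
 * back up and re-enables output via ifnet_enable_output().
 */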
errno_t
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	errno_t error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof (IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

#if INET6
	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof (IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;
#endif /* INET6 */

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
errno_t
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	errno_t error = 0;

	if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
		return (EINVAL);

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

#if INET6
	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			else
				error = ENOENT;
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;
#endif /* INET6 */

	default:
		error = EINVAL;
		break;
	}

	if (error == 0)
		*flags = 0;

	return (error);
}
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee that it should skip such cases
		 * by setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum;
	uint32_t hlen;

	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}
	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}
	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardware that
		 * performs its 16-bit 1's complement sum beginning at
		 * various start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
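/*
 * Sketch of the offset adjustment above: a 16-bit 1's complement sum over
 * one span of the packet can be derived from the sum over another span by
 * folding the bytes between the two start offsets into or out of the
 * existing sum -- this is what m_adj_sum16() does, so emulating a
 * different hardware start offset does not require rescanning the packet.
 */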
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}
static int
sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int err;

	if (req->oldptr == USER_ADDR_NULL) {

	}
	if (req->newptr != USER_ADDR_NULL) {
		return (EPERM);
	}
	err = SYSCTL_OUT(req, &tx_chain_len_stats,
	    sizeof(struct chain_len_stats));

	return (err);
}
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = (i + len);
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d offset=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken b_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}
		}
	}
	m_freem(m);

	printf("DLIL: SUM16 self-tests PASSED\n");
}
#define	CASE_STRINGIFY(x)	case x: return #x
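/*
 * e.g. CASE_STRINGIFY(KEV_DL_LINK_ON) expands to:
 *	case KEV_DL_LINK_ON: return "KEV_DL_LINK_ON";
 */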
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}
/*
 * Mirror the arguments of ifnet_get_local_ports_extended()
 *	ifindex
 *	protocol
 *	flags
 */
static int
sysctl_get_ports_used SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;
	int namelen = arg2;
	int error = 0;
	int idx;
	protocol_family_t protocol;
	u_int32_t flags;
	ifnet_t ifp = NULL;
	u_int8_t *bitfield = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = ENOENT;
		goto done;
	}

	if (req->oldptr == USER_ADDR_NULL) {
		error = 0;
		req->oldidx = bitstr_size(65536);
		goto done;
	}
	if (req->oldlen < bitstr_size(65536)) {
		error = ENOMEM;
		goto done;
	}

	idx = name[0];
	protocol = name[1];
	flags = name[2];

	ifnet_head_lock_shared();
	if (idx > if_index) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
	if (bitfield == NULL) {
		error = ENOMEM;
		goto done;
	}
	error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
	if (error != 0) {
		printf("%s: ifnet_get_local_ports_extended() error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
done:
	if (bitfield != NULL)
		_FREE(bitfield, M_TEMP);
	return (error);
}