]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/dlil.c
xnu-6153.121.1.tar.gz
[apple/xnu.git] / bsd / net / dlil.c
1 /*
2 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_ipsec.h>
71 #include <net/if_llreach.h>
72 #include <net/if_utun.h>
73 #include <net/kpi_interfacefilter.h>
74 #include <net/classq/classq.h>
75 #include <net/classq/classq_sfb.h>
76 #include <net/flowhash.h>
77 #include <net/ntstat.h>
78 #include <net/if_llatbl.h>
79 #include <net/net_api_stats.h>
80 #include <net/if_ports_used.h>
81 #include <net/if_vlan_var.h>
82 #include <netinet/in.h>
83 #if INET
84 #include <netinet/in_var.h>
85 #include <netinet/igmp_var.h>
86 #include <netinet/ip_var.h>
87 #include <netinet/tcp.h>
88 #include <netinet/tcp_var.h>
89 #include <netinet/udp.h>
90 #include <netinet/udp_var.h>
91 #include <netinet/if_ether.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_tclass.h>
94 #include <netinet/ip.h>
95 #include <netinet/ip_icmp.h>
96 #include <netinet/icmp_var.h>
97 #endif /* INET */
98
99 #if INET6
100 #include <net/nat464_utils.h>
101 #include <netinet6/in6_var.h>
102 #include <netinet6/nd6.h>
103 #include <netinet6/mld6_var.h>
104 #include <netinet6/scope6_var.h>
105 #include <netinet/ip6.h>
106 #include <netinet/icmp6.h>
107 #endif /* INET6 */
108 #include <net/pf_pbuf.h>
109 #include <libkern/OSAtomic.h>
110 #include <libkern/tree.h>
111
112 #include <dev/random/randomdev.h>
113 #include <machine/machine_routines.h>
114
115 #include <mach/thread_act.h>
116 #include <mach/sdt.h>
117
118 #if CONFIG_MACF
119 #include <sys/kauth.h>
120 #include <security/mac_framework.h>
121 #include <net/ethernet.h>
122 #include <net/firewire.h>
123 #endif
124
125 #if PF
126 #include <net/pfvar.h>
127 #endif /* PF */
128 #include <net/pktsched/pktsched.h>
129 #include <net/pktsched/pktsched_netem.h>
130
131 #if NECP
132 #include <net/necp.h>
133 #endif /* NECP */
134
135
136 #include <os/log.h>
137
138 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
139 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
140 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
141 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
142 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
143
144 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
145 #define MAX_LINKADDR 4 /* LONGWORDS */
146 #define M_NKE M_IFADDR
147
148 #if 1
149 #define DLIL_PRINTF printf
150 #else
151 #define DLIL_PRINTF kprintf
152 #endif
153
154 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
155 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
156
157 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
158 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
159
160 enum {
161 kProtoKPI_v1 = 1,
162 kProtoKPI_v2 = 2
163 };
164
165 /*
166 * List of if_proto structures in if_proto_hash[] is protected by
167 * the ifnet lock. The rest of the fields are initialized at protocol
168 * attach time and never change, thus no lock required as long as
169 * a reference to it is valid, via if_proto_ref().
170 */
171 struct if_proto {
172 SLIST_ENTRY(if_proto) next_hash;
173 u_int32_t refcount;
174 u_int32_t detached;
175 struct ifnet *ifp;
176 protocol_family_t protocol_family;
177 int proto_kpi;
178 union {
179 struct {
180 proto_media_input input;
181 proto_media_preout pre_output;
182 proto_media_event event;
183 proto_media_ioctl ioctl;
184 proto_media_detached detached;
185 proto_media_resolve_multi resolve_multi;
186 proto_media_send_arp send_arp;
187 } v1;
188 struct {
189 proto_media_input_v2 input;
190 proto_media_preout pre_output;
191 proto_media_event event;
192 proto_media_ioctl ioctl;
193 proto_media_detached detached;
194 proto_media_resolve_multi resolve_multi;
195 proto_media_send_arp send_arp;
196 } v2;
197 } kpi;
198 };
199
200 SLIST_HEAD(proto_hash_entry, if_proto);
201
202 #define DLIL_SDLDATALEN \
203 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
204
205 struct dlil_ifnet {
206 struct ifnet dl_if; /* public ifnet */
207 /*
208 * DLIL private fields, protected by dl_if_lock
209 */
210 decl_lck_mtx_data(, dl_if_lock);
211 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
212 u_int32_t dl_if_flags; /* flags (below) */
213 u_int32_t dl_if_refcnt; /* refcnt */
214 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
215 void *dl_if_uniqueid; /* unique interface id */
216 size_t dl_if_uniqueid_len; /* length of the unique id */
217 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
218 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
219 struct {
220 struct ifaddr ifa; /* lladdr ifa */
221 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
222 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
223 } dl_if_lladdr;
224 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
225 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
226 ctrace_t dl_if_attach; /* attach PC stacktrace */
227 ctrace_t dl_if_detach; /* detach PC stacktrace */
228 };
229
230 /* Values for dl_if_flags (private to DLIL) */
231 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
232 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
233 #define DLIF_DEBUG 0x4 /* has debugging info */
234
235 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
236
237 /* For gdb */
238 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
239
240 struct dlil_ifnet_dbg {
241 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
242 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
243 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
244 /*
245 * Circular lists of ifnet_{reference,release} callers.
246 */
247 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
248 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
249 };
250
251 #define DLIL_TO_IFP(s) (&s->dl_if)
252 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
253
254 struct ifnet_filter {
255 TAILQ_ENTRY(ifnet_filter) filt_next;
256 u_int32_t filt_skip;
257 u_int32_t filt_flags;
258 ifnet_t filt_ifp;
259 const char *filt_name;
260 void *filt_cookie;
261 protocol_family_t filt_protocol;
262 iff_input_func filt_input;
263 iff_output_func filt_output;
264 iff_event_func filt_event;
265 iff_ioctl_func filt_ioctl;
266 iff_detached_func filt_detached;
267 };
268
269 struct proto_input_entry;
270
271 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
272 static lck_grp_t *dlil_lock_group;
273 lck_grp_t *ifnet_lock_group;
274 static lck_grp_t *ifnet_head_lock_group;
275 static lck_grp_t *ifnet_snd_lock_group;
276 static lck_grp_t *ifnet_rcv_lock_group;
277 lck_attr_t *ifnet_lock_attr;
278 decl_lck_rw_data(static, ifnet_head_lock);
279 decl_lck_mtx_data(static, dlil_ifnet_lock);
280 u_int32_t dlil_filter_disable_tso_count = 0;
281
282 #if DEBUG
283 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
284 #else
285 static unsigned int ifnet_debug; /* debugging (disabled) */
286 #endif /* !DEBUG */
287 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
288 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
289 static struct zone *dlif_zone; /* zone for dlil_ifnet */
290
291 #define DLIF_ZONE_MAX IFNETS_MAX /* maximum elements in zone */
292 #define DLIF_ZONE_NAME "ifnet" /* zone name */
293
294 static unsigned int dlif_filt_size; /* size of ifnet_filter */
295 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
296
297 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
298 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
299
300 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
301 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
302
303 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
304 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
305
306 static unsigned int dlif_proto_size; /* size of if_proto */
307 static struct zone *dlif_proto_zone; /* zone for if_proto */
308
309 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
310 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
311
312 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
313 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
314 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
315
316 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
317 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
318
319 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
320 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
321 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
322
323 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
324 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
325
326 static u_int32_t net_rtref;
327
328 static struct dlil_main_threading_info dlil_main_input_thread_info;
329 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
330 (struct dlil_threading_info *)&dlil_main_input_thread_info;
331
332 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
333 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
334 static void dlil_if_trace(struct dlil_ifnet *, int);
335 static void if_proto_ref(struct if_proto *);
336 static void if_proto_free(struct if_proto *);
337 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
338 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
339 u_int32_t list_count);
340 static void if_flt_monitor_busy(struct ifnet *);
341 static void if_flt_monitor_unbusy(struct ifnet *);
342 static void if_flt_monitor_enter(struct ifnet *);
343 static void if_flt_monitor_leave(struct ifnet *);
344 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
345 char **, protocol_family_t);
346 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
347 protocol_family_t);
348 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
349 const struct sockaddr_dl *);
350 static int ifnet_lookup(struct ifnet *);
351 static void if_purgeaddrs(struct ifnet *);
352
353 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
354 struct mbuf *, char *);
355 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
356 struct mbuf *);
357 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
358 mbuf_t *, const struct sockaddr *, void *, char *, char *);
359 static void ifproto_media_event(struct ifnet *, protocol_family_t,
360 const struct kev_msg *);
361 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
362 unsigned long, void *);
363 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
364 struct sockaddr_dl *, size_t);
365 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
366 const struct sockaddr_dl *, const struct sockaddr *,
367 const struct sockaddr_dl *, const struct sockaddr *);
368
369 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
370 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
371 boolean_t poll, struct thread *tp);
372 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
373 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
374 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
375 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
376 protocol_family_t *);
377 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
378 const struct ifnet_demux_desc *, u_int32_t);
379 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
380 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
381 #if CONFIG_EMBEDDED
382 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
383 const struct sockaddr *, const char *, const char *,
384 u_int32_t *, u_int32_t *);
385 #else
386 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
387 const struct sockaddr *, const char *, const char *);
388 #endif /* CONFIG_EMBEDDED */
389 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
390 const struct sockaddr *, const char *, const char *,
391 u_int32_t *, u_int32_t *);
392 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
393 static void ifp_if_free(struct ifnet *);
394 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
395 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
396 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
397
398 static void dlil_main_input_thread_func(void *, wait_result_t);
399 static void dlil_main_input_thread_cont(void *, wait_result_t);
400
401 static void dlil_input_thread_func(void *, wait_result_t);
402 static void dlil_input_thread_cont(void *, wait_result_t);
403
404 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
405 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
406
407 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
408 static void dlil_terminate_input_thread(struct dlil_threading_info *);
409 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
410 struct dlil_threading_info *, struct ifnet *, boolean_t);
411 static boolean_t dlil_input_stats_sync(struct ifnet *,
412 struct dlil_threading_info *);
413 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
414 u_int32_t, ifnet_model_t, boolean_t);
415 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
416 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
417 static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
418 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
419 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
420 #if DEBUG || DEVELOPMENT
421 static void dlil_verify_sum16(void);
422 #endif /* DEBUG || DEVELOPMENT */
423 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
424 protocol_family_t);
425 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
426 protocol_family_t);
427
428 static void dlil_incr_pending_thread_count(void);
429 static void dlil_decr_pending_thread_count(void);
430
431 static void ifnet_detacher_thread_func(void *, wait_result_t);
432 static int ifnet_detacher_thread_cont(int);
433 static void ifnet_detach_final(struct ifnet *);
434 static void ifnet_detaching_enqueue(struct ifnet *);
435 static struct ifnet *ifnet_detaching_dequeue(void);
436
437 static void ifnet_start_thread_func(void *, wait_result_t);
438 static void ifnet_start_thread_cont(void *, wait_result_t);
439
440 static void ifnet_poll_thread_func(void *, wait_result_t);
441 static void ifnet_poll_thread_cont(void *, wait_result_t);
442
443 static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
444 boolean_t, boolean_t *);
445
446 static void ifp_src_route_copyout(struct ifnet *, struct route *);
447 static void ifp_src_route_copyin(struct ifnet *, struct route *);
448 #if INET6
449 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
450 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
451 #endif /* INET6 */
452
453 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
454 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
455 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
456 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
457 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
458 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
459 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
460 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
461 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
462 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
463 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
464
465 struct chain_len_stats tx_chain_len_stats;
466 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
467
468 #if TEST_INPUT_THREAD_TERMINATION
469 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
470 #endif /* TEST_INPUT_THREAD_TERMINATION */
471
472 /* The following are protected by dlil_ifnet_lock */
473 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
474 static u_int32_t ifnet_detaching_cnt;
475 static void *ifnet_delayed_run; /* wait channel for detaching thread */
476
477 decl_lck_mtx_data(static, ifnet_fc_lock);
478
479 static uint32_t ifnet_flowhash_seed;
480
481 struct ifnet_flowhash_key {
482 char ifk_name[IFNAMSIZ];
483 uint32_t ifk_unit;
484 uint32_t ifk_flags;
485 uint32_t ifk_eflags;
486 uint32_t ifk_capabilities;
487 uint32_t ifk_capenable;
488 uint32_t ifk_output_sched_model;
489 uint32_t ifk_rand1;
490 uint32_t ifk_rand2;
491 };
492
493 /* Flow control entry per interface */
494 struct ifnet_fc_entry {
495 RB_ENTRY(ifnet_fc_entry) ifce_entry;
496 u_int32_t ifce_flowhash;
497 struct ifnet *ifce_ifp;
498 };
499
500 static uint32_t ifnet_calc_flowhash(struct ifnet *);
501 static int ifce_cmp(const struct ifnet_fc_entry *,
502 const struct ifnet_fc_entry *);
503 static int ifnet_fc_add(struct ifnet *);
504 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
505 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
506
507 /* protected by ifnet_fc_lock */
508 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
509 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
510 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
511
512 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
513 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
514
515 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
516 #define IFNET_FC_ZONE_MAX 32
517
518 extern void bpfdetach(struct ifnet *);
519 extern void proto_input_run(void);
520
521 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
522 u_int32_t flags);
523 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
524 u_int32_t flags);
525
526 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
527
528 #if CONFIG_MACF
529 #ifdef CONFIG_EMBEDDED
530 int dlil_lladdr_ckreq = 1;
531 #else
532 int dlil_lladdr_ckreq = 0;
533 #endif
534 #endif
535
536 #if DEBUG
537 int dlil_verbose = 1;
538 #else
539 int dlil_verbose = 0;
540 #endif /* DEBUG */
541 #if IFNET_INPUT_SANITY_CHK
542 /* sanity checking of input packet lists received */
543 static u_int32_t dlil_input_sanity_check = 0;
544 #endif /* IFNET_INPUT_SANITY_CHK */
545 /* rate limit debug messages */
546 struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
547
548 SYSCTL_DECL(_net_link_generic_system);
549
550 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
551 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
552
553 #define IF_SNDQ_MINLEN 32
554 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
555 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
556 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
557 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
558
559 #define IF_RCVQ_MINLEN 32
560 #define IF_RCVQ_MAXLEN 256
561 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
562 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
563 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
564 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
565
566 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
567 u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
568 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
569 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
570 "ilog2 of EWMA decay rate of avg inbound packets");
571
572 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
573 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
574 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
575 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
576 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
577 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
578 "Q", "input poll mode freeze time");
579
580 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
581 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
582 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
583 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
584 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
585 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
586 "Q", "input poll sampling time");
587
588 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
589 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
590 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
591 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
592 "Q", "input poll interval (time)");
593
594 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
595 u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
596 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
597 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
598 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
599
600 #define IF_RXPOLL_WLOWAT 10
601 static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
602 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
603 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
604 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
605 "I", "input poll wakeup low watermark");
606
607 #define IF_RXPOLL_WHIWAT 100
608 static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
609 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
610 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
611 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
612 "I", "input poll wakeup high watermark");
613
614 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
615 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
616 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
617 "max packets per poll call");
618
619 u_int32_t if_rxpoll = 1;
620 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
621 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
622 sysctl_rxpoll, "I", "enable opportunistic input polling");
623
624 #if TEST_INPUT_THREAD_TERMINATION
625 static u_int32_t if_input_thread_termination_spin = 0;
626 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
627 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
628 &if_input_thread_termination_spin, 0,
629 sysctl_input_thread_termination_spin,
630 "I", "input thread termination spin limit");
631 #endif /* TEST_INPUT_THREAD_TERMINATION */
632
633 static u_int32_t cur_dlil_input_threads = 0;
634 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
635 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
636 "Current number of DLIL input threads");
637
638 #if IFNET_INPUT_SANITY_CHK
639 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
640 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
641 "Turn on sanity checking in DLIL input");
642 #endif /* IFNET_INPUT_SANITY_CHK */
643
644 static u_int32_t if_flowadv = 1;
645 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
647 "enable flow-advisory mechanism");
648
649 static u_int32_t if_delaybased_queue = 1;
650 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
651 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
652 "enable delay based dynamic queue sizing");
653
654 static uint64_t hwcksum_in_invalidated = 0;
655 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
656 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
657 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
658
659 uint32_t hwcksum_dbg = 0;
660 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
661 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
662 "enable hardware cksum debugging");
663
664 u_int32_t ifnet_start_delayed = 0;
665 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
666 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
667 "number of times start was delayed");
668
669 u_int32_t ifnet_delay_start_disabled = 0;
670 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
671 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
672 "number of times start was delayed");
673
674 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
675 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
676 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
677 #define HWCKSUM_DBG_MASK \
678 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
679 HWCKSUM_DBG_FINALIZE_FORCED)
680
681 static uint32_t hwcksum_dbg_mode = 0;
682 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
683 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
684 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
685
686 static uint64_t hwcksum_dbg_partial_forced = 0;
687 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
688 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
689 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
690
691 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
692 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
693 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
694 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
695
696 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
697 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
698 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
699 &hwcksum_dbg_partial_rxoff_forced, 0,
700 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
701 "forced partial cksum rx offset");
702
703 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
704 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
705 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
706 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
707 "adjusted partial cksum rx offset");
708
709 static uint64_t hwcksum_dbg_verified = 0;
710 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
711 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
712 &hwcksum_dbg_verified, "packets verified for having good checksum");
713
714 static uint64_t hwcksum_dbg_bad_cksum = 0;
715 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
716 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
717 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
718
719 static uint64_t hwcksum_dbg_bad_rxoff = 0;
720 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
721 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
722 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
723
724 static uint64_t hwcksum_dbg_adjusted = 0;
725 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
726 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
727 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
728
729 static uint64_t hwcksum_dbg_finalized_hdr = 0;
730 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
731 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
732 &hwcksum_dbg_finalized_hdr, "finalized headers");
733
734 static uint64_t hwcksum_dbg_finalized_data = 0;
735 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
736 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
737 &hwcksum_dbg_finalized_data, "finalized payloads");
738
739 uint32_t hwcksum_tx = 1;
740 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
741 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
742 "enable transmit hardware checksum offload");
743
744 uint32_t hwcksum_rx = 1;
745 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
746 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
747 "enable receive hardware checksum offload");
748
749 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
750 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
751 sysctl_tx_chain_len_stats, "S", "");
752
753 uint32_t tx_chain_len_count = 0;
754 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
755 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
756
757 static uint32_t threshold_notify = 1; /* enable/disable */
758 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
759 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
760
761 static uint32_t threshold_interval = 2; /* in seconds */
762 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
763 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
764
765 #if (DEVELOPMENT || DEBUG)
766 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
767 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
768 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
769 #endif /* DEVELOPMENT || DEBUG */
770
771 struct net_api_stats net_api_stats;
772 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
773 &net_api_stats, net_api_stats, "");
774
775
776 unsigned int net_rxpoll = 1;
777 unsigned int net_affinity = 1;
778 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
779
780 extern u_int32_t inject_buckets;
781
782 static lck_grp_attr_t *dlil_grp_attributes = NULL;
783 static lck_attr_t *dlil_lck_attributes = NULL;
784
785 /* DLIL data threshold thread call */
786 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
787
788 void
789 ifnet_filter_update_tso(boolean_t filter_enable)
790 {
791 /*
792 * update filter count and route_generation ID to let TCP
793 * know it should reevalute doing TSO or not
794 */
795 OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
796 routegenid_update();
797 }
798
799
800 #define DLIL_INPUT_CHECK(m, ifp) { \
801 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
802 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
803 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
804 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
805 /* NOTREACHED */ \
806 } \
807 }
808
809 #define DLIL_EWMA(old, new, decay) do { \
810 u_int32_t _avg; \
811 if ((_avg = (old)) > 0) \
812 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
813 else \
814 _avg = (new); \
815 (old) = _avg; \
816 } while (0)
817
818 #define MBPS (1ULL * 1000 * 1000)
819 #define GBPS (MBPS * 1000)
820
821 struct rxpoll_time_tbl {
822 u_int64_t speed; /* downlink speed */
823 u_int32_t plowat; /* packets low watermark */
824 u_int32_t phiwat; /* packets high watermark */
825 u_int32_t blowat; /* bytes low watermark */
826 u_int32_t bhiwat; /* bytes high watermark */
827 };
828
829 static struct rxpoll_time_tbl rxpoll_tbl[] = {
830 { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
831 { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
832 { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
833 { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
834 { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
835 { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
836 };
837
838 decl_lck_mtx_data(static, dlil_thread_sync_lock);
839 static uint32_t dlil_pending_thread_cnt = 0;
840 static void
841 dlil_incr_pending_thread_count(void)
842 {
843 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
844 lck_mtx_lock(&dlil_thread_sync_lock);
845 dlil_pending_thread_cnt++;
846 lck_mtx_unlock(&dlil_thread_sync_lock);
847 }
848
849 static void
850 dlil_decr_pending_thread_count(void)
851 {
852 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
853 lck_mtx_lock(&dlil_thread_sync_lock);
854 VERIFY(dlil_pending_thread_cnt > 0);
855 dlil_pending_thread_cnt--;
856 if (dlil_pending_thread_cnt == 0) {
857 wakeup(&dlil_pending_thread_cnt);
858 }
859 lck_mtx_unlock(&dlil_thread_sync_lock);
860 }
861
862 int
863 proto_hash_value(u_int32_t protocol_family)
864 {
865 /*
866 * dlil_proto_unplumb_all() depends on the mapping between
867 * the hash bucket index and the protocol family defined
868 * here; future changes must be applied there as well.
869 */
870 switch (protocol_family) {
871 case PF_INET:
872 return 0;
873 case PF_INET6:
874 return 1;
875 case PF_VLAN:
876 return 2;
877 case PF_802154:
878 return 3;
879 case PF_UNSPEC:
880 default:
881 return 4;
882 }
883 }
884
885 /*
886 * Caller must already be holding ifnet lock.
887 */
888 static struct if_proto *
889 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
890 {
891 struct if_proto *proto = NULL;
892 u_int32_t i = proto_hash_value(protocol_family);
893
894 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
895
896 if (ifp->if_proto_hash != NULL) {
897 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
898 }
899
900 while (proto != NULL && proto->protocol_family != protocol_family) {
901 proto = SLIST_NEXT(proto, next_hash);
902 }
903
904 if (proto != NULL) {
905 if_proto_ref(proto);
906 }
907
908 return proto;
909 }
910
911 static void
912 if_proto_ref(struct if_proto *proto)
913 {
914 atomic_add_32(&proto->refcount, 1);
915 }
916
917 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
918
919 static void
920 if_proto_free(struct if_proto *proto)
921 {
922 u_int32_t oldval;
923 struct ifnet *ifp = proto->ifp;
924 u_int32_t proto_family = proto->protocol_family;
925 struct kev_dl_proto_data ev_pr_data;
926
927 oldval = atomic_add_32_ov(&proto->refcount, -1);
928 if (oldval > 1) {
929 return;
930 }
931
932 /* No more reference on this, protocol must have been detached */
933 VERIFY(proto->detached);
934
935 if (proto->proto_kpi == kProtoKPI_v1) {
936 if (proto->kpi.v1.detached) {
937 proto->kpi.v1.detached(ifp, proto->protocol_family);
938 }
939 }
940 if (proto->proto_kpi == kProtoKPI_v2) {
941 if (proto->kpi.v2.detached) {
942 proto->kpi.v2.detached(ifp, proto->protocol_family);
943 }
944 }
945
946 /*
947 * Cleanup routes that may still be in the routing table for that
948 * interface/protocol pair.
949 */
950 if_rtproto_del(ifp, proto_family);
951
952 /*
953 * The reserved field carries the number of protocol still attached
954 * (subject to change)
955 */
956 ifnet_lock_shared(ifp);
957 ev_pr_data.proto_family = proto_family;
958 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
959 ifnet_lock_done(ifp);
960
961 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
962 (struct net_event_data *)&ev_pr_data,
963 sizeof(struct kev_dl_proto_data));
964
965 if (ev_pr_data.proto_remaining_count == 0) {
966 /*
967 * The protocol count has gone to zero, mark the interface down.
968 * This used to be done by configd.KernelEventMonitor, but that
969 * is inherently prone to races (rdar://problem/30810208).
970 */
971 (void) ifnet_set_flags(ifp, 0, IFF_UP);
972 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
973 dlil_post_sifflags_msg(ifp);
974 }
975
976 zfree(dlif_proto_zone, proto);
977 }
978
979 __private_extern__ void
980 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
981 {
982 #if !MACH_ASSERT
983 #pragma unused(ifp)
984 #endif
985 unsigned int type = 0;
986 int ass = 1;
987
988 switch (what) {
989 case IFNET_LCK_ASSERT_EXCLUSIVE:
990 type = LCK_RW_ASSERT_EXCLUSIVE;
991 break;
992
993 case IFNET_LCK_ASSERT_SHARED:
994 type = LCK_RW_ASSERT_SHARED;
995 break;
996
997 case IFNET_LCK_ASSERT_OWNED:
998 type = LCK_RW_ASSERT_HELD;
999 break;
1000
1001 case IFNET_LCK_ASSERT_NOTOWNED:
1002 /* nothing to do here for RW lock; bypass assert */
1003 ass = 0;
1004 break;
1005
1006 default:
1007 panic("bad ifnet assert type: %d", what);
1008 /* NOTREACHED */
1009 }
1010 if (ass) {
1011 LCK_RW_ASSERT(&ifp->if_lock, type);
1012 }
1013 }
1014
1015 __private_extern__ void
1016 ifnet_lock_shared(struct ifnet *ifp)
1017 {
1018 lck_rw_lock_shared(&ifp->if_lock);
1019 }
1020
1021 __private_extern__ void
1022 ifnet_lock_exclusive(struct ifnet *ifp)
1023 {
1024 lck_rw_lock_exclusive(&ifp->if_lock);
1025 }
1026
1027 __private_extern__ void
1028 ifnet_lock_done(struct ifnet *ifp)
1029 {
1030 lck_rw_done(&ifp->if_lock);
1031 }
1032
1033 #if INET
1034 __private_extern__ void
1035 if_inetdata_lock_shared(struct ifnet *ifp)
1036 {
1037 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1038 }
1039
1040 __private_extern__ void
1041 if_inetdata_lock_exclusive(struct ifnet *ifp)
1042 {
1043 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1044 }
1045
1046 __private_extern__ void
1047 if_inetdata_lock_done(struct ifnet *ifp)
1048 {
1049 lck_rw_done(&ifp->if_inetdata_lock);
1050 }
1051 #endif
1052
1053 #if INET6
1054 __private_extern__ void
1055 if_inet6data_lock_shared(struct ifnet *ifp)
1056 {
1057 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1058 }
1059
1060 __private_extern__ void
1061 if_inet6data_lock_exclusive(struct ifnet *ifp)
1062 {
1063 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1064 }
1065
1066 __private_extern__ void
1067 if_inet6data_lock_done(struct ifnet *ifp)
1068 {
1069 lck_rw_done(&ifp->if_inet6data_lock);
1070 }
1071 #endif
1072
1073 __private_extern__ void
1074 ifnet_head_lock_shared(void)
1075 {
1076 lck_rw_lock_shared(&ifnet_head_lock);
1077 }
1078
1079 __private_extern__ void
1080 ifnet_head_lock_exclusive(void)
1081 {
1082 lck_rw_lock_exclusive(&ifnet_head_lock);
1083 }
1084
1085 __private_extern__ void
1086 ifnet_head_done(void)
1087 {
1088 lck_rw_done(&ifnet_head_lock);
1089 }
1090
1091 __private_extern__ void
1092 ifnet_head_assert_exclusive(void)
1093 {
1094 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1095 }
1096
1097 /*
1098 * dlil_ifp_protolist
1099 * - get the list of protocols attached to the interface, or just the number
1100 * of attached protocols
1101 * - if the number returned is greater than 'list_count', truncation occurred
1102 *
1103 * Note:
1104 * - caller must already be holding ifnet lock.
1105 */
1106 static u_int32_t
1107 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1108 u_int32_t list_count)
1109 {
1110 u_int32_t count = 0;
1111 int i;
1112
1113 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1114
1115 if (ifp->if_proto_hash == NULL) {
1116 goto done;
1117 }
1118
1119 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1120 struct if_proto *proto;
1121 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1122 if (list != NULL && count < list_count) {
1123 list[count] = proto->protocol_family;
1124 }
1125 count++;
1126 }
1127 }
1128 done:
1129 return count;
1130 }
1131
1132 __private_extern__ u_int32_t
1133 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1134 {
1135 ifnet_lock_shared(ifp);
1136 count = dlil_ifp_protolist(ifp, protolist, count);
1137 ifnet_lock_done(ifp);
1138 return count;
1139 }
1140
1141 __private_extern__ void
1142 if_free_protolist(u_int32_t *list)
1143 {
1144 _FREE(list, M_TEMP);
1145 }
1146
1147 __private_extern__ int
1148 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1149 u_int32_t event_code, struct net_event_data *event_data,
1150 u_int32_t event_data_len)
1151 {
1152 struct net_event_data ev_data;
1153 struct kev_msg ev_msg;
1154
1155 bzero(&ev_msg, sizeof(ev_msg));
1156 bzero(&ev_data, sizeof(ev_data));
1157 /*
1158 * a net event always starts with a net_event_data structure
1159 * but the caller can generate a simple net event or
1160 * provide a longer event structure to post
1161 */
1162 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1163 ev_msg.kev_class = KEV_NETWORK_CLASS;
1164 ev_msg.kev_subclass = event_subclass;
1165 ev_msg.event_code = event_code;
1166
1167 if (event_data == NULL) {
1168 event_data = &ev_data;
1169 event_data_len = sizeof(struct net_event_data);
1170 }
1171
1172 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1173 event_data->if_family = ifp->if_family;
1174 event_data->if_unit = (u_int32_t)ifp->if_unit;
1175
1176 ev_msg.dv[0].data_length = event_data_len;
1177 ev_msg.dv[0].data_ptr = event_data;
1178 ev_msg.dv[1].data_length = 0;
1179
1180 bool update_generation = true;
1181 if (event_subclass == KEV_DL_SUBCLASS) {
1182 /* Don't update interface generation for frequent link quality and state changes */
1183 switch (event_code) {
1184 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1185 case KEV_DL_RRC_STATE_CHANGED:
1186 case KEV_DL_NODE_PRESENCE:
1187 case KEV_DL_NODE_ABSENCE:
1188 case KEV_DL_MASTER_ELECTED:
1189 update_generation = false;
1190 break;
1191 default:
1192 break;
1193 }
1194 }
1195
1196 return dlil_event_internal(ifp, &ev_msg, update_generation);
1197 }
1198
1199 __private_extern__ int
1200 dlil_alloc_local_stats(struct ifnet *ifp)
1201 {
1202 int ret = EINVAL;
1203 void *buf, *base, **pbuf;
1204
1205 if (ifp == NULL) {
1206 goto end;
1207 }
1208
1209 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1210 /* allocate tcpstat_local structure */
1211 buf = zalloc(dlif_tcpstat_zone);
1212 if (buf == NULL) {
1213 ret = ENOMEM;
1214 goto end;
1215 }
1216 bzero(buf, dlif_tcpstat_bufsize);
1217
1218 /* Get the 64-bit aligned base address for this object */
1219 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1220 sizeof(u_int64_t));
1221 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1222 ((intptr_t)buf + dlif_tcpstat_bufsize));
1223
1224 /*
1225 * Wind back a pointer size from the aligned base and
1226 * save the original address so we can free it later.
1227 */
1228 pbuf = (void **)((intptr_t)base - sizeof(void *));
1229 *pbuf = buf;
1230 ifp->if_tcp_stat = base;
1231
1232 /* allocate udpstat_local structure */
1233 buf = zalloc(dlif_udpstat_zone);
1234 if (buf == NULL) {
1235 ret = ENOMEM;
1236 goto end;
1237 }
1238 bzero(buf, dlif_udpstat_bufsize);
1239
1240 /* Get the 64-bit aligned base address for this object */
1241 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1242 sizeof(u_int64_t));
1243 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1244 ((intptr_t)buf + dlif_udpstat_bufsize));
1245
1246 /*
1247 * Wind back a pointer size from the aligned base and
1248 * save the original address so we can free it later.
1249 */
1250 pbuf = (void **)((intptr_t)base - sizeof(void *));
1251 *pbuf = buf;
1252 ifp->if_udp_stat = base;
1253
1254 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1255 IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
1256
1257 ret = 0;
1258 }
1259
1260 if (ifp->if_ipv4_stat == NULL) {
1261 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1262 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1263 if (ifp->if_ipv4_stat == NULL) {
1264 ret = ENOMEM;
1265 goto end;
1266 }
1267 }
1268
1269 if (ifp->if_ipv6_stat == NULL) {
1270 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1271 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1272 if (ifp->if_ipv6_stat == NULL) {
1273 ret = ENOMEM;
1274 goto end;
1275 }
1276 }
1277 end:
1278 if (ifp != NULL && ret != 0) {
1279 if (ifp->if_tcp_stat != NULL) {
1280 pbuf = (void **)
1281 ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1282 zfree(dlif_tcpstat_zone, *pbuf);
1283 ifp->if_tcp_stat = NULL;
1284 }
1285 if (ifp->if_udp_stat != NULL) {
1286 pbuf = (void **)
1287 ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1288 zfree(dlif_udpstat_zone, *pbuf);
1289 ifp->if_udp_stat = NULL;
1290 }
1291 if (ifp->if_ipv4_stat != NULL) {
1292 FREE(ifp->if_ipv4_stat, M_TEMP);
1293 ifp->if_ipv4_stat = NULL;
1294 }
1295 if (ifp->if_ipv6_stat != NULL) {
1296 FREE(ifp->if_ipv6_stat, M_TEMP);
1297 ifp->if_ipv6_stat = NULL;
1298 }
1299 }
1300
1301 return ret;
1302 }
1303
1304 static void
1305 dlil_reset_rxpoll_params(ifnet_t ifp)
1306 {
1307 ASSERT(ifp != NULL);
1308 ifnet_set_poll_cycle(ifp, NULL);
1309 ifp->if_poll_update = 0;
1310 ifp->if_poll_flags = 0;
1311 ifp->if_poll_req = 0;
1312 ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1313 bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1314 bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1315 bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1316 net_timerclear(&ifp->if_poll_mode_holdtime);
1317 net_timerclear(&ifp->if_poll_mode_lasttime);
1318 net_timerclear(&ifp->if_poll_sample_holdtime);
1319 net_timerclear(&ifp->if_poll_sample_lasttime);
1320 net_timerclear(&ifp->if_poll_dbg_lasttime);
1321 }
1322
1323 static int
1324 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1325 {
1326 boolean_t dlil_rxpoll_input;
1327 thread_continue_t func;
1328 u_int32_t limit;
1329 int error;
1330
1331 dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
1332 (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
1333
1334 /* NULL ifp indicates the main input thread, called at dlil_init time */
1335 if (ifp == NULL) {
1336 func = dlil_main_input_thread_func;
1337 VERIFY(inp == dlil_main_input_thread);
1338 (void) strlcat(inp->input_name,
1339 "main_input", DLIL_THREADNAME_LEN);
1340 } else if (dlil_rxpoll_input) {
1341 func = dlil_rxpoll_input_thread_func;
1342 VERIFY(inp != dlil_main_input_thread);
1343 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1344 "%s_input_poll", if_name(ifp));
1345 } else {
1346 func = dlil_input_thread_func;
1347 VERIFY(inp != dlil_main_input_thread);
1348 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1349 "%s_input", if_name(ifp));
1350 }
1351 VERIFY(inp->input_thr == THREAD_NULL);
1352
1353 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1354 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1355
1356 inp->ifp = ifp; /* NULL for main input thread */
1357 /*
1358 * For interfaces that support opportunistic polling, set the
1359 * low and high watermarks for outstanding inbound packets/bytes.
1360 * Also define freeze times for transitioning between modes
1361 * and updating the average.
1362 */
1363 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1364 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1365 if (ifp->if_xflags & IFXF_LEGACY) {
1366 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1367 }
1368 } else {
1369 limit = (u_int32_t)-1;
1370 }
1371
1372 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1373 if (inp == dlil_main_input_thread) {
1374 struct dlil_main_threading_info *inpm =
1375 (struct dlil_main_threading_info *)inp;
1376 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1377 }
1378
1379 error = kernel_thread_start(func, inp, &inp->input_thr);
1380 if (error == KERN_SUCCESS) {
1381 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1382 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
1383 /*
1384 * We create an affinity set so that the matching workloop
1385 * thread or the starter thread (for loopback) can be
1386 * scheduled on the same processor set as the input thread.
1387 */
1388 if (net_affinity) {
1389 struct thread *tp = inp->input_thr;
1390 u_int32_t tag;
1391 /*
1392 * Randomize to reduce the probability
1393 * of affinity tag namespace collision.
1394 */
1395 read_frandom(&tag, sizeof(tag));
1396 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1397 thread_reference(tp);
1398 inp->tag = tag;
1399 inp->net_affinity = TRUE;
1400 }
1401 }
1402 } else if (inp == dlil_main_input_thread) {
1403 panic_plain("%s: couldn't create main input thread", __func__);
1404 /* NOTREACHED */
1405 } else {
1406 panic_plain("%s: couldn't create %s input thread", __func__,
1407 if_name(ifp));
1408 /* NOTREACHED */
1409 }
1410 OSAddAtomic(1, &cur_dlil_input_threads);
1411
1412 return error;
1413 }
1414
1415 #if TEST_INPUT_THREAD_TERMINATION
1416 static int
1417 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1418 {
1419 #pragma unused(arg1, arg2)
1420 uint32_t i;
1421 int err;
1422
1423 i = if_input_thread_termination_spin;
1424
1425 err = sysctl_handle_int(oidp, &i, 0, req);
1426 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1427 return err;
1428 }
1429
1430 if (net_rxpoll == 0) {
1431 return ENXIO;
1432 }
1433
1434 if_input_thread_termination_spin = i;
1435 return err;
1436 }
1437 #endif /* TEST_INPUT_THREAD_TERMINATION */
1438
1439 static void
1440 dlil_clean_threading_info(struct dlil_threading_info *inp)
1441 {
1442 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1443 lck_grp_free(inp->lck_grp);
1444
1445 inp->input_waiting = 0;
1446 inp->wtot = 0;
1447 bzero(inp->input_name, sizeof(inp->input_name));
1448 inp->ifp = NULL;
1449 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1450 qlimit(&inp->rcvq_pkts) = 0;
1451 bzero(&inp->stats, sizeof(inp->stats));
1452
1453 VERIFY(!inp->net_affinity);
1454 inp->input_thr = THREAD_NULL;
1455 VERIFY(inp->wloop_thr == THREAD_NULL);
1456 VERIFY(inp->poll_thr == THREAD_NULL);
1457 VERIFY(inp->tag == 0);
1458 #if IFNET_INPUT_SANITY_CHK
1459 inp->input_mbuf_cnt = 0;
1460 #endif /* IFNET_INPUT_SANITY_CHK */
1461 }
1462
1463 static void
1464 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1465 {
1466 struct ifnet *ifp = inp->ifp;
1467 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
1468
1469 VERIFY(current_thread() == inp->input_thr);
1470 VERIFY(inp != dlil_main_input_thread);
1471
1472 OSAddAtomic(-1, &cur_dlil_input_threads);
1473
1474 #if TEST_INPUT_THREAD_TERMINATION
1475 { /* do something useless that won't get optimized away */
1476 uint32_t v = 1;
1477 for (uint32_t i = 0;
1478 i < if_input_thread_termination_spin;
1479 i++) {
1480 v = (i + 1) * v;
1481 }
1482 DLIL_PRINTF("the value is %d\n", v);
1483 }
1484 #endif /* TEST_INPUT_THREAD_TERMINATION */
1485
1486 lck_mtx_lock_spin(&inp->input_lck);
1487 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
1488 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1489 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1490 wakeup_one((caddr_t)&inp->input_waiting);
1491 lck_mtx_unlock(&inp->input_lck);
1492
1493 /* free up pending packets */
1494 if (pkt.cp_mbuf != NULL) {
1495 mbuf_freem_list(pkt.cp_mbuf);
1496 }
1497
1498 /* for the extra refcnt from kernel_thread_start() */
1499 thread_deallocate(current_thread());
1500
1501 if (dlil_verbose) {
1502 DLIL_PRINTF("%s: input thread terminated\n",
1503 if_name(ifp));
1504 }
1505
1506 /* this is the end */
1507 thread_terminate(current_thread());
1508 /* NOTREACHED */
1509 }
1510
1511 static kern_return_t
1512 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1513 {
1514 thread_affinity_policy_data_t policy;
1515
1516 bzero(&policy, sizeof(policy));
1517 policy.affinity_tag = tag;
1518 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1519 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
1520 }
1521
1522 void
1523 dlil_init(void)
1524 {
1525 thread_t thread = THREAD_NULL;
1526
1527 /*
1528 * The following fields must be 64-bit aligned for atomic operations.
1529 */
1530 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1531 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1532 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1533 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1534 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1535 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1536 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1537 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1538 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1539 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1540 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1541 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1542 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1543 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1544 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1545
1546 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1547 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1548 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1549 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1550 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1551 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1552 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1553 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1554 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1555 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1556 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1557 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1558 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1559 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1560 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1561
1562 /*
1563 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1564 */
1565 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1566 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1567 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1568 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1569 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1570 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1571 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1572 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1573 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1574 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1575 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1576 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1577 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1578 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1579
1580 /*
1581 * ... as well as the mbuf checksum flags counterparts.
1582 */
1583 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1584 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1585 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1586 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1587 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1588 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1589 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1590 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1591 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1592 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1593 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1594
1595 /*
1596 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1597 */
1598 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1599 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1600
1601 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1602 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1603 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1604 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1605
1606 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1607 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1608 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1609
1610 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1611 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1612 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1613 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1614 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1615 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1616 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1617 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1618 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1619 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1620 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1621 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1622 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1623 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1624 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1625 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1626 _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1627 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1628 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
1629
1630 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1631 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1632 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1633 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1634 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1635 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1636 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1637 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1638 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
1639
1640 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1641 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1642
1643 PE_parse_boot_argn("net_affinity", &net_affinity,
1644 sizeof(net_affinity));
1645
1646 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
1647
1648 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
1649
1650 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
1651
1652 VERIFY(dlil_pending_thread_cnt == 0);
1653 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1654 sizeof(struct dlil_ifnet_dbg);
1655 /* Enforce 64-bit alignment for dlil_ifnet structure */
1656 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1657 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
1658 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1659 0, DLIF_ZONE_NAME);
1660 if (dlif_zone == NULL) {
1661 panic_plain("%s: failed allocating %s", __func__,
1662 DLIF_ZONE_NAME);
1663 /* NOTREACHED */
1664 }
1665 zone_change(dlif_zone, Z_EXPAND, TRUE);
1666 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1667
1668 dlif_filt_size = sizeof(struct ifnet_filter);
1669 dlif_filt_zone = zinit(dlif_filt_size,
1670 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1671 if (dlif_filt_zone == NULL) {
1672 panic_plain("%s: failed allocating %s", __func__,
1673 DLIF_FILT_ZONE_NAME);
1674 /* NOTREACHED */
1675 }
1676 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1677 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1678
1679 dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
1680 dlif_phash_zone = zinit(dlif_phash_size,
1681 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1682 if (dlif_phash_zone == NULL) {
1683 panic_plain("%s: failed allocating %s", __func__,
1684 DLIF_PHASH_ZONE_NAME);
1685 /* NOTREACHED */
1686 }
1687 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1688 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1689
1690 dlif_proto_size = sizeof(struct if_proto);
1691 dlif_proto_zone = zinit(dlif_proto_size,
1692 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1693 if (dlif_proto_zone == NULL) {
1694 panic_plain("%s: failed allocating %s", __func__,
1695 DLIF_PROTO_ZONE_NAME);
1696 /* NOTREACHED */
1697 }
1698 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1699 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1700
1701 dlif_tcpstat_size = sizeof(struct tcpstat_local);
1702 /* Enforce 64-bit alignment for tcpstat_local structure */
1703 dlif_tcpstat_bufsize =
1704 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
1705 dlif_tcpstat_bufsize =
1706 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
1707 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1708 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1709 DLIF_TCPSTAT_ZONE_NAME);
1710 if (dlif_tcpstat_zone == NULL) {
1711 panic_plain("%s: failed allocating %s", __func__,
1712 DLIF_TCPSTAT_ZONE_NAME);
1713 /* NOTREACHED */
1714 }
1715 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1716 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1717
1718 dlif_udpstat_size = sizeof(struct udpstat_local);
1719 /* Enforce 64-bit alignment for udpstat_local structure */
1720 dlif_udpstat_bufsize =
1721 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
1722 dlif_udpstat_bufsize =
1723 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
1724 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1725 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1726 DLIF_UDPSTAT_ZONE_NAME);
1727 if (dlif_udpstat_zone == NULL) {
1728 panic_plain("%s: failed allocating %s", __func__,
1729 DLIF_UDPSTAT_ZONE_NAME);
1730 /* NOTREACHED */
1731 }
1732 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1733 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1734
1735 ifnet_llreach_init();
1736 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1737
1738 TAILQ_INIT(&dlil_ifnet_head);
1739 TAILQ_INIT(&ifnet_head);
1740 TAILQ_INIT(&ifnet_detaching_head);
1741 TAILQ_INIT(&ifnet_ordered_head);
1742
1743 /* Setup the lock groups we will use */
1744 dlil_grp_attributes = lck_grp_attr_alloc_init();
1745
1746 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1747 dlil_grp_attributes);
1748 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1749 dlil_grp_attributes);
1750 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1751 dlil_grp_attributes);
1752 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1753 dlil_grp_attributes);
1754 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1755 dlil_grp_attributes);
1756
1757 /* Setup the lock attributes we will use */
1758 dlil_lck_attributes = lck_attr_alloc_init();
1759
1760 ifnet_lock_attr = lck_attr_alloc_init();
1761
1762 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1763 dlil_lck_attributes);
1764 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1765 lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
1766
1767 /* Setup interface flow control related items */
1768 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1769
1770 ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
1771 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1772 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1773 if (ifnet_fc_zone == NULL) {
1774 panic_plain("%s: failed allocating %s", __func__,
1775 IFNET_FC_ZONE_NAME);
1776 /* NOTREACHED */
1777 }
1778 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1779 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1780
1781 /* Initialize interface address subsystem */
1782 ifa_init();
1783
1784 #if PF
1785 /* Initialize the packet filter */
1786 pfinit();
1787 #endif /* PF */
1788
1789 /* Initialize queue algorithms */
1790 classq_init();
1791
1792 /* Initialize packet schedulers */
1793 pktsched_init();
1794
1795 /* Initialize flow advisory subsystem */
1796 flowadv_init();
1797
1798 /* Initialize the pktap virtual interface */
1799 pktap_init();
1800
1801 /* Initialize the service class to dscp map */
1802 net_qos_map_init();
1803
1804 /* Initialize the interface port list */
1805 if_ports_used_init();
1806
1807 /* Initialize the interface low power mode event handler */
1808 if_low_power_evhdlr_init();
1809
1810 #if DEBUG || DEVELOPMENT
1811 /* Run self-tests */
1812 dlil_verify_sum16();
1813 #endif /* DEBUG || DEVELOPMENT */
1814
1815 /* Initialize link layer table */
1816 lltable_glbl_init();
1817
1818 /*
1819 * Create and start up the main DLIL input thread and the interface
1820 * detacher threads once everything is initialized.
1821 */
1822 dlil_incr_pending_thread_count();
1823 dlil_create_input_thread(NULL, dlil_main_input_thread);
1824
1825 /*
1826 * Create ifnet detacher thread.
1827 * When an interface gets detached, part of the detach processing
1828 * is delayed. The interface is added to delayed detach list
1829 * and this thread is woken up to call ifnet_detach_final
1830 * on these interfaces.
1831 */
1832 dlil_incr_pending_thread_count();
1833 if (kernel_thread_start(ifnet_detacher_thread_func,
1834 NULL, &thread) != KERN_SUCCESS) {
1835 panic_plain("%s: couldn't create detacher thread", __func__);
1836 /* NOTREACHED */
1837 }
1838 thread_deallocate(thread);
1839
1840 /*
1841 * Wait for the created kernel threads for dlil to get
1842 * scheduled and run at least once before we proceed
1843 */
1844 lck_mtx_lock(&dlil_thread_sync_lock);
1845 while (dlil_pending_thread_cnt != 0) {
1846 DLIL_PRINTF("%s: Waiting for all the create dlil kernel threads "
1847 "to get scheduled at least once.\n", __func__);
1848 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock, (PZERO - 1),
1849 __func__, NULL);
1850 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1851 }
1852 lck_mtx_unlock(&dlil_thread_sync_lock);
1853 DLIL_PRINTF("%s: All the created dlil kernel threads have been scheduled "
1854 "at least once. Proceeding.\n", __func__);
1855 }
1856
1857 static void
1858 if_flt_monitor_busy(struct ifnet *ifp)
1859 {
1860 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1861
1862 ++ifp->if_flt_busy;
1863 VERIFY(ifp->if_flt_busy != 0);
1864 }
1865
1866 static void
1867 if_flt_monitor_unbusy(struct ifnet *ifp)
1868 {
1869 if_flt_monitor_leave(ifp);
1870 }
1871
1872 static void
1873 if_flt_monitor_enter(struct ifnet *ifp)
1874 {
1875 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1876
1877 while (ifp->if_flt_busy) {
1878 ++ifp->if_flt_waiters;
1879 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1880 (PZERO - 1), "if_flt_monitor", NULL);
1881 }
1882 if_flt_monitor_busy(ifp);
1883 }
1884
1885 static void
1886 if_flt_monitor_leave(struct ifnet *ifp)
1887 {
1888 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1889
1890 VERIFY(ifp->if_flt_busy != 0);
1891 --ifp->if_flt_busy;
1892
1893 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1894 ifp->if_flt_waiters = 0;
1895 wakeup(&ifp->if_flt_head);
1896 }
1897 }
1898
1899 __private_extern__ int
1900 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1901 interface_filter_t *filter_ref, u_int32_t flags)
1902 {
1903 int retval = 0;
1904 struct ifnet_filter *filter = NULL;
1905
1906 ifnet_head_lock_shared();
1907 /* Check that the interface is in the global list */
1908 if (!ifnet_lookup(ifp)) {
1909 retval = ENXIO;
1910 goto done;
1911 }
1912
1913 filter = zalloc(dlif_filt_zone);
1914 if (filter == NULL) {
1915 retval = ENOMEM;
1916 goto done;
1917 }
1918 bzero(filter, dlif_filt_size);
1919
1920 /* refcnt held above during lookup */
1921 filter->filt_flags = flags;
1922 filter->filt_ifp = ifp;
1923 filter->filt_cookie = if_filter->iff_cookie;
1924 filter->filt_name = if_filter->iff_name;
1925 filter->filt_protocol = if_filter->iff_protocol;
1926 /*
1927 * Do not install filter callbacks for internal coproc interface
1928 */
1929 if (!IFNET_IS_INTCOPROC(ifp)) {
1930 filter->filt_input = if_filter->iff_input;
1931 filter->filt_output = if_filter->iff_output;
1932 filter->filt_event = if_filter->iff_event;
1933 filter->filt_ioctl = if_filter->iff_ioctl;
1934 }
1935 filter->filt_detached = if_filter->iff_detached;
1936
1937 lck_mtx_lock(&ifp->if_flt_lock);
1938 if_flt_monitor_enter(ifp);
1939
1940 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1941 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1942
1943 if_flt_monitor_leave(ifp);
1944 lck_mtx_unlock(&ifp->if_flt_lock);
1945
1946 *filter_ref = filter;
1947
1948 /*
1949 * Bump filter count and route_generation ID to let TCP
1950 * know it shouldn't do TSO on this connection
1951 */
1952 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1953 ifnet_filter_update_tso(TRUE);
1954 }
1955 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1956 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1957 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1958 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1959 }
1960 if (dlil_verbose) {
1961 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
1962 if_filter->iff_name);
1963 }
1964 done:
1965 ifnet_head_done();
1966 if (retval != 0 && ifp != NULL) {
1967 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1968 if_name(ifp), if_filter->iff_name, retval);
1969 }
1970 if (retval != 0 && filter != NULL) {
1971 zfree(dlif_filt_zone, filter);
1972 }
1973
1974 return retval;
1975 }
1976
1977 static int
1978 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1979 {
1980 int retval = 0;
1981
1982 if (detached == 0) {
1983 ifnet_t ifp = NULL;
1984
1985 ifnet_head_lock_shared();
1986 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1987 interface_filter_t entry = NULL;
1988
1989 lck_mtx_lock(&ifp->if_flt_lock);
1990 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1991 if (entry != filter || entry->filt_skip) {
1992 continue;
1993 }
1994 /*
1995 * We've found a match; since it's possible
1996 * that the thread gets blocked in the monitor,
1997 * we do the lock dance. Interface should
1998 * not be detached since we still have a use
1999 * count held during filter attach.
2000 */
2001 entry->filt_skip = 1; /* skip input/output */
2002 lck_mtx_unlock(&ifp->if_flt_lock);
2003 ifnet_head_done();
2004
2005 lck_mtx_lock(&ifp->if_flt_lock);
2006 if_flt_monitor_enter(ifp);
2007 LCK_MTX_ASSERT(&ifp->if_flt_lock,
2008 LCK_MTX_ASSERT_OWNED);
2009
2010 /* Remove the filter from the list */
2011 TAILQ_REMOVE(&ifp->if_flt_head, filter,
2012 filt_next);
2013
2014 if_flt_monitor_leave(ifp);
2015 lck_mtx_unlock(&ifp->if_flt_lock);
2016 if (dlil_verbose) {
2017 DLIL_PRINTF("%s: %s filter detached\n",
2018 if_name(ifp), filter->filt_name);
2019 }
2020 goto destroy;
2021 }
2022 lck_mtx_unlock(&ifp->if_flt_lock);
2023 }
2024 ifnet_head_done();
2025
2026 /* filter parameter is not a valid filter ref */
2027 retval = EINVAL;
2028 goto done;
2029 }
2030
2031 if (dlil_verbose) {
2032 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
2033 }
2034
2035 destroy:
2036
2037 /* Call the detached function if there is one */
2038 if (filter->filt_detached) {
2039 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
2040 }
2041
2042 /*
2043 * Decrease filter count and route_generation ID to let TCP
2044 * know it should reevalute doing TSO or not
2045 */
2046 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2047 ifnet_filter_update_tso(FALSE);
2048 }
2049
2050 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2051
2052 /* Free the filter */
2053 zfree(dlif_filt_zone, filter);
2054 filter = NULL;
2055 done:
2056 if (retval != 0 && filter != NULL) {
2057 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2058 filter->filt_name, retval);
2059 }
2060
2061 return retval;
2062 }
2063
2064 __private_extern__ void
2065 dlil_detach_filter(interface_filter_t filter)
2066 {
2067 if (filter == NULL) {
2068 return;
2069 }
2070 dlil_detach_filter_internal(filter, 0);
2071 }
2072
2073 __attribute__((noreturn))
2074 static void
2075 dlil_main_input_thread_func(void *v, wait_result_t w)
2076 {
2077 #pragma unused(w)
2078 struct dlil_threading_info *inp = v;
2079
2080 VERIFY(inp == dlil_main_input_thread);
2081 VERIFY(inp->ifp == NULL);
2082 VERIFY(current_thread() == inp->input_thr);
2083
2084 dlil_decr_pending_thread_count();
2085 lck_mtx_lock(&inp->input_lck);
2086 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2087 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2088 lck_mtx_unlock(&inp->input_lck);
2089 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2090 /* NOTREACHED */
2091 __builtin_unreachable();
2092 }
2093
2094 /*
2095 * Main input thread:
2096 *
2097 * a) handles all inbound packets for lo0
2098 * b) handles all inbound packets for interfaces with no dedicated
2099 * input thread (e.g. anything but Ethernet/PDP or those that support
2100 * opportunistic polling.)
2101 * c) protocol registrations
2102 * d) packet injections
2103 */
2104 __attribute__((noreturn))
2105 static void
2106 dlil_main_input_thread_cont(void *v, wait_result_t wres)
2107 {
2108 struct dlil_main_threading_info *inpm = v;
2109 struct dlil_threading_info *inp = v;
2110
2111 /* main input thread is uninterruptible */
2112 VERIFY(wres != THREAD_INTERRUPTED);
2113 lck_mtx_lock_spin(&inp->input_lck);
2114 VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE |
2115 DLIL_INPUT_RUNNING)));
2116 inp->input_waiting |= DLIL_INPUT_RUNNING;
2117
2118 while (1) {
2119 struct mbuf *m = NULL, *m_loop = NULL;
2120 u_int32_t m_cnt, m_cnt_loop;
2121 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2122 boolean_t proto_req;
2123
2124 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2125
2126 proto_req = (inp->input_waiting &
2127 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2128
2129 /* Packets for non-dedicated interfaces other than lo0 */
2130 m_cnt = qlen(&inp->rcvq_pkts);
2131 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2132 m = pkt.cp_mbuf;
2133
2134 /* Packets exclusive to lo0 */
2135 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2136 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2137 m_loop = pkt.cp_mbuf;
2138
2139 inp->wtot = 0;
2140
2141 lck_mtx_unlock(&inp->input_lck);
2142
2143 /*
2144 * NOTE warning %%% attention !!!!
2145 * We should think about putting some thread starvation
2146 * safeguards if we deal with long chains of packets.
2147 */
2148 if (m_loop != NULL) {
2149 dlil_input_packet_list_extended(lo_ifp, m_loop,
2150 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
2151 }
2152
2153 if (m != NULL) {
2154 dlil_input_packet_list_extended(NULL, m,
2155 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
2156 }
2157
2158 if (proto_req) {
2159 proto_input_run();
2160 }
2161
2162 lck_mtx_lock_spin(&inp->input_lck);
2163 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2164 /* main input thread cannot be terminated */
2165 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2166 if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2167 break;
2168 }
2169 }
2170
2171 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2172 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2173 lck_mtx_unlock(&inp->input_lck);
2174 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2175
2176 VERIFY(0); /* we should never get here */
2177 /* NOTREACHED */
2178 __builtin_unreachable();
2179 }
2180
2181 /*
2182 * Input thread for interfaces with legacy input model.
2183 */
2184 __attribute__((noreturn))
2185 static void
2186 dlil_input_thread_func(void *v, wait_result_t w)
2187 {
2188 #pragma unused(w)
2189 char thread_name[MAXTHREADNAMESIZE];
2190 struct dlil_threading_info *inp = v;
2191 struct ifnet *ifp = inp->ifp;
2192
2193 VERIFY(inp != dlil_main_input_thread);
2194 VERIFY(ifp != NULL);
2195 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2196 !(ifp->if_xflags & IFXF_LEGACY));
2197 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2198 !(ifp->if_xflags & IFXF_LEGACY));
2199 VERIFY(current_thread() == inp->input_thr);
2200
2201 /* construct the name for this thread, and then apply it */
2202 bzero(thread_name, sizeof(thread_name));
2203 (void) snprintf(thread_name, sizeof(thread_name),
2204 "dlil_input_%s", ifp->if_xname);
2205 thread_set_thread_name(inp->input_thr, thread_name);
2206 ifnet_decr_pending_thread_count(ifp);
2207
2208 lck_mtx_lock(&inp->input_lck);
2209 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2210 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2211 lck_mtx_unlock(&inp->input_lck);
2212 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2213 /* NOTREACHED */
2214 __builtin_unreachable();
2215 }
2216
2217 __attribute__((noreturn))
2218 static void
2219 dlil_input_thread_cont(void *v, wait_result_t wres)
2220 {
2221 struct dlil_threading_info *inp = v;
2222 struct ifnet *ifp = inp->ifp;
2223
2224 lck_mtx_lock_spin(&inp->input_lck);
2225 if (__improbable(wres == THREAD_INTERRUPTED ||
2226 (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
2227 goto terminate;
2228 }
2229
2230 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2231 inp->input_waiting |= DLIL_INPUT_RUNNING;
2232
2233 while (1) {
2234 struct mbuf *m = NULL;
2235 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2236 boolean_t notify = FALSE;
2237 u_int32_t m_cnt;
2238
2239 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2240
2241 /*
2242 * Protocol registration and injection must always use
2243 * the main input thread; in theory the latter can utilize
2244 * the corresponding input thread where the packet arrived
2245 * on, but that requires our knowing the interface in advance
2246 * (and the benefits might not worth the trouble.)
2247 */
2248 VERIFY(!(inp->input_waiting &
2249 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2250
2251 /* Packets for this interface */
2252 m_cnt = qlen(&inp->rcvq_pkts);
2253 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2254 m = pkt.cp_mbuf;
2255
2256 inp->wtot = 0;
2257
2258 notify = dlil_input_stats_sync(ifp, inp);
2259
2260 lck_mtx_unlock(&inp->input_lck);
2261
2262 if (notify) {
2263 ifnet_notify_data_threshold(ifp);
2264 }
2265
2266 /*
2267 * NOTE warning %%% attention !!!!
2268 * We should think about putting some thread starvation
2269 * safeguards if we deal with long chains of packets.
2270 */
2271 if (m != NULL) {
2272 dlil_input_packet_list_extended(NULL, m,
2273 m_cnt, ifp->if_poll_mode);
2274 }
2275
2276 lck_mtx_lock_spin(&inp->input_lck);
2277 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2278 if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
2279 DLIL_INPUT_TERMINATE))) {
2280 break;
2281 }
2282 }
2283
2284 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2285
2286 if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
2287 terminate:
2288 lck_mtx_unlock(&inp->input_lck);
2289 dlil_terminate_input_thread(inp);
2290 /* NOTREACHED */
2291 } else {
2292 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2293 lck_mtx_unlock(&inp->input_lck);
2294 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2295 /* NOTREACHED */
2296 }
2297
2298 VERIFY(0); /* we should never get here */
2299 /* NOTREACHED */
2300 __builtin_unreachable();
2301 }
2302
2303 /*
2304 * Input thread for interfaces with opportunistic polling input model.
2305 */
2306 __attribute__((noreturn))
2307 static void
2308 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2309 {
2310 #pragma unused(w)
2311 char thread_name[MAXTHREADNAMESIZE];
2312 struct dlil_threading_info *inp = v;
2313 struct ifnet *ifp = inp->ifp;
2314
2315 VERIFY(inp != dlil_main_input_thread);
2316 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2317 (ifp->if_xflags & IFXF_LEGACY));
2318 VERIFY(current_thread() == inp->input_thr);
2319
2320 /* construct the name for this thread, and then apply it */
2321 bzero(thread_name, sizeof(thread_name));
2322 (void) snprintf(thread_name, sizeof(thread_name),
2323 "dlil_input_poll_%s", ifp->if_xname);
2324 thread_set_thread_name(inp->input_thr, thread_name);
2325 ifnet_decr_pending_thread_count(ifp);
2326
2327 lck_mtx_lock(&inp->input_lck);
2328 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2329 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2330 lck_mtx_unlock(&inp->input_lck);
2331 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2332 /* NOTREACHED */
2333 __builtin_unreachable();
2334 }
2335
2336 __attribute__((noreturn))
2337 static void
2338 dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2339 {
2340 struct dlil_threading_info *inp = v;
2341 struct ifnet *ifp = inp->ifp;
2342 struct timespec ts;
2343
2344 lck_mtx_lock_spin(&inp->input_lck);
2345 if (__improbable(wres == THREAD_INTERRUPTED ||
2346 (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
2347 goto terminate;
2348 }
2349
2350 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2351 inp->input_waiting |= DLIL_INPUT_RUNNING;
2352
2353 while (1) {
2354 struct mbuf *m = NULL;
2355 u_int32_t m_cnt, m_size, poll_req = 0;
2356 ifnet_model_t mode;
2357 struct timespec now, delta;
2358 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2359 boolean_t notify;
2360 u_int64_t ival;
2361
2362 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2363
2364 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
2365 ival = IF_RXPOLL_INTERVALTIME_MIN;
2366 }
2367
2368 /* Link parameters changed? */
2369 if (ifp->if_poll_update != 0) {
2370 ifp->if_poll_update = 0;
2371 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2372 }
2373
2374 /* Current operating mode */
2375 mode = ifp->if_poll_mode;
2376
2377 /*
2378 * Protocol registration and injection must always use
2379 * the main input thread; in theory the latter can utilize
2380 * the corresponding input thread where the packet arrived
2381 * on, but that requires our knowing the interface in advance
2382 * (and the benefits might not worth the trouble.)
2383 */
2384 VERIFY(!(inp->input_waiting &
2385 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2386
2387 /* Total count of all packets */
2388 m_cnt = qlen(&inp->rcvq_pkts);
2389
2390 /* Total bytes of all packets */
2391 m_size = qsize(&inp->rcvq_pkts);
2392
2393 /* Packets for this interface */
2394 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2395 m = pkt.cp_mbuf;
2396 VERIFY(m != NULL || m_cnt == 0);
2397
2398 nanouptime(&now);
2399 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2400 *(&ifp->if_poll_sample_lasttime) = *(&now);
2401 }
2402
2403 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2404 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
2405 u_int32_t ptot, btot;
2406
2407 /* Accumulate statistics for current sampling */
2408 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
2409
2410 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
2411 goto skip;
2412 }
2413
2414 *(&ifp->if_poll_sample_lasttime) = *(&now);
2415
2416 /* Calculate min/max of inbound bytes */
2417 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2418 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2419 ifp->if_rxpoll_bmin = btot;
2420 }
2421 if (btot > ifp->if_rxpoll_bmax) {
2422 ifp->if_rxpoll_bmax = btot;
2423 }
2424
2425 /* Calculate EWMA of inbound bytes */
2426 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
2427
2428 /* Calculate min/max of inbound packets */
2429 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2430 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2431 ifp->if_rxpoll_pmin = ptot;
2432 }
2433 if (ptot > ifp->if_rxpoll_pmax) {
2434 ifp->if_rxpoll_pmax = ptot;
2435 }
2436
2437 /* Calculate EWMA of inbound packets */
2438 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
2439
2440 /* Reset sampling statistics */
2441 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
2442
2443 /* Calculate EWMA of wakeup requests */
2444 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2445 inp->wtot = 0;
2446
2447 if (dlil_verbose) {
2448 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2449 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2450 }
2451 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
2452 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2453 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2454 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
2455 "limits [%d/%d], wreq avg %d "
2456 "limits [%d/%d], bytes avg %d "
2457 "limits [%d/%d]\n", if_name(ifp),
2458 (ifp->if_poll_mode ==
2459 IFNET_MODEL_INPUT_POLL_ON) ?
2460 "ON" : "OFF", ifp->if_rxpoll_pavg,
2461 ifp->if_rxpoll_pmax,
2462 ifp->if_rxpoll_plowat,
2463 ifp->if_rxpoll_phiwat,
2464 ifp->if_rxpoll_wavg,
2465 ifp->if_rxpoll_wlowat,
2466 ifp->if_rxpoll_whiwat,
2467 ifp->if_rxpoll_bavg,
2468 ifp->if_rxpoll_blowat,
2469 ifp->if_rxpoll_bhiwat);
2470 }
2471 }
2472
2473 /* Perform mode transition, if necessary */
2474 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2475 *(&ifp->if_poll_mode_lasttime) = *(&now);
2476 }
2477
2478 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2479 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
2480 goto skip;
2481 }
2482
2483 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2484 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2485 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
2486 mode = IFNET_MODEL_INPUT_POLL_OFF;
2487 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2488 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2489 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2490 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
2491 mode = IFNET_MODEL_INPUT_POLL_ON;
2492 }
2493
2494 if (mode != ifp->if_poll_mode) {
2495 ifp->if_poll_mode = mode;
2496 *(&ifp->if_poll_mode_lasttime) = *(&now);
2497 poll_req++;
2498 }
2499 }
2500 skip:
2501 notify = dlil_input_stats_sync(ifp, inp);
2502
2503 lck_mtx_unlock(&inp->input_lck);
2504
2505 if (notify) {
2506 ifnet_notify_data_threshold(ifp);
2507 }
2508
2509 /*
2510 * If there's a mode change and interface is still attached,
2511 * perform a downcall to the driver for the new mode. Also
2512 * hold an IO refcnt on the interface to prevent it from
2513 * being detached (will be release below.)
2514 */
2515 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2516 struct ifnet_model_params p = {
2517 .model = mode, .reserved = { 0 }
2518 };
2519 errno_t err;
2520
2521 if (dlil_verbose) {
2522 DLIL_PRINTF("%s: polling is now %s, "
2523 "pkts avg %d max %d limits [%d/%d], "
2524 "wreq avg %d limits [%d/%d], "
2525 "bytes avg %d limits [%d/%d]\n",
2526 if_name(ifp),
2527 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2528 "ON" : "OFF", ifp->if_rxpoll_pavg,
2529 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2530 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2531 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2532 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2533 ifp->if_rxpoll_bhiwat);
2534 }
2535
2536 if ((err = ((*ifp->if_input_ctl)(ifp,
2537 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
2538 DLIL_PRINTF("%s: error setting polling mode "
2539 "to %s (%d)\n", if_name(ifp),
2540 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2541 "ON" : "OFF", err);
2542 }
2543
2544 switch (mode) {
2545 case IFNET_MODEL_INPUT_POLL_OFF:
2546 ifnet_set_poll_cycle(ifp, NULL);
2547 ifp->if_rxpoll_offreq++;
2548 if (err != 0) {
2549 ifp->if_rxpoll_offerr++;
2550 }
2551 break;
2552
2553 case IFNET_MODEL_INPUT_POLL_ON:
2554 net_nsectimer(&ival, &ts);
2555 ifnet_set_poll_cycle(ifp, &ts);
2556 ifnet_poll(ifp);
2557 ifp->if_rxpoll_onreq++;
2558 if (err != 0) {
2559 ifp->if_rxpoll_onerr++;
2560 }
2561 break;
2562
2563 default:
2564 VERIFY(0);
2565 /* NOTREACHED */
2566 }
2567
2568 /* Release the IO refcnt */
2569 ifnet_decr_iorefcnt(ifp);
2570 }
2571
2572 /*
2573 * NOTE warning %%% attention !!!!
2574 * We should think about putting some thread starvation
2575 * safeguards if we deal with long chains of packets.
2576 */
2577 if (m != NULL) {
2578 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2579 }
2580
2581 lck_mtx_lock_spin(&inp->input_lck);
2582 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2583 if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
2584 DLIL_INPUT_TERMINATE))) {
2585 break;
2586 }
2587 }
2588
2589 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2590
2591 if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
2592 terminate:
2593 lck_mtx_unlock(&inp->input_lck);
2594 dlil_terminate_input_thread(inp);
2595 /* NOTREACHED */
2596 } else {
2597 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2598 lck_mtx_unlock(&inp->input_lck);
2599 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2600 inp);
2601 /* NOTREACHED */
2602 }
2603
2604 VERIFY(0); /* we should never get here */
2605 /* NOTREACHED */
2606 __builtin_unreachable();
2607 }
2608
2609 errno_t
2610 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
2611 {
2612 if (p != NULL) {
2613 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2614 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2615 return EINVAL;
2616 }
2617 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2618 p->packets_lowat >= p->packets_hiwat) {
2619 return EINVAL;
2620 }
2621 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2622 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2623 return EINVAL;
2624 }
2625 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2626 p->bytes_lowat >= p->bytes_hiwat) {
2627 return EINVAL;
2628 }
2629 if (p->interval_time != 0 &&
2630 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
2631 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2632 }
2633 }
2634 return 0;
2635 }
2636
2637 void
2638 dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2639 {
2640 u_int64_t sample_holdtime, inbw;
2641
2642 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2643 sample_holdtime = 0; /* polling is disabled */
2644 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2645 ifp->if_rxpoll_blowat = 0;
2646 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2647 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2648 ifp->if_rxpoll_plim = 0;
2649 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2650 } else {
2651 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2652 u_int64_t ival;
2653 unsigned int n, i;
2654
2655 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2656 if (inbw < rxpoll_tbl[i].speed) {
2657 break;
2658 }
2659 n = i;
2660 }
2661 /* auto-tune if caller didn't specify a value */
2662 plowat = ((p == NULL || p->packets_lowat == 0) ?
2663 rxpoll_tbl[n].plowat : p->packets_lowat);
2664 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2665 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2666 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2667 rxpoll_tbl[n].blowat : p->bytes_lowat);
2668 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2669 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2670 plim = ((p == NULL || p->packets_limit == 0) ?
2671 if_rxpoll_max : p->packets_limit);
2672 ival = ((p == NULL || p->interval_time == 0) ?
2673 if_rxpoll_interval_time : p->interval_time);
2674
2675 VERIFY(plowat != 0 && phiwat != 0);
2676 VERIFY(blowat != 0 && bhiwat != 0);
2677 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2678
2679 sample_holdtime = if_rxpoll_sample_holdtime;
2680 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2681 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2682 ifp->if_rxpoll_plowat = plowat;
2683 ifp->if_rxpoll_phiwat = phiwat;
2684 ifp->if_rxpoll_blowat = blowat;
2685 ifp->if_rxpoll_bhiwat = bhiwat;
2686 ifp->if_rxpoll_plim = plim;
2687 ifp->if_rxpoll_ival = ival;
2688 }
2689
2690 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2691 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
2692
2693 if (dlil_verbose) {
2694 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
2695 "poll interval %llu nsec, pkts per poll %u, "
2696 "pkt limits [%u/%u], wreq limits [%u/%u], "
2697 "bytes limits [%u/%u]\n", if_name(ifp),
2698 inbw, sample_holdtime, ifp->if_rxpoll_ival,
2699 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2700 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2701 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2702 ifp->if_rxpoll_bhiwat);
2703 }
2704 }
2705
2706 /*
2707 * Must be called on an attached ifnet (caller is expected to check.)
2708 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2709 */
2710 errno_t
2711 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2712 boolean_t locked)
2713 {
2714 errno_t err;
2715 struct dlil_threading_info *inp;
2716
2717 VERIFY(ifp != NULL);
2718 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2719 return ENXIO;
2720 }
2721 err = dlil_rxpoll_validate_params(p);
2722 if (err != 0) {
2723 return err;
2724 }
2725
2726 if (!locked) {
2727 lck_mtx_lock(&inp->input_lck);
2728 }
2729 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2730 /*
2731 * Normally, we'd reset the parameters to the auto-tuned values
2732 * if the the input thread detects a change in link rate. If the
2733 * driver provides its own parameters right after a link rate
2734 * changes, but before the input thread gets to run, we want to
2735 * make sure to keep the driver's values. Clearing if_poll_update
2736 * will achieve that.
2737 */
2738 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2739 ifp->if_poll_update = 0;
2740 }
2741 dlil_rxpoll_update_params(ifp, p);
2742 if (!locked) {
2743 lck_mtx_unlock(&inp->input_lck);
2744 }
2745 return 0;
2746 }
2747
2748 /*
2749 * Must be called on an attached ifnet (caller is expected to check.)
2750 */
2751 errno_t
2752 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2753 {
2754 struct dlil_threading_info *inp;
2755
2756 VERIFY(ifp != NULL && p != NULL);
2757 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2758 return ENXIO;
2759 }
2760
2761 bzero(p, sizeof(*p));
2762
2763 lck_mtx_lock(&inp->input_lck);
2764 p->packets_limit = ifp->if_rxpoll_plim;
2765 p->packets_lowat = ifp->if_rxpoll_plowat;
2766 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2767 p->bytes_lowat = ifp->if_rxpoll_blowat;
2768 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2769 p->interval_time = ifp->if_rxpoll_ival;
2770 lck_mtx_unlock(&inp->input_lck);
2771
2772 return 0;
2773 }
2774
2775 errno_t
2776 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2777 const struct ifnet_stat_increment_param *s)
2778 {
2779 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2780 }
2781
2782 errno_t
2783 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2784 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2785 {
2786 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2787 }
2788
2789 errno_t
2790 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2791 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2792 {
2793 return ifnet_input_common(ifp, m_head, m_tail, s,
2794 (m_head != NULL), TRUE);
2795 }
2796
2797 static errno_t
2798 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2799 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2800 {
2801 dlil_input_func input_func;
2802 struct ifnet_stat_increment_param _s;
2803 u_int32_t m_cnt = 0, m_size = 0;
2804 struct mbuf *last;
2805 errno_t err = 0;
2806
2807 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2808 if (m_head != NULL) {
2809 mbuf_freem_list(m_head);
2810 }
2811 return EINVAL;
2812 }
2813
2814 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2815 VERIFY(m_tail == NULL || ext);
2816 VERIFY(s != NULL || !ext);
2817
2818 /*
2819 * Drop the packet(s) if the parameters are invalid, or if the
2820 * interface is no longer attached; else hold an IO refcnt to
2821 * prevent it from being detached (will be released below.)
2822 */
2823 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
2824 if (m_head != NULL) {
2825 mbuf_freem_list(m_head);
2826 }
2827 return EINVAL;
2828 }
2829
2830 input_func = ifp->if_input_dlil;
2831 VERIFY(input_func != NULL);
2832
2833 if (m_tail == NULL) {
2834 last = m_head;
2835 while (m_head != NULL) {
2836 #if IFNET_INPUT_SANITY_CHK
2837 if (dlil_input_sanity_check != 0) {
2838 DLIL_INPUT_CHECK(last, ifp);
2839 }
2840 #endif /* IFNET_INPUT_SANITY_CHK */
2841 m_cnt++;
2842 m_size += m_length(last);
2843 if (mbuf_nextpkt(last) == NULL) {
2844 break;
2845 }
2846 last = mbuf_nextpkt(last);
2847 }
2848 m_tail = last;
2849 } else {
2850 #if IFNET_INPUT_SANITY_CHK
2851 if (dlil_input_sanity_check != 0) {
2852 last = m_head;
2853 while (1) {
2854 DLIL_INPUT_CHECK(last, ifp);
2855 m_cnt++;
2856 m_size += m_length(last);
2857 if (mbuf_nextpkt(last) == NULL) {
2858 break;
2859 }
2860 last = mbuf_nextpkt(last);
2861 }
2862 } else {
2863 m_cnt = s->packets_in;
2864 m_size = s->bytes_in;
2865 last = m_tail;
2866 }
2867 #else
2868 m_cnt = s->packets_in;
2869 m_size = s->bytes_in;
2870 last = m_tail;
2871 #endif /* IFNET_INPUT_SANITY_CHK */
2872 }
2873
2874 if (last != m_tail) {
2875 panic_plain("%s: invalid input packet chain for %s, "
2876 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2877 m_tail, last);
2878 }
2879
2880 /*
2881 * Assert packet count only for the extended variant, for backwards
2882 * compatibility, since this came directly from the device driver.
2883 * Relax this assertion for input bytes, as the driver may have
2884 * included the link-layer headers in the computation; hence
2885 * m_size is just an approximation.
2886 */
2887 if (ext && s->packets_in != m_cnt) {
2888 panic_plain("%s: input packet count mismatch for %s, "
2889 "%d instead of %d\n", __func__, if_name(ifp),
2890 s->packets_in, m_cnt);
2891 }
2892
2893 if (s == NULL) {
2894 bzero(&_s, sizeof(_s));
2895 s = &_s;
2896 } else {
2897 _s = *s;
2898 }
2899 _s.packets_in = m_cnt;
2900 _s.bytes_in = m_size;
2901
2902 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2903
2904 if (ifp != lo_ifp) {
2905 /* Release the IO refcnt */
2906 ifnet_datamov_end(ifp);
2907 }
2908
2909 return err;
2910 }
2911
2912
2913 errno_t
2914 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2915 {
2916 return ifp->if_output(ifp, m);
2917 }
2918
2919 errno_t
2920 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2921 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2922 boolean_t poll, struct thread *tp)
2923 {
2924 struct dlil_threading_info *inp;
2925 u_int32_t m_cnt = s->packets_in;
2926 u_int32_t m_size = s->bytes_in;
2927 boolean_t notify = FALSE;
2928
2929 if ((inp = ifp->if_inp) == NULL) {
2930 inp = dlil_main_input_thread;
2931 }
2932
2933 /*
2934 * If there is a matching DLIL input thread associated with an
2935 * affinity set, associate this thread with the same set. We
2936 * will only do this once.
2937 */
2938 lck_mtx_lock_spin(&inp->input_lck);
2939 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2940 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2941 (poll && inp->poll_thr == THREAD_NULL))) {
2942 u_int32_t tag = inp->tag;
2943
2944 if (poll) {
2945 VERIFY(inp->poll_thr == THREAD_NULL);
2946 inp->poll_thr = tp;
2947 } else {
2948 VERIFY(inp->wloop_thr == THREAD_NULL);
2949 inp->wloop_thr = tp;
2950 }
2951 lck_mtx_unlock(&inp->input_lck);
2952
2953 /* Associate the current thread with the new affinity tag */
2954 (void) dlil_affinity_set(tp, tag);
2955
2956 /*
2957 * Take a reference on the current thread; during detach,
2958 * we will need to refer to it in order to tear down its
2959 * affinity.
2960 */
2961 thread_reference(tp);
2962 lck_mtx_lock_spin(&inp->input_lck);
2963 }
2964
2965 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2966
2967 /*
2968 * Because of loopbacked multicast we cannot stuff the ifp in
2969 * the rcvif of the packet header: loopback (lo0) packets use a
2970 * dedicated list so that we can later associate them with lo_ifp
2971 * on their way up the stack. Packets for other interfaces without
2972 * dedicated input threads go to the regular list.
2973 */
2974 if (m_head != NULL) {
2975 classq_pkt_t head, tail;
2976 CLASSQ_PKT_INIT_MBUF(&head, m_head);
2977 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
2978 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2979 struct dlil_main_threading_info *inpm =
2980 (struct dlil_main_threading_info *)inp;
2981 _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
2982 m_cnt, m_size);
2983 } else {
2984 _addq_multi(&inp->rcvq_pkts, &head, &tail,
2985 m_cnt, m_size);
2986 }
2987 }
2988
2989 #if IFNET_INPUT_SANITY_CHK
2990 if (dlil_input_sanity_check != 0) {
2991 u_int32_t count;
2992 struct mbuf *m0;
2993
2994 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
2995 count++;
2996 }
2997
2998 if (count != m_cnt) {
2999 panic_plain("%s: invalid packet count %d "
3000 "(expected %d)\n", if_name(ifp),
3001 count, m_cnt);
3002 /* NOTREACHED */
3003 }
3004
3005 inp->input_mbuf_cnt += m_cnt;
3006 }
3007 #endif /* IFNET_INPUT_SANITY_CHK */
3008
3009 dlil_input_stats_add(s, inp, ifp, poll);
3010 /*
3011 * If we're using the main input thread, synchronize the
3012 * stats now since we have the interface context. All
3013 * other cases involving dedicated input threads will
3014 * have their stats synchronized there.
3015 */
3016 if (inp == dlil_main_input_thread) {
3017 notify = dlil_input_stats_sync(ifp, inp);
3018 }
3019
3020 inp->input_waiting |= DLIL_INPUT_WAITING;
3021 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
3022 inp->wtot++;
3023 wakeup_one((caddr_t)&inp->input_waiting);
3024 }
3025 lck_mtx_unlock(&inp->input_lck);
3026
3027 if (notify) {
3028 ifnet_notify_data_threshold(ifp);
3029 }
3030
3031 return 0;
3032 }
3033
3034
3035 static void
3036 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
3037 {
3038 if (!(ifp->if_eflags & IFEF_TXSTART)) {
3039 return;
3040 }
3041 /*
3042 * If the starter thread is inactive, signal it to do work,
3043 * unless the interface is being flow controlled from below,
3044 * e.g. a virtual interface being flow controlled by a real
3045 * network interface beneath it, or it's been disabled via
3046 * a call to ifnet_disable_output().
3047 */
3048 lck_mtx_lock_spin(&ifp->if_start_lock);
3049 if (resetfc) {
3050 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
3051 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
3052 lck_mtx_unlock(&ifp->if_start_lock);
3053 return;
3054 }
3055 ifp->if_start_req++;
3056 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
3057 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3058 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
3059 ifp->if_start_delayed == 0)) {
3060 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
3061 ifp->if_start_thread);
3062 }
3063 lck_mtx_unlock(&ifp->if_start_lock);
3064 }
3065
3066 void
3067 ifnet_start(struct ifnet *ifp)
3068 {
3069 ifnet_start_common(ifp, FALSE);
3070 }
3071
3072 __attribute__((noreturn))
3073 static void
3074 ifnet_start_thread_func(void *v, wait_result_t w)
3075 {
3076 #pragma unused(w)
3077 struct ifnet *ifp = v;
3078 char thread_name[MAXTHREADNAMESIZE];
3079
3080 /* Construct the name for this thread, and then apply it. */
3081 bzero(thread_name, sizeof(thread_name));
3082 (void) snprintf(thread_name, sizeof(thread_name),
3083 "ifnet_start_%s", ifp->if_xname);
3084 ASSERT(ifp->if_start_thread == current_thread());
3085 thread_set_thread_name(current_thread(), thread_name);
3086
3087 /*
3088 * Treat the dedicated starter thread for lo0 as equivalent to
3089 * the driver workloop thread; if net_affinity is enabled for
3090 * the main input thread, associate this starter thread to it
3091 * by binding them with the same affinity tag. This is done
3092 * only once (as we only have one lo_ifp which never goes away.)
3093 */
3094 if (ifp == lo_ifp) {
3095 struct dlil_threading_info *inp = dlil_main_input_thread;
3096 struct thread *tp = current_thread();
3097
3098 lck_mtx_lock(&inp->input_lck);
3099 if (inp->net_affinity) {
3100 u_int32_t tag = inp->tag;
3101
3102 VERIFY(inp->wloop_thr == THREAD_NULL);
3103 VERIFY(inp->poll_thr == THREAD_NULL);
3104 inp->wloop_thr = tp;
3105 lck_mtx_unlock(&inp->input_lck);
3106
3107 /* Associate this thread with the affinity tag */
3108 (void) dlil_affinity_set(tp, tag);
3109 } else {
3110 lck_mtx_unlock(&inp->input_lck);
3111 }
3112 }
3113 ifnet_decr_pending_thread_count(ifp);
3114
3115 lck_mtx_lock(&ifp->if_start_lock);
3116 VERIFY(!ifp->if_start_active);
3117 (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
3118 lck_mtx_unlock(&ifp->if_start_lock);
3119 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3120 /* NOTREACHED */
3121 __builtin_unreachable();
3122 }
3123
3124 __attribute__((noreturn))
3125 static void
3126 ifnet_start_thread_cont(void *v, wait_result_t wres)
3127 {
3128 struct ifnet *ifp = v;
3129 struct ifclassq *ifq = &ifp->if_snd;
3130
3131 lck_mtx_lock(&ifp->if_start_lock);
3132 if (__improbable(wres == THREAD_INTERRUPTED ||
3133 ifp->if_start_thread == THREAD_NULL)) {
3134 goto terminate;
3135 }
3136
3137 ifp->if_start_active = 1;
3138
3139 /*
3140 * Keep on servicing until no more request.
3141 */
3142 for (;;) {
3143 u_int32_t req = ifp->if_start_req;
3144 if (!IFCQ_IS_EMPTY(ifq) &&
3145 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3146 ifp->if_start_delayed == 0 &&
3147 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
3148 (ifp->if_eflags & IFEF_DELAY_START)) {
3149 ifp->if_start_delayed = 1;
3150 ifnet_start_delayed++;
3151 break;
3152 } else {
3153 ifp->if_start_delayed = 0;
3154 }
3155 lck_mtx_unlock(&ifp->if_start_lock);
3156
3157 /*
3158 * If no longer attached, don't call start because ifp
3159 * is being destroyed; else hold an IO refcnt to
3160 * prevent the interface from being detached (will be
3161 * released below.)
3162 */
3163 if (!ifnet_datamov_begin(ifp)) {
3164 lck_mtx_lock_spin(&ifp->if_start_lock);
3165 break;
3166 }
3167
3168 /* invoke the driver's start routine */
3169 ((*ifp->if_start)(ifp));
3170
3171 /*
3172 * Release the io ref count taken above.
3173 */
3174 ifnet_datamov_end(ifp);
3175
3176 lck_mtx_lock_spin(&ifp->if_start_lock);
3177
3178 /*
3179 * If there's no pending request or if the
3180 * interface has been disabled, we're done.
3181 */
3182 if (req == ifp->if_start_req ||
3183 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3184 break;
3185 }
3186 }
3187
3188 ifp->if_start_req = 0;
3189 ifp->if_start_active = 0;
3190
3191
3192 if (__probable(ifp->if_start_thread != THREAD_NULL)) {
3193 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3194 struct timespec delay_start_ts;
3195 struct timespec *ts;
3196
3197 /*
3198 * Wakeup N ns from now if rate-controlled by TBR, and if
3199 * there are still packets in the send queue which haven't
3200 * been dequeued so far; else sleep indefinitely (ts = NULL)
3201 * until ifnet_start() is called again.
3202 */
3203 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
3204 &ifp->if_start_cycle : NULL);
3205
3206 if (ts == NULL && ifp->if_start_delayed == 1) {
3207 delay_start_ts.tv_sec = 0;
3208 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
3209 ts = &delay_start_ts;
3210 }
3211
3212 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
3213 ts = NULL;
3214 }
3215
3216 if (__improbable(ts != NULL)) {
3217 clock_interval_to_deadline((ts->tv_nsec +
3218 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3219 }
3220
3221 (void) assert_wait_deadline(&ifp->if_start_thread,
3222 THREAD_UNINT, deadline);
3223 lck_mtx_unlock(&ifp->if_start_lock);
3224 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3225 /* NOTREACHED */
3226 } else {
3227 terminate:
3228 /* interface is detached? */
3229 ifnet_set_start_cycle(ifp, NULL);
3230 lck_mtx_unlock(&ifp->if_start_lock);
3231 ifnet_purge(ifp);
3232
3233 if (dlil_verbose) {
3234 DLIL_PRINTF("%s: starter thread terminated\n",
3235 if_name(ifp));
3236 }
3237
3238 /* for the extra refcnt from kernel_thread_start() */
3239 thread_deallocate(current_thread());
3240 /* this is the end */
3241 thread_terminate(current_thread());
3242 /* NOTREACHED */
3243 }
3244
3245 /* must never get here */
3246 VERIFY(0);
3247 /* NOTREACHED */
3248 __builtin_unreachable();
3249 }
3250
3251 void
3252 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3253 {
3254 if (ts == NULL) {
3255 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3256 } else {
3257 *(&ifp->if_start_cycle) = *ts;
3258 }
3259
3260 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3261 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
3262 if_name(ifp), ts->tv_nsec);
3263 }
3264 }
3265
3266 void
3267 ifnet_poll(struct ifnet *ifp)
3268 {
3269 /*
3270 * If the poller thread is inactive, signal it to do work.
3271 */
3272 lck_mtx_lock_spin(&ifp->if_poll_lock);
3273 ifp->if_poll_req++;
3274 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
3275 ifp->if_poll_thread != THREAD_NULL) {
3276 wakeup_one((caddr_t)&ifp->if_poll_thread);
3277 }
3278 lck_mtx_unlock(&ifp->if_poll_lock);
3279 }
3280
3281 __attribute__((noreturn))
3282 static void
3283 ifnet_poll_thread_func(void *v, wait_result_t w)
3284 {
3285 #pragma unused(w)
3286 char thread_name[MAXTHREADNAMESIZE];
3287 struct ifnet *ifp = v;
3288
3289 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3290 VERIFY(current_thread() == ifp->if_poll_thread);
3291
3292 /* construct the name for this thread, and then apply it */
3293 bzero(thread_name, sizeof(thread_name));
3294 (void) snprintf(thread_name, sizeof(thread_name),
3295 "ifnet_poller_%s", ifp->if_xname);
3296 thread_set_thread_name(ifp->if_poll_thread, thread_name);
3297 ifnet_decr_pending_thread_count(ifp);
3298
3299 lck_mtx_lock(&ifp->if_poll_lock);
3300 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
3301 lck_mtx_unlock(&ifp->if_poll_lock);
3302 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3303 /* NOTREACHED */
3304 __builtin_unreachable();
3305 }
3306
3307 __attribute__((noreturn))
3308 static void
3309 ifnet_poll_thread_cont(void *v, wait_result_t wres)
3310 {
3311 struct dlil_threading_info *inp;
3312 struct ifnet *ifp = v;
3313 struct ifnet_stat_increment_param s;
3314 struct timespec start_time;
3315
3316 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3317
3318 bzero(&s, sizeof(s));
3319 net_timerclear(&start_time);
3320
3321 lck_mtx_lock_spin(&ifp->if_poll_lock);
3322 if (__improbable(wres == THREAD_INTERRUPTED ||
3323 ifp->if_poll_thread == THREAD_NULL)) {
3324 goto terminate;
3325 }
3326
3327 inp = ifp->if_inp;
3328 VERIFY(inp != NULL);
3329
3330 ifp->if_poll_flags |= IF_POLLF_RUNNING;
3331
3332 /*
3333 * Keep on servicing until no more request.
3334 */
3335 for (;;) {
3336 struct mbuf *m_head, *m_tail;
3337 u_int32_t m_lim, m_cnt, m_totlen;
3338 u_int16_t req = ifp->if_poll_req;
3339
3340 m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
3341 MAX((qlimit(&inp->rcvq_pkts)), (ifp->if_rxpoll_phiwat << 2));
3342 lck_mtx_unlock(&ifp->if_poll_lock);
3343
3344 /*
3345 * If no longer attached, there's nothing to do;
3346 * else hold an IO refcnt to prevent the interface
3347 * from being detached (will be released below.)
3348 */
3349 if (!ifnet_is_attached(ifp, 1)) {
3350 lck_mtx_lock_spin(&ifp->if_poll_lock);
3351 break;
3352 }
3353
3354 if (dlil_verbose > 1) {
3355 DLIL_PRINTF("%s: polling up to %d pkts, "
3356 "pkts avg %d max %d, wreq avg %d, "
3357 "bytes avg %d\n",
3358 if_name(ifp), m_lim,
3359 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3360 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3361 }
3362
3363 /* invoke the driver's input poll routine */
3364 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3365 &m_cnt, &m_totlen));
3366
3367 if (m_head != NULL) {
3368 VERIFY(m_tail != NULL && m_cnt > 0);
3369
3370 if (dlil_verbose > 1) {
3371 DLIL_PRINTF("%s: polled %d pkts, "
3372 "pkts avg %d max %d, wreq avg %d, "
3373 "bytes avg %d\n",
3374 if_name(ifp), m_cnt,
3375 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3376 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3377 }
3378
3379 /* stats are required for extended variant */
3380 s.packets_in = m_cnt;
3381 s.bytes_in = m_totlen;
3382
3383 (void) ifnet_input_common(ifp, m_head, m_tail,
3384 &s, TRUE, TRUE);
3385 } else {
3386 if (dlil_verbose > 1) {
3387 DLIL_PRINTF("%s: no packets, "
3388 "pkts avg %d max %d, wreq avg %d, "
3389 "bytes avg %d\n",
3390 if_name(ifp), ifp->if_rxpoll_pavg,
3391 ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
3392 ifp->if_rxpoll_bavg);
3393 }
3394
3395 (void) ifnet_input_common(ifp, NULL, NULL,
3396 NULL, FALSE, TRUE);
3397 }
3398
3399 /* Release the io ref count */
3400 ifnet_decr_iorefcnt(ifp);
3401
3402 lck_mtx_lock_spin(&ifp->if_poll_lock);
3403
3404 /* if there's no pending request, we're done */
3405 if (req == ifp->if_poll_req ||
3406 ifp->if_poll_thread == THREAD_NULL) {
3407 break;
3408 }
3409 }
3410
3411 ifp->if_poll_req = 0;
3412 ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
3413
3414 if (ifp->if_poll_thread != THREAD_NULL) {
3415 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3416 struct timespec *ts;
3417
3418 /*
3419 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3420 * until ifnet_poll() is called again.
3421 */
3422 ts = &ifp->if_poll_cycle;
3423 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
3424 ts = NULL;
3425 }
3426
3427 if (ts != NULL) {
3428 clock_interval_to_deadline((ts->tv_nsec +
3429 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3430 }
3431
3432 (void) assert_wait_deadline(&ifp->if_poll_thread,
3433 THREAD_UNINT, deadline);
3434 lck_mtx_unlock(&ifp->if_poll_lock);
3435 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3436 /* NOTREACHED */
3437 } else {
3438 terminate:
3439 /* interface is detached (maybe while asleep)? */
3440 ifnet_set_poll_cycle(ifp, NULL);
3441 lck_mtx_unlock(&ifp->if_poll_lock);
3442
3443 if (dlil_verbose) {
3444 DLIL_PRINTF("%s: poller thread terminated\n",
3445 if_name(ifp));
3446 }
3447
3448 /* for the extra refcnt from kernel_thread_start() */
3449 thread_deallocate(current_thread());
3450 /* this is the end */
3451 thread_terminate(current_thread());
3452 /* NOTREACHED */
3453 }
3454
3455 /* must never get here */
3456 VERIFY(0);
3457 /* NOTREACHED */
3458 __builtin_unreachable();
3459 }
3460
3461 void
3462 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3463 {
3464 if (ts == NULL) {
3465 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3466 } else {
3467 *(&ifp->if_poll_cycle) = *ts;
3468 }
3469
3470 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3471 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
3472 if_name(ifp), ts->tv_nsec);
3473 }
3474 }
3475
3476 void
3477 ifnet_purge(struct ifnet *ifp)
3478 {
3479 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
3480 if_qflush(ifp, 0);
3481 }
3482 }
3483
3484 void
3485 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3486 {
3487 IFCQ_LOCK_ASSERT_HELD(ifq);
3488
3489 if (!(IFCQ_IS_READY(ifq))) {
3490 return;
3491 }
3492
3493 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3494 struct tb_profile tb = {
3495 .rate = ifq->ifcq_tbr.tbr_rate_raw,
3496 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
3497 };
3498 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3499 }
3500
3501 ifclassq_update(ifq, ev);
3502 }
3503
3504 void
3505 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3506 {
3507 switch (ev) {
3508 case CLASSQ_EV_LINK_BANDWIDTH:
3509 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
3510 ifp->if_poll_update++;
3511 }
3512 break;
3513
3514 default:
3515 break;
3516 }
3517 }
3518
3519 errno_t
3520 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3521 {
3522 struct ifclassq *ifq;
3523 u_int32_t omodel;
3524 errno_t err;
3525
3526 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3527 return EINVAL;
3528 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3529 return ENXIO;
3530 }
3531
3532 ifq = &ifp->if_snd;
3533 IFCQ_LOCK(ifq);
3534 omodel = ifp->if_output_sched_model;
3535 ifp->if_output_sched_model = model;
3536 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
3537 ifp->if_output_sched_model = omodel;
3538 }
3539 IFCQ_UNLOCK(ifq);
3540
3541 return err;
3542 }
3543
3544 errno_t
3545 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3546 {
3547 if (ifp == NULL) {
3548 return EINVAL;
3549 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3550 return ENXIO;
3551 }
3552
3553 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3554
3555 return 0;
3556 }
3557
3558 errno_t
3559 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3560 {
3561 if (ifp == NULL || maxqlen == NULL) {
3562 return EINVAL;
3563 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3564 return ENXIO;
3565 }
3566
3567 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3568
3569 return 0;
3570 }
3571
3572 errno_t
3573 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3574 {
3575 errno_t err;
3576
3577 if (ifp == NULL || pkts == NULL) {
3578 err = EINVAL;
3579 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3580 err = ENXIO;
3581 } else {
3582 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3583 pkts, NULL);
3584 }
3585
3586 return err;
3587 }
3588
3589 errno_t
3590 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3591 u_int32_t *pkts, u_int32_t *bytes)
3592 {
3593 errno_t err;
3594
3595 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3596 (pkts == NULL && bytes == NULL)) {
3597 err = EINVAL;
3598 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3599 err = ENXIO;
3600 } else {
3601 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3602 }
3603
3604 return err;
3605 }
3606
3607 errno_t
3608 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3609 {
3610 struct dlil_threading_info *inp;
3611
3612 if (ifp == NULL) {
3613 return EINVAL;
3614 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3615 return ENXIO;
3616 }
3617
3618 if (maxqlen == 0) {
3619 maxqlen = if_rcvq_maxlen;
3620 } else if (maxqlen < IF_RCVQ_MINLEN) {
3621 maxqlen = IF_RCVQ_MINLEN;
3622 }
3623
3624 inp = ifp->if_inp;
3625 lck_mtx_lock(&inp->input_lck);
3626 qlimit(&inp->rcvq_pkts) = maxqlen;
3627 lck_mtx_unlock(&inp->input_lck);
3628
3629 return 0;
3630 }
3631
3632 errno_t
3633 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3634 {
3635 struct dlil_threading_info *inp;
3636
3637 if (ifp == NULL || maxqlen == NULL) {
3638 return EINVAL;
3639 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3640 return ENXIO;
3641 }
3642
3643 inp = ifp->if_inp;
3644 lck_mtx_lock(&inp->input_lck);
3645 *maxqlen = qlimit(&inp->rcvq_pkts);
3646 lck_mtx_unlock(&inp->input_lck);
3647 return 0;
3648 }
3649
3650 void
3651 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3652 uint16_t delay_timeout)
3653 {
3654 if (delay_qlen > 0 && delay_timeout > 0) {
3655 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3656 ifp->if_start_delay_qlen = min(100, delay_qlen);
3657 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3658 /* convert timeout to nanoseconds */
3659 ifp->if_start_delay_timeout *= 1000;
3660 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3661 ifp->if_xname, (uint32_t)delay_qlen,
3662 (uint32_t)delay_timeout);
3663 } else {
3664 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3665 }
3666 }
3667
3668 /*
3669 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
3670 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
3671 * buf holds the full header.
3672 */
3673 static __attribute__((noinline)) void
3674 ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
3675 {
3676 struct ip *ip;
3677 struct ip6_hdr *ip6;
3678 uint8_t lbuf[64] __attribute__((aligned(8)));
3679 uint8_t *p = buf;
3680
3681 if (ip_ver == IPVERSION) {
3682 uint8_t old_tos;
3683 uint32_t sum;
3684
3685 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3686 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
3687 bcopy(buf, lbuf, sizeof(struct ip));
3688 p = lbuf;
3689 }
3690 ip = (struct ip *)(void *)p;
3691 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
3692 return;
3693 }
3694
3695 DTRACE_IP1(clear__v4, struct ip *, ip);
3696 old_tos = ip->ip_tos;
3697 ip->ip_tos &= IPTOS_ECN_MASK;
3698 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
3699 sum = (sum >> 16) + (sum & 0xffff);
3700 ip->ip_sum = (uint16_t)(sum & 0xffff);
3701
3702 if (__improbable(p == lbuf)) {
3703 bcopy(lbuf, buf, sizeof(struct ip));
3704 }
3705 } else {
3706 uint32_t flow;
3707 ASSERT(ip_ver == IPV6_VERSION);
3708
3709 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3710 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
3711 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
3712 p = lbuf;
3713 }
3714 ip6 = (struct ip6_hdr *)(void *)p;
3715 flow = ntohl(ip6->ip6_flow);
3716 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
3717 return;
3718 }
3719
3720 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
3721 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
3722
3723 if (__improbable(p == lbuf)) {
3724 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
3725 }
3726 }
3727 }
3728
3729 static inline errno_t
3730 ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
3731 boolean_t *pdrop)
3732 {
3733 volatile uint64_t *fg_ts = NULL;
3734 volatile uint64_t *rt_ts = NULL;
3735 struct timespec now;
3736 u_int64_t now_nsec = 0;
3737 int error = 0;
3738 uint8_t *mcast_buf = NULL;
3739 uint8_t ip_ver;
3740
3741 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3742
3743 /*
3744 * If packet already carries a timestamp, either from dlil_output()
3745 * or from flowswitch, use it here. Otherwise, record timestamp.
3746 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3747 * the timestamp value is used internally there.
3748 */
3749 switch (p->cp_ptype) {
3750 case QP_MBUF:
3751 ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
3752 ASSERT(p->cp_mbuf->m_nextpkt == NULL);
3753
3754 if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3755 p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
3756 nanouptime(&now);
3757 net_timernsec(&now, &now_nsec);
3758 p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
3759 }
3760 p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3761 /*
3762 * If the packet service class is not background,
3763 * update the timestamp to indicate recent activity
3764 * on a foreground socket.
3765 */
3766 if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3767 p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3768 if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
3769 PKTF_SO_BACKGROUND)) {
3770 ifp->if_fg_sendts = _net_uptime;
3771 if (fg_ts != NULL) {
3772 *fg_ts = _net_uptime;
3773 }
3774 }
3775 if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3776 ifp->if_rt_sendts = _net_uptime;
3777 if (rt_ts != NULL) {
3778 *rt_ts = _net_uptime;
3779 }
3780 }
3781 }
3782
3783 /*
3784 * Some Wi-Fi AP implementations do not correctly handle
3785 * multicast IP packets with DSCP bits set (radr://9331522).
3786 * As a workaround we clear the DSCP bits and set the service
3787 * class to BE.
3788 */
3789 if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
3790 IFNET_IS_WIFI_INFRA(ifp)) {
3791 size_t len = mbuf_len(p->cp_mbuf), hlen;
3792 struct ether_header *eh;
3793 boolean_t pullup = FALSE;
3794 uint16_t etype;
3795
3796 if (__improbable(len < sizeof(struct ether_header))) {
3797 DTRACE_IP1(small__ether, size_t, len);
3798 if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
3799 sizeof(struct ether_header))) == NULL) {
3800 return ENOMEM;
3801 }
3802 }
3803 eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
3804 etype = ntohs(eh->ether_type);
3805 if (etype == ETHERTYPE_IP) {
3806 hlen = sizeof(struct ether_header) +
3807 sizeof(struct ip);
3808 if (len < hlen) {
3809 DTRACE_IP1(small__v4, size_t, len);
3810 pullup = TRUE;
3811 }
3812 ip_ver = IPVERSION;
3813 } else if (etype == ETHERTYPE_IPV6) {
3814 hlen = sizeof(struct ether_header) +
3815 sizeof(struct ip6_hdr);
3816 if (len < hlen) {
3817 DTRACE_IP1(small__v6, size_t, len);
3818 pullup = TRUE;
3819 }
3820 ip_ver = IPV6_VERSION;
3821 } else {
3822 DTRACE_IP1(invalid__etype, uint16_t, etype);
3823 break;
3824 }
3825 if (pullup) {
3826 if ((p->cp_mbuf = m_pullup(p->cp_mbuf, hlen)) ==
3827 NULL) {
3828 return ENOMEM;
3829 }
3830
3831 eh = (struct ether_header *)mbuf_data(
3832 p->cp_mbuf);
3833 }
3834 mbuf_set_service_class(p->cp_mbuf, MBUF_SC_BE);
3835 mcast_buf = (uint8_t *)(eh + 1);
3836 /*
3837 * ifnet_mcast_clear_dscp() will finish the work below.
3838 * Note that the pullups above ensure that mcast_buf
3839 * points to a full IP header.
3840 */
3841 }
3842 break;
3843
3844
3845 default:
3846 VERIFY(0);
3847 /* NOTREACHED */
3848 __builtin_unreachable();
3849 }
3850
3851 if (mcast_buf != NULL) {
3852 ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
3853 }
3854
3855 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3856 if (now_nsec == 0) {
3857 nanouptime(&now);
3858 net_timernsec(&now, &now_nsec);
3859 }
3860 /*
3861 * If the driver chose to delay start callback for
3862 * coalescing multiple packets, Then use the following
3863 * heuristics to make sure that start callback will
3864 * be delayed only when bulk data transfer is detected.
3865 * 1. number of packets enqueued in (delay_win * 2) is
3866 * greater than or equal to the delay qlen.
3867 * 2. If delay_start is enabled it will stay enabled for
3868 * another 10 idle windows. This is to take into account
3869 * variable RTT and burst traffic.
3870 * 3. If the time elapsed since last enqueue is more
3871 * than 200ms we disable delaying start callback. This is
3872 * is to take idle time into account.
3873 */
3874 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3875 if (ifp->if_start_delay_swin > 0) {
3876 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3877 ifp->if_start_delay_cnt++;
3878 } else if ((now_nsec - ifp->if_start_delay_swin)
3879 >= (200 * 1000 * 1000)) {
3880 ifp->if_start_delay_swin = now_nsec;
3881 ifp->if_start_delay_cnt = 1;
3882 ifp->if_start_delay_idle = 0;
3883 if (ifp->if_eflags & IFEF_DELAY_START) {
3884 ifp->if_eflags &=
3885 ~(IFEF_DELAY_START);
3886 ifnet_delay_start_disabled++;
3887 }
3888 } else {
3889 if (ifp->if_start_delay_cnt >=
3890 ifp->if_start_delay_qlen) {
3891 ifp->if_eflags |= IFEF_DELAY_START;
3892 ifp->if_start_delay_idle = 0;
3893 } else {
3894 if (ifp->if_start_delay_idle >= 10) {
3895 ifp->if_eflags &=
3896 ~(IFEF_DELAY_START);
3897 ifnet_delay_start_disabled++;
3898 } else {
3899 ifp->if_start_delay_idle++;
3900 }
3901 }
3902 ifp->if_start_delay_swin = now_nsec;
3903 ifp->if_start_delay_cnt = 1;
3904 }
3905 } else {
3906 ifp->if_start_delay_swin = now_nsec;
3907 ifp->if_start_delay_cnt = 1;
3908 ifp->if_start_delay_idle = 0;
3909 ifp->if_eflags &= ~(IFEF_DELAY_START);
3910 }
3911 } else {
3912 ifp->if_eflags &= ~(IFEF_DELAY_START);
3913 }
3914
3915 /* enqueue the packet (caller consumes object) */
3916 error = ifclassq_enqueue(&ifp->if_snd, p, pdrop);
3917
3918 /*
3919 * Tell the driver to start dequeueing; do this even when the queue
3920 * for the packet is suspended (EQSUSPENDED), as the driver could still
3921 * be dequeueing from other unsuspended queues.
3922 */
3923 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3924 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
3925 ifnet_start(ifp);
3926 }
3927
3928 return error;
3929 }
3930
3931 int
3932 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
3933 {
3934 struct ifnet *ifp = handle;
3935 boolean_t pdrop; /* dummy */
3936 uint32_t i;
3937
3938 ASSERT(n_pkts >= 1);
3939 for (i = 0; i < n_pkts - 1; i++) {
3940 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
3941 FALSE, &pdrop);
3942 }
3943 /* flush with the last packet */
3944 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
3945
3946 return 0;
3947 }
3948
3949 static inline errno_t
3950 ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
3951 boolean_t *pdrop)
3952 {
3953 if (ifp->if_output_netem != NULL) {
3954 return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
3955 } else {
3956 return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
3957 }
3958 }
3959
3960 errno_t
3961 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3962 {
3963 boolean_t pdrop;
3964 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
3965 }
3966
3967 errno_t
3968 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3969 boolean_t *pdrop)
3970 {
3971 classq_pkt_t pkt;
3972
3973 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3974 m->m_nextpkt != NULL) {
3975 if (m != NULL) {
3976 m_freem_list(m);
3977 *pdrop = TRUE;
3978 }
3979 return EINVAL;
3980 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3981 !IF_FULLY_ATTACHED(ifp)) {
3982 /* flag tested without lock for performance */
3983 m_freem(m);
3984 *pdrop = TRUE;
3985 return ENXIO;
3986 } else if (!(ifp->if_flags & IFF_UP)) {
3987 m_freem(m);
3988 *pdrop = TRUE;
3989 return ENETDOWN;
3990 }
3991
3992 CLASSQ_PKT_INIT_MBUF(&pkt, m);
3993 return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
3994 }
3995
3996
3997 errno_t
3998 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3999 {
4000 errno_t rc;
4001 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4002
4003 if (ifp == NULL || mp == NULL) {
4004 return EINVAL;
4005 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4006 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4007 return ENXIO;
4008 }
4009 if (!ifnet_is_attached(ifp, 1)) {
4010 return ENXIO;
4011 }
4012
4013 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
4014 &pkt, NULL, NULL, NULL);
4015 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4016 ifnet_decr_iorefcnt(ifp);
4017 *mp = pkt.cp_mbuf;
4018 return rc;
4019 }
4020
4021 errno_t
4022 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
4023 struct mbuf **mp)
4024 {
4025 errno_t rc;
4026 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4027
4028 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
4029 return EINVAL;
4030 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4031 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4032 return ENXIO;
4033 }
4034 if (!ifnet_is_attached(ifp, 1)) {
4035 return ENXIO;
4036 }
4037
4038 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
4039 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
4040 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4041 ifnet_decr_iorefcnt(ifp);
4042 *mp = pkt.cp_mbuf;
4043 return rc;
4044 }
4045
4046 errno_t
4047 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
4048 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4049 {
4050 errno_t rc;
4051 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4052 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4053
4054 if (ifp == NULL || head == NULL || pkt_limit < 1) {
4055 return EINVAL;
4056 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4057 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4058 return ENXIO;
4059 }
4060 if (!ifnet_is_attached(ifp, 1)) {
4061 return ENXIO;
4062 }
4063
4064 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
4065 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
4066 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4067 ifnet_decr_iorefcnt(ifp);
4068 *head = pkt_head.cp_mbuf;
4069 if (tail != NULL) {
4070 *tail = pkt_tail.cp_mbuf;
4071 }
4072 return rc;
4073 }
4074
4075 errno_t
4076 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
4077 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4078 {
4079 errno_t rc;
4080 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4081 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4082
4083 if (ifp == NULL || head == NULL || byte_limit < 1) {
4084 return EINVAL;
4085 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4086 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4087 return ENXIO;
4088 }
4089 if (!ifnet_is_attached(ifp, 1)) {
4090 return ENXIO;
4091 }
4092
4093 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
4094 byte_limit, &pkt_head, &pkt_tail, cnt, len);
4095 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4096 ifnet_decr_iorefcnt(ifp);
4097 *head = pkt_head.cp_mbuf;
4098 if (tail != NULL) {
4099 *tail = pkt_tail.cp_mbuf;
4100 }
4101 return rc;
4102 }
4103
4104 errno_t
4105 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
4106 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
4107 u_int32_t *len)
4108 {
4109 errno_t rc;
4110 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4111 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4112
4113 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
4114 !MBUF_VALID_SC(sc)) {
4115 return EINVAL;
4116 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4117 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4118 return ENXIO;
4119 }
4120 if (!ifnet_is_attached(ifp, 1)) {
4121 return ENXIO;
4122 }
4123
4124 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
4125 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
4126 cnt, len);
4127 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4128 ifnet_decr_iorefcnt(ifp);
4129 *head = pkt_head.cp_mbuf;
4130 if (tail != NULL) {
4131 *tail = pkt_tail.cp_mbuf;
4132 }
4133 return rc;
4134 }
4135
4136 #if !CONFIG_EMBEDDED
4137 errno_t
4138 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
4139 const struct sockaddr *dest, const char *dest_linkaddr,
4140 const char *frame_type, u_int32_t *pre, u_int32_t *post)
4141 {
4142 if (pre != NULL) {
4143 *pre = 0;
4144 }
4145 if (post != NULL) {
4146 *post = 0;
4147 }
4148
4149 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
4150 }
4151 #endif /* !CONFIG_EMBEDDED */
4152
4153 static boolean_t
4154 packet_has_vlan_tag(struct mbuf * m)
4155 {
4156 u_int tag = 0;
4157
4158 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
4159 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
4160 if (tag == 0) {
4161 /* the packet is just priority-tagged, clear the bit */
4162 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
4163 }
4164 }
4165 return tag != 0;
4166 }
4167
4168 static int
4169 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
4170 char **frame_header_p, protocol_family_t protocol_family)
4171 {
4172 boolean_t is_vlan_packet = FALSE;
4173 struct ifnet_filter *filter;
4174 struct mbuf *m = *m_p;
4175
4176 is_vlan_packet = packet_has_vlan_tag(m);
4177
4178 /*
4179 * Pass the inbound packet to the interface filters
4180 */
4181 lck_mtx_lock_spin(&ifp->if_flt_lock);
4182 /* prevent filter list from changing in case we drop the lock */
4183 if_flt_monitor_busy(ifp);
4184 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4185 int result;
4186
4187 /* exclude VLAN packets from external filters PR-3586856 */
4188 if (is_vlan_packet &&
4189 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4190 continue;
4191 }
4192
4193 if (!filter->filt_skip && filter->filt_input != NULL &&
4194 (filter->filt_protocol == 0 ||
4195 filter->filt_protocol == protocol_family)) {
4196 lck_mtx_unlock(&ifp->if_flt_lock);
4197
4198 result = (*filter->filt_input)(filter->filt_cookie,
4199 ifp, protocol_family, m_p, frame_header_p);
4200
4201 lck_mtx_lock_spin(&ifp->if_flt_lock);
4202 if (result != 0) {
4203 /* we're done with the filter list */
4204 if_flt_monitor_unbusy(ifp);
4205 lck_mtx_unlock(&ifp->if_flt_lock);
4206 return result;
4207 }
4208 }
4209 }
4210 /* we're done with the filter list */
4211 if_flt_monitor_unbusy(ifp);
4212 lck_mtx_unlock(&ifp->if_flt_lock);
4213
4214 /*
4215 * Strip away M_PROTO1 bit prior to sending packet up the stack as
4216 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
4217 */
4218 if (*m_p != NULL) {
4219 (*m_p)->m_flags &= ~M_PROTO1;
4220 }
4221
4222 return 0;
4223 }
4224
4225 static int
4226 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4227 protocol_family_t protocol_family)
4228 {
4229 boolean_t is_vlan_packet;
4230 struct ifnet_filter *filter;
4231 struct mbuf *m = *m_p;
4232
4233 is_vlan_packet = packet_has_vlan_tag(m);
4234
4235 /*
4236 * Pass the outbound packet to the interface filters
4237 */
4238 lck_mtx_lock_spin(&ifp->if_flt_lock);
4239 /* prevent filter list from changing in case we drop the lock */
4240 if_flt_monitor_busy(ifp);
4241 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4242 int result;
4243
4244 /* exclude VLAN packets from external filters PR-3586856 */
4245 if (is_vlan_packet &&
4246 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4247 continue;
4248 }
4249
4250 if (!filter->filt_skip && filter->filt_output != NULL &&
4251 (filter->filt_protocol == 0 ||
4252 filter->filt_protocol == protocol_family)) {
4253 lck_mtx_unlock(&ifp->if_flt_lock);
4254
4255 result = filter->filt_output(filter->filt_cookie, ifp,
4256 protocol_family, m_p);
4257
4258 lck_mtx_lock_spin(&ifp->if_flt_lock);
4259 if (result != 0) {
4260 /* we're done with the filter list */
4261 if_flt_monitor_unbusy(ifp);
4262 lck_mtx_unlock(&ifp->if_flt_lock);
4263 return result;
4264 }
4265 }
4266 }
4267 /* we're done with the filter list */
4268 if_flt_monitor_unbusy(ifp);
4269 lck_mtx_unlock(&ifp->if_flt_lock);
4270
4271 return 0;
4272 }
4273
4274 static void
4275 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
4276 {
4277 int error;
4278
4279 if (ifproto->proto_kpi == kProtoKPI_v1) {
4280 /* Version 1 protocols get one packet at a time */
4281 while (m != NULL) {
4282 char * frame_header;
4283 mbuf_t next_packet;
4284
4285 next_packet = m->m_nextpkt;
4286 m->m_nextpkt = NULL;
4287 frame_header = m->m_pkthdr.pkt_hdr;
4288 m->m_pkthdr.pkt_hdr = NULL;
4289 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4290 ifproto->protocol_family, m, frame_header);
4291 if (error != 0 && error != EJUSTRETURN) {
4292 m_freem(m);
4293 }
4294 m = next_packet;
4295 }
4296 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
4297 /* Version 2 protocols support packet lists */
4298 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
4299 ifproto->protocol_family, m);
4300 if (error != 0 && error != EJUSTRETURN) {
4301 m_freem_list(m);
4302 }
4303 }
4304 }
4305
4306 static void
4307 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
4308 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
4309 {
4310 struct ifnet_stat_increment_param *d = &inp->stats;
4311
4312 if (s->packets_in != 0) {
4313 d->packets_in += s->packets_in;
4314 }
4315 if (s->bytes_in != 0) {
4316 d->bytes_in += s->bytes_in;
4317 }
4318 if (s->errors_in != 0) {
4319 d->errors_in += s->errors_in;
4320 }
4321
4322 if (s->packets_out != 0) {
4323 d->packets_out += s->packets_out;
4324 }
4325 if (s->bytes_out != 0) {
4326 d->bytes_out += s->bytes_out;
4327 }
4328 if (s->errors_out != 0) {
4329 d->errors_out += s->errors_out;
4330 }
4331
4332 if (s->collisions != 0) {
4333 d->collisions += s->collisions;
4334 }
4335 if (s->dropped != 0) {
4336 d->dropped += s->dropped;
4337 }
4338
4339 if (poll) {
4340 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
4341 }
4342 }
4343
4344 static boolean_t
4345 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4346 {
4347 struct ifnet_stat_increment_param *s = &inp->stats;
4348
4349 /*
4350 * Use of atomic operations is unavoidable here because
4351 * these stats may also be incremented elsewhere via KPIs.
4352 */
4353 if (s->packets_in != 0) {
4354 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4355 s->packets_in = 0;
4356 }
4357 if (s->bytes_in != 0) {
4358 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4359 s->bytes_in = 0;
4360 }
4361 if (s->errors_in != 0) {
4362 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4363 s->errors_in = 0;
4364 }
4365
4366 if (s->packets_out != 0) {
4367 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4368 s->packets_out = 0;
4369 }
4370 if (s->bytes_out != 0) {
4371 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4372 s->bytes_out = 0;
4373 }
4374 if (s->errors_out != 0) {
4375 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4376 s->errors_out = 0;
4377 }
4378
4379 if (s->collisions != 0) {
4380 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4381 s->collisions = 0;
4382 }
4383 if (s->dropped != 0) {
4384 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4385 s->dropped = 0;
4386 }
4387
4388 /*
4389 * No need for atomic operations as they are modified here
4390 * only from within the DLIL input thread context.
4391 */
4392 if (ifp->if_poll_tstats.packets != 0) {
4393 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4394 ifp->if_poll_tstats.packets = 0;
4395 }
4396 if (ifp->if_poll_tstats.bytes != 0) {
4397 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4398 ifp->if_poll_tstats.bytes = 0;
4399 }
4400
4401 return ifp->if_data_threshold != 0;
4402 }
4403
4404 __private_extern__ void
4405 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4406 {
4407 return dlil_input_packet_list_common(ifp, m, 0,
4408 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
4409 }
4410
4411 __private_extern__ void
4412 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4413 u_int32_t cnt, ifnet_model_t mode)
4414 {
4415 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
4416 }
4417
4418 static void
4419 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4420 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
4421 {
4422 int error = 0;
4423 protocol_family_t protocol_family;
4424 mbuf_t next_packet;
4425 ifnet_t ifp = ifp_param;
4426 char *frame_header = NULL;
4427 struct if_proto *last_ifproto = NULL;
4428 mbuf_t pkt_first = NULL;
4429 mbuf_t *pkt_next = NULL;
4430 u_int32_t poll_thresh = 0, poll_ival = 0;
4431
4432 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4433
4434 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
4435 (poll_ival = if_rxpoll_interval_pkts) > 0) {
4436 poll_thresh = cnt;
4437 }
4438
4439 while (m != NULL) {
4440 struct if_proto *ifproto = NULL;
4441 int iorefcnt = 0;
4442 uint32_t pktf_mask; /* pkt flags to preserve */
4443
4444 if (ifp_param == NULL) {
4445 ifp = m->m_pkthdr.rcvif;
4446 }
4447
4448 if ((ifp->if_eflags & IFEF_RXPOLL) &&
4449 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
4450 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
4451 ifnet_poll(ifp);
4452 }
4453
4454 /* Check if this mbuf looks valid */
4455 MBUF_INPUT_CHECK(m, ifp);
4456
4457 next_packet = m->m_nextpkt;
4458 m->m_nextpkt = NULL;
4459 frame_header = m->m_pkthdr.pkt_hdr;
4460 m->m_pkthdr.pkt_hdr = NULL;
4461
4462 /*
4463 * Get an IO reference count if the interface is not
4464 * loopback (lo0) and it is attached; lo0 never goes
4465 * away, so optimize for that.
4466 */
4467 if (ifp != lo_ifp) {
4468 if (!ifnet_datamov_begin(ifp)) {
4469 m_freem(m);
4470 goto next;
4471 }
4472 iorefcnt = 1;
4473 /*
4474 * Preserve the time stamp if it was set.
4475 */
4476 pktf_mask = PKTF_TS_VALID;
4477 } else {
4478 /*
4479 * If this arrived on lo0, preserve interface addr
4480 * info to allow for connectivity between loopback
4481 * and local interface addresses.
4482 */
4483 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
4484 }
4485
4486 /* make sure packet comes in clean */
4487 m_classifier_init(m, pktf_mask);
4488
4489 ifp_inc_traffic_class_in(ifp, m);
4490
4491 /* find which protocol family this packet is for */
4492 ifnet_lock_shared(ifp);
4493 error = (*ifp->if_demux)(ifp, m, frame_header,
4494 &protocol_family);
4495 ifnet_lock_done(ifp);
4496 if (error != 0) {
4497 if (error == EJUSTRETURN) {
4498 goto next;
4499 }
4500 protocol_family = 0;
4501 }
4502
4503 pktap_input(ifp, protocol_family, m, frame_header);
4504
4505 /* Drop v4 packets received on CLAT46 enabled interface */
4506 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4507 m_freem(m);
4508 ip6stat.ip6s_clat464_in_v4_drop++;
4509 goto next;
4510 }
4511
4512 /* Translate the packet if it is received on CLAT interface */
4513 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4514 && dlil_is_clat_needed(protocol_family, m)) {
4515 char *data = NULL;
4516 struct ether_header eh;
4517 struct ether_header *ehp = NULL;
4518
4519 if (ifp->if_type == IFT_ETHER) {
4520 ehp = (struct ether_header *)(void *)frame_header;
4521 /* Skip RX Ethernet packets if they are not IPV6 */
4522 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
4523 goto skip_clat;
4524 }
4525
4526 /* Keep a copy of frame_header for Ethernet packets */
4527 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4528 }
4529 error = dlil_clat64(ifp, &protocol_family, &m);
4530 data = (char *) mbuf_data(m);
4531 if (error != 0) {
4532 m_freem(m);
4533 ip6stat.ip6s_clat464_in_drop++;
4534 goto next;
4535 }
4536 /* Native v6 should be No-op */
4537 if (protocol_family != PF_INET) {
4538 goto skip_clat;
4539 }
4540
4541 /* Do this only for translated v4 packets. */
4542 switch (ifp->if_type) {
4543 case IFT_CELLULAR:
4544 frame_header = data;
4545 break;
4546 case IFT_ETHER:
4547 /*
4548 * Drop if the mbuf doesn't have enough
4549 * space for Ethernet header
4550 */
4551 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4552 m_free(m);
4553 ip6stat.ip6s_clat464_in_drop++;
4554 goto next;
4555 }
4556 /*
4557 * Set the frame_header ETHER_HDR_LEN bytes
4558 * preceeding the data pointer. Change
4559 * the ether_type too.
4560 */
4561 frame_header = data - ETHER_HDR_LEN;
4562 eh.ether_type = htons(ETHERTYPE_IP);
4563 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4564 break;
4565 }
4566 }
4567 skip_clat:
4568 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
4569 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
4570 dlil_input_cksum_dbg(ifp, m, frame_header,
4571 protocol_family);
4572 }
4573 /*
4574 * For partial checksum offload, we expect the driver to
4575 * set the start offset indicating the start of the span
4576 * that is covered by the hardware-computed checksum;
4577 * adjust this start offset accordingly because the data
4578 * pointer has been advanced beyond the link-layer header.
4579 *
4580 * Virtual lan types (bridge, vlan, bond) can call
4581 * dlil_input_packet_list() with the same packet with the
4582 * checksum flags set. Set a flag indicating that the
4583 * adjustment has already been done.
4584 */
4585 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4586 /* adjustment has already been done */
4587 } else if ((m->m_pkthdr.csum_flags &
4588 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4589 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4590 int adj;
4591 if (frame_header == NULL ||
4592 frame_header < (char *)mbuf_datastart(m) ||
4593 frame_header > (char *)m->m_data ||
4594 (adj = (m->m_data - frame_header)) >
4595 m->m_pkthdr.csum_rx_start) {
4596 m->m_pkthdr.csum_data = 0;
4597 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4598 hwcksum_in_invalidated++;
4599 } else {
4600 m->m_pkthdr.csum_rx_start -= adj;
4601 }
4602 /* make sure we don't adjust more than once */
4603 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
4604 }
4605 if (clat_debug) {
4606 pktap_input(ifp, protocol_family, m, frame_header);
4607 }
4608
4609 if (m->m_flags & (M_BCAST | M_MCAST)) {
4610 atomic_add_64(&ifp->if_imcasts, 1);
4611 }
4612
4613 /* run interface filters */
4614 error = dlil_interface_filters_input(ifp, &m,
4615 &frame_header, protocol_family);
4616 if (error != 0) {
4617 if (error != EJUSTRETURN) {
4618 m_freem(m);
4619 }
4620 goto next;
4621 }
4622 if ((m->m_flags & M_PROMISC) != 0) {
4623 m_freem(m);
4624 goto next;
4625 }
4626
4627 /* Lookup the protocol attachment to this interface */
4628 if (protocol_family == 0) {
4629 ifproto = NULL;
4630 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4631 (last_ifproto->protocol_family == protocol_family)) {
4632 VERIFY(ifproto == NULL);
4633 ifproto = last_ifproto;
4634 if_proto_ref(last_ifproto);
4635 } else {
4636 VERIFY(ifproto == NULL);
4637 ifnet_lock_shared(ifp);
4638 /* callee holds a proto refcnt upon success */
4639 ifproto = find_attached_proto(ifp, protocol_family);
4640 ifnet_lock_done(ifp);
4641 }
4642 if (ifproto == NULL) {
4643 /* no protocol for this packet, discard */
4644 m_freem(m);
4645 goto next;
4646 }
4647 if (ifproto != last_ifproto) {
4648 if (last_ifproto != NULL) {
4649 /* pass up the list for the previous protocol */
4650 dlil_ifproto_input(last_ifproto, pkt_first);
4651 pkt_first = NULL;
4652 if_proto_free(last_ifproto);
4653 }
4654 last_ifproto = ifproto;
4655 if_proto_ref(ifproto);
4656 }
4657 /* extend the list */
4658 m->m_pkthdr.pkt_hdr = frame_header;
4659 if (pkt_first == NULL) {
4660 pkt_first = m;
4661 } else {
4662 *pkt_next = m;
4663 }
4664 pkt_next = &m->m_nextpkt;
4665
4666 next:
4667 if (next_packet == NULL && last_ifproto != NULL) {
4668 /* pass up the last list of packets */
4669 dlil_ifproto_input(last_ifproto, pkt_first);
4670 if_proto_free(last_ifproto);
4671 last_ifproto = NULL;
4672 }
4673 if (ifproto != NULL) {
4674 if_proto_free(ifproto);
4675 ifproto = NULL;
4676 }
4677
4678 m = next_packet;
4679
4680 /* update the driver's multicast filter, if needed */
4681 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4682 ifp->if_updatemcasts = 0;
4683 }
4684 if (iorefcnt == 1) {
4685 ifnet_datamov_end(ifp);
4686 }
4687 }
4688
4689 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4690 }
4691
4692 errno_t
4693 if_mcasts_update(struct ifnet *ifp)
4694 {
4695 errno_t err;
4696
4697 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4698 if (err == EAFNOSUPPORT) {
4699 err = 0;
4700 }
4701 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
4702 "(err=%d)\n", if_name(ifp),
4703 (err == 0 ? "successfully restored" : "failed to restore"),
4704 ifp->if_updatemcasts, err);
4705
4706 /* just return success */
4707 return 0;
4708 }
4709
4710 /* If ifp is set, we will increment the generation for the interface */
4711 int
4712 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4713 {
4714 if (ifp != NULL) {
4715 ifnet_increment_generation(ifp);
4716 }
4717
4718 #if NECP
4719 necp_update_all_clients();
4720 #endif /* NECP */
4721
4722 return kev_post_msg(event);
4723 }
4724
4725 __private_extern__ void
4726 dlil_post_sifflags_msg(struct ifnet * ifp)
4727 {
4728 struct kev_msg ev_msg;
4729 struct net_event_data ev_data;
4730
4731 bzero(&ev_data, sizeof(ev_data));
4732 bzero(&ev_msg, sizeof(ev_msg));
4733 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4734 ev_msg.kev_class = KEV_NETWORK_CLASS;
4735 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4736 ev_msg.event_code = KEV_DL_SIFFLAGS;
4737 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4738 ev_data.if_family = ifp->if_family;
4739 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4740 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4741 ev_msg.dv[0].data_ptr = &ev_data;
4742 ev_msg.dv[1].data_length = 0;
4743 dlil_post_complete_msg(ifp, &ev_msg);
4744 }
4745
4746 #define TMP_IF_PROTO_ARR_SIZE 10
4747 static int
4748 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4749 {
4750 struct ifnet_filter *filter = NULL;
4751 struct if_proto *proto = NULL;
4752 int if_proto_count = 0;
4753 struct if_proto **tmp_ifproto_arr = NULL;
4754 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4755 int tmp_ifproto_arr_idx = 0;
4756 bool tmp_malloc = false;
4757
4758 /*
4759 * Pass the event to the interface filters
4760 */
4761 lck_mtx_lock_spin(&ifp->if_flt_lock);
4762 /* prevent filter list from changing in case we drop the lock */
4763 if_flt_monitor_busy(ifp);
4764 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4765 if (filter->filt_event != NULL) {
4766 lck_mtx_unlock(&ifp->if_flt_lock);
4767
4768 filter->filt_event(filter->filt_cookie, ifp,
4769 filter->filt_protocol, event);
4770
4771 lck_mtx_lock_spin(&ifp->if_flt_lock);
4772 }
4773 }
4774 /* we're done with the filter list */
4775 if_flt_monitor_unbusy(ifp);
4776 lck_mtx_unlock(&ifp->if_flt_lock);
4777
4778 /* Get an io ref count if the interface is attached */
4779 if (!ifnet_is_attached(ifp, 1)) {
4780 goto done;
4781 }
4782
4783 /*
4784 * An embedded tmp_list_entry in if_proto may still get
4785 * over-written by another thread after giving up ifnet lock,
4786 * therefore we are avoiding embedded pointers here.
4787 */
4788 ifnet_lock_shared(ifp);
4789 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
4790 if (if_proto_count) {
4791 int i;
4792 VERIFY(ifp->if_proto_hash != NULL);
4793 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4794 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4795 } else {
4796 MALLOC(tmp_ifproto_arr, struct if_proto **,
4797 sizeof(*tmp_ifproto_arr) * if_proto_count,
4798 M_TEMP, M_ZERO);
4799 if (tmp_ifproto_arr == NULL) {
4800 ifnet_lock_done(ifp);
4801 goto cleanup;
4802 }
4803 tmp_malloc = true;
4804 }
4805
4806 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4807 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4808 next_hash) {
4809 if_proto_ref(proto);
4810 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4811 tmp_ifproto_arr_idx++;
4812 }
4813 }
4814 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4815 }
4816 ifnet_lock_done(ifp);
4817
4818 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4819 tmp_ifproto_arr_idx++) {
4820 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4821 VERIFY(proto != NULL);
4822 proto_media_event eventp =
4823 (proto->proto_kpi == kProtoKPI_v1 ?
4824 proto->kpi.v1.event :
4825 proto->kpi.v2.event);
4826
4827 if (eventp != NULL) {
4828 eventp(ifp, proto->protocol_family,
4829 event);
4830 }
4831 if_proto_free(proto);
4832 }
4833
4834 cleanup:
4835 if (tmp_malloc) {
4836 FREE(tmp_ifproto_arr, M_TEMP);
4837 }
4838
4839 /* Pass the event to the interface */
4840 if (ifp->if_event != NULL) {
4841 ifp->if_event(ifp, event);
4842 }
4843
4844 /* Release the io ref count */
4845 ifnet_decr_iorefcnt(ifp);
4846 done:
4847 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
4848 }
4849
4850 errno_t
4851 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4852 {
4853 struct kev_msg kev_msg;
4854 int result = 0;
4855
4856 if (ifp == NULL || event == NULL) {
4857 return EINVAL;
4858 }
4859
4860 bzero(&kev_msg, sizeof(kev_msg));
4861 kev_msg.vendor_code = event->vendor_code;
4862 kev_msg.kev_class = event->kev_class;
4863 kev_msg.kev_subclass = event->kev_subclass;
4864 kev_msg.event_code = event->event_code;
4865 kev_msg.dv[0].data_ptr = &event->event_data[0];
4866 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4867 kev_msg.dv[1].data_length = 0;
4868
4869 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4870
4871 return result;
4872 }
4873
4874 #if CONFIG_MACF_NET
4875 #include <netinet/ip6.h>
4876 #include <netinet/ip.h>
4877 static int
4878 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4879 {
4880 struct mbuf *m;
4881 struct ip *ip;
4882 struct ip6_hdr *ip6;
4883 int type = SOCK_RAW;
4884
4885 if (!raw) {
4886 switch (family) {
4887 case PF_INET:
4888 m = m_pullup(*mp, sizeof(struct ip));
4889 if (m == NULL) {
4890 break;
4891 }
4892 *mp = m;
4893 ip = mtod(m, struct ip *);
4894 if (ip->ip_p == IPPROTO_TCP) {
4895 type = SOCK_STREAM;
4896 } else if (ip->ip_p == IPPROTO_UDP) {
4897 type = SOCK_DGRAM;
4898 }
4899 break;
4900 case PF_INET6:
4901 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4902 if (m == NULL) {
4903 break;
4904 }
4905 *mp = m;
4906 ip6 = mtod(m, struct ip6_hdr *);
4907 if (ip6->ip6_nxt == IPPROTO_TCP) {
4908 type = SOCK_STREAM;
4909 } else if (ip6->ip6_nxt == IPPROTO_UDP) {
4910 type = SOCK_DGRAM;
4911 }
4912 break;
4913 }
4914 }
4915
4916 return type;
4917 }
4918 #endif
4919
4920 static void
4921 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4922 {
4923 mbuf_t n = m;
4924 int chainlen = 0;
4925
4926 while (n != NULL) {
4927 chainlen++;
4928 n = n->m_next;
4929 }
4930 switch (chainlen) {
4931 case 0:
4932 break;
4933 case 1:
4934 atomic_add_64(&cls->cls_one, 1);
4935 break;
4936 case 2:
4937 atomic_add_64(&cls->cls_two, 1);
4938 break;
4939 case 3:
4940 atomic_add_64(&cls->cls_three, 1);
4941 break;
4942 case 4:
4943 atomic_add_64(&cls->cls_four, 1);
4944 break;
4945 case 5:
4946 default:
4947 atomic_add_64(&cls->cls_five_or_more, 1);
4948 break;
4949 }
4950 }
4951
4952 /*
4953 * dlil_output
4954 *
4955 * Caller should have a lock on the protocol domain if the protocol
4956 * doesn't support finer grained locking. In most cases, the lock
4957 * will be held from the socket layer and won't be released until
4958 * we return back to the socket layer.
4959 *
4960 * This does mean that we must take a protocol lock before we take
4961 * an interface lock if we're going to take both. This makes sense
4962 * because a protocol is likely to interact with an ifp while it
4963 * is under the protocol lock.
4964 *
4965 * An advisory code will be returned if adv is not null. This
4966 * can be used to provide feedback about interface queues to the
4967 * application.
4968 */
4969 errno_t
4970 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4971 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4972 {
4973 char *frame_type = NULL;
4974 char *dst_linkaddr = NULL;
4975 int retval = 0;
4976 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4977 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4978 struct if_proto *proto = NULL;
4979 mbuf_t m = NULL;
4980 mbuf_t send_head = NULL;
4981 mbuf_t *send_tail = &send_head;
4982 int iorefcnt = 0;
4983 u_int32_t pre = 0, post = 0;
4984 u_int32_t fpkts = 0, fbytes = 0;
4985 int32_t flen = 0;
4986 struct timespec now;
4987 u_int64_t now_nsec;
4988 boolean_t did_clat46 = FALSE;
4989 protocol_family_t old_proto_family = proto_family;
4990 struct sockaddr_in6 dest6;
4991 struct rtentry *rt = NULL;
4992 u_int32_t m_loop_set = 0;
4993
4994 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4995
4996 /*
4997 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4998 * from happening while this operation is in progress
4999 */
5000 if (!ifnet_datamov_begin(ifp)) {
5001 retval = ENXIO;
5002 goto cleanup;
5003 }
5004 iorefcnt = 1;
5005
5006 VERIFY(ifp->if_output_dlil != NULL);
5007
5008 /* update the driver's multicast filter, if needed */
5009 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
5010 ifp->if_updatemcasts = 0;
5011 }
5012
5013 frame_type = frame_type_buffer;
5014 dst_linkaddr = dst_linkaddr_buffer;
5015
5016 if (raw == 0) {
5017 ifnet_lock_shared(ifp);
5018 /* callee holds a proto refcnt upon success */
5019 proto = find_attached_proto(ifp, proto_family);
5020 if (proto == NULL) {
5021 ifnet_lock_done(ifp);
5022 retval = ENXIO;
5023 goto cleanup;
5024 }
5025 ifnet_lock_done(ifp);
5026 }
5027
5028 preout_again:
5029 if (packetlist == NULL) {
5030 goto cleanup;
5031 }
5032
5033 m = packetlist;
5034 packetlist = packetlist->m_nextpkt;
5035 m->m_nextpkt = NULL;
5036
5037 /*
5038 * Perform address family translation for the first
5039 * packet outside the loop in order to perform address
5040 * lookup for the translated proto family.
5041 */
5042 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5043 (ifp->if_type == IFT_CELLULAR ||
5044 dlil_is_clat_needed(proto_family, m))) {
5045 retval = dlil_clat46(ifp, &proto_family, &m);
5046 /*
5047 * Go to the next packet if translation fails
5048 */
5049 if (retval != 0) {
5050 m_freem(m);
5051 m = NULL;
5052 ip6stat.ip6s_clat464_out_drop++;
5053 /* Make sure that the proto family is PF_INET */
5054 ASSERT(proto_family == PF_INET);
5055 goto preout_again;
5056 }
5057 /*
5058 * Free the old one and make it point to the IPv6 proto structure.
5059 *
5060 * Change proto for the first time we have successfully
5061 * performed address family translation.
5062 */
5063 if (!did_clat46 && proto_family == PF_INET6) {
5064 did_clat46 = TRUE;
5065
5066 if (proto != NULL) {
5067 if_proto_free(proto);
5068 }
5069 ifnet_lock_shared(ifp);
5070 /* callee holds a proto refcnt upon success */
5071 proto = find_attached_proto(ifp, proto_family);
5072 if (proto == NULL) {
5073 ifnet_lock_done(ifp);
5074 retval = ENXIO;
5075 m_freem(m);
5076 m = NULL;
5077 goto cleanup;
5078 }
5079 ifnet_lock_done(ifp);
5080 if (ifp->if_type == IFT_ETHER) {
5081 /* Update the dest to translated v6 address */
5082 dest6.sin6_len = sizeof(struct sockaddr_in6);
5083 dest6.sin6_family = AF_INET6;
5084 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5085 dest = (const struct sockaddr *)&dest6;
5086
5087 /*
5088 * Lookup route to the translated destination
5089 * Free this route ref during cleanup
5090 */
5091 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5092 0, 0, ifp->if_index);
5093
5094 route = rt;
5095 }
5096 }
5097 }
5098
5099 /*
5100 * This path gets packet chain going to the same destination.
5101 * The pre output routine is used to either trigger resolution of
5102 * the next hop or retreive the next hop's link layer addressing.
5103 * For ex: ether_inet(6)_pre_output routine.
5104 *
5105 * If the routine returns EJUSTRETURN, it implies that packet has
5106 * been queued, and therefore we have to call preout_again for the
5107 * following packet in the chain.
5108 *
5109 * For errors other than EJUSTRETURN, the current packet is freed
5110 * and the rest of the chain (pointed by packetlist is freed as
5111 * part of clean up.
5112 *
5113 * Else if there is no error the retrieved information is used for
5114 * all the packets in the chain.
5115 */
5116 if (raw == 0) {
5117 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5118 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
5119 retval = 0;
5120 if (preoutp != NULL) {
5121 retval = preoutp(ifp, proto_family, &m, dest, route,
5122 frame_type, dst_linkaddr);
5123
5124 if (retval != 0) {
5125 if (retval == EJUSTRETURN) {
5126 goto preout_again;
5127 }
5128 m_freem(m);
5129 m = NULL;
5130 goto cleanup;
5131 }
5132 }
5133 }
5134
5135 #if CONFIG_MACF_NET
5136 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
5137 dlil_get_socket_type(&m, proto_family, raw));
5138 if (retval != 0) {
5139 m_freem(m);
5140 goto cleanup;
5141 }
5142 #endif
5143
5144 do {
5145 /*
5146 * Perform address family translation if needed.
5147 * For now we only support stateless 4 to 6 translation
5148 * on the out path.
5149 *
5150 * The routine below translates IP header, updates protocol
5151 * checksum and also translates ICMP.
5152 *
5153 * We skip the first packet as it is already translated and
5154 * the proto family is set to PF_INET6.
5155 */
5156 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5157 (ifp->if_type == IFT_CELLULAR ||
5158 dlil_is_clat_needed(proto_family, m))) {
5159 retval = dlil_clat46(ifp, &proto_family, &m);
5160 /* Goto the next packet if the translation fails */
5161 if (retval != 0) {
5162 m_freem(m);
5163 m = NULL;
5164 ip6stat.ip6s_clat464_out_drop++;
5165 goto next;
5166 }
5167 }
5168
5169 #if CONFIG_DTRACE
5170 if (!raw && proto_family == PF_INET) {
5171 struct ip *ip = mtod(m, struct ip *);
5172 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5173 struct ip *, ip, struct ifnet *, ifp,
5174 struct ip *, ip, struct ip6_hdr *, NULL);
5175 } else if (!raw && proto_family == PF_INET6) {
5176 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5177 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5178 struct ip6_hdr *, ip6, struct ifnet *, ifp,
5179 struct ip *, NULL, struct ip6_hdr *, ip6);
5180 }
5181 #endif /* CONFIG_DTRACE */
5182
5183 if (raw == 0 && ifp->if_framer != NULL) {
5184 int rcvif_set = 0;
5185
5186 /*
5187 * If this is a broadcast packet that needs to be
5188 * looped back into the system, set the inbound ifp
5189 * to that of the outbound ifp. This will allow
5190 * us to determine that it is a legitimate packet
5191 * for the system. Only set the ifp if it's not
5192 * already set, just to be safe.
5193 */
5194 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5195 m->m_pkthdr.rcvif == NULL) {
5196 m->m_pkthdr.rcvif = ifp;
5197 rcvif_set = 1;
5198 }
5199 m_loop_set = m->m_flags & M_LOOP;
5200 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
5201 frame_type, &pre, &post);
5202 if (retval != 0) {
5203 if (retval != EJUSTRETURN) {
5204 m_freem(m);
5205 }
5206 goto next;
5207 }
5208
5209 /*
5210 * For partial checksum offload, adjust the start
5211 * and stuff offsets based on the prepended header.
5212 */
5213 if ((m->m_pkthdr.csum_flags &
5214 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5215 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5216 m->m_pkthdr.csum_tx_stuff += pre;
5217 m->m_pkthdr.csum_tx_start += pre;
5218 }
5219
5220 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
5221 dlil_output_cksum_dbg(ifp, m, pre,
5222 proto_family);
5223 }
5224
5225 /*
5226 * Clear the ifp if it was set above, and to be
5227 * safe, only if it is still the same as the
5228 * outbound ifp we have in context. If it was
5229 * looped back, then a copy of it was sent to the
5230 * loopback interface with the rcvif set, and we
5231 * are clearing the one that will go down to the
5232 * layer below.
5233 */
5234 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
5235 m->m_pkthdr.rcvif = NULL;
5236 }
5237 }
5238
5239 /*
5240 * Let interface filters (if any) do their thing ...
5241 */
5242 retval = dlil_interface_filters_output(ifp, &m, proto_family);
5243 if (retval != 0) {
5244 if (retval != EJUSTRETURN) {
5245 m_freem(m);
5246 }
5247 goto next;
5248 }
5249 /*
5250 * Strip away M_PROTO1 bit prior to sending packet
5251 * to the driver as this field may be used by the driver
5252 */
5253 m->m_flags &= ~M_PROTO1;
5254
5255 /*
5256 * If the underlying interface is not capable of handling a
5257 * packet whose data portion spans across physically disjoint
5258 * pages, we need to "normalize" the packet so that we pass
5259 * down a chain of mbufs where each mbuf points to a span that
5260 * resides in the system page boundary. If the packet does
5261 * not cross page(s), the following is a no-op.
5262 */
5263 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
5264 if ((m = m_normalize(m)) == NULL) {
5265 goto next;
5266 }
5267 }
5268
5269 /*
5270 * If this is a TSO packet, make sure the interface still
5271 * advertise TSO capability.
5272 */
5273 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
5274 retval = EMSGSIZE;
5275 m_freem(m);
5276 goto cleanup;
5277 }
5278
5279 ifp_inc_traffic_class_out(ifp, m);
5280 pktap_output(ifp, proto_family, m, pre, post);
5281
5282 /*
5283 * Count the number of elements in the mbuf chain
5284 */
5285 if (tx_chain_len_count) {
5286 dlil_count_chain_len(m, &tx_chain_len_stats);
5287 }
5288
5289 /*
5290 * Record timestamp; ifnet_enqueue() will use this info
5291 * rather than redoing the work. An optimization could
5292 * involve doing this just once at the top, if there are
5293 * no interface filters attached, but that's probably
5294 * not a big deal.
5295 */
5296 nanouptime(&now);
5297 net_timernsec(&now, &now_nsec);
5298 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5299
5300 /*
5301 * Discard partial sum information if this packet originated
5302 * from another interface; the packet would already have the
5303 * final checksum and we shouldn't recompute it.
5304 */
5305 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
5306 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5307 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5308 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5309 m->m_pkthdr.csum_data = 0;
5310 }
5311
5312 /*
5313 * Finally, call the driver.
5314 */
5315 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
5316 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5317 flen += (m_pktlen(m) - (pre + post));
5318 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5319 }
5320 *send_tail = m;
5321 send_tail = &m->m_nextpkt;
5322 } else {
5323 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5324 flen = (m_pktlen(m) - (pre + post));
5325 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5326 } else {
5327 flen = 0;
5328 }
5329 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5330 0, 0, 0, 0, 0);
5331 retval = (*ifp->if_output_dlil)(ifp, m);
5332 if (retval == EQFULL || retval == EQSUSPENDED) {
5333 if (adv != NULL && adv->code == FADV_SUCCESS) {
5334 adv->code = (retval == EQFULL ?
5335 FADV_FLOW_CONTROLLED :
5336 FADV_SUSPENDED);
5337 }
5338 retval = 0;
5339 }
5340 if (retval == 0 && flen > 0) {
5341 fbytes += flen;
5342 fpkts++;
5343 }
5344 if (retval != 0 && dlil_verbose) {
5345 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5346 __func__, if_name(ifp),
5347 retval);
5348 }
5349 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
5350 0, 0, 0, 0, 0);
5351 }
5352 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5353
5354 next:
5355 m = packetlist;
5356 if (m != NULL) {
5357 m->m_flags |= m_loop_set;
5358 packetlist = packetlist->m_nextpkt;
5359 m->m_nextpkt = NULL;
5360 }
5361 /* Reset the proto family to old proto family for CLAT */
5362 if (did_clat46) {
5363 proto_family = old_proto_family;
5364 }
5365 } while (m != NULL);
5366
5367 if (send_head != NULL) {
5368 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5369 0, 0, 0, 0, 0);
5370 if (ifp->if_eflags & IFEF_SENDLIST) {
5371 retval = (*ifp->if_output_dlil)(ifp, send_head);
5372 if (retval == EQFULL || retval == EQSUSPENDED) {
5373 if (adv != NULL) {
5374 adv->code = (retval == EQFULL ?
5375 FADV_FLOW_CONTROLLED :
5376 FADV_SUSPENDED);
5377 }
5378 retval = 0;
5379 }
5380 if (retval == 0 && flen > 0) {
5381 fbytes += flen;
5382 fpkts++;
5383 }
5384 if (retval != 0 && dlil_verbose) {
5385 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5386 __func__, if_name(ifp), retval);
5387 }
5388 } else {
5389 struct mbuf *send_m;
5390 int enq_cnt = 0;
5391 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5392 while (send_head != NULL) {
5393 send_m = send_head;
5394 send_head = send_m->m_nextpkt;
5395 send_m->m_nextpkt = NULL;
5396 retval = (*ifp->if_output_dlil)(ifp, send_m);
5397 if (retval == EQFULL || retval == EQSUSPENDED) {
5398 if (adv != NULL) {
5399 adv->code = (retval == EQFULL ?
5400 FADV_FLOW_CONTROLLED :
5401 FADV_SUSPENDED);
5402 }
5403 retval = 0;
5404 }
5405 if (retval == 0) {
5406 enq_cnt++;
5407 if (flen > 0) {
5408 fpkts++;
5409 }
5410 }
5411 if (retval != 0 && dlil_verbose) {
5412 DLIL_PRINTF("%s: output error on %s "
5413 "retval = %d\n",
5414 __func__, if_name(ifp), retval);
5415 }
5416 }
5417 if (enq_cnt > 0) {
5418 fbytes += flen;
5419 ifnet_start(ifp);
5420 }
5421 }
5422 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5423 }
5424
5425 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5426
5427 cleanup:
5428 if (fbytes > 0) {
5429 ifp->if_fbytes += fbytes;
5430 }
5431 if (fpkts > 0) {
5432 ifp->if_fpackets += fpkts;
5433 }
5434 if (proto != NULL) {
5435 if_proto_free(proto);
5436 }
5437 if (packetlist) { /* if any packets are left, clean up */
5438 mbuf_freem_list(packetlist);
5439 }
5440 if (retval == EJUSTRETURN) {
5441 retval = 0;
5442 }
5443 if (iorefcnt == 1) {
5444 ifnet_datamov_end(ifp);
5445 }
5446 if (rt != NULL) {
5447 rtfree(rt);
5448 rt = NULL;
5449 }
5450
5451 return retval;
5452 }
5453
5454 /*
5455 * This routine checks if the destination address is not a loopback, link-local,
5456 * multicast or broadcast address.
5457 */
5458 static int
5459 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5460 {
5461 int ret = 0;
5462 switch (proto_family) {
5463 case PF_INET: {
5464 struct ip *iph = mtod(m, struct ip *);
5465 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
5466 ret = 1;
5467 }
5468 break;
5469 }
5470 case PF_INET6: {
5471 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5472 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
5473 CLAT64_NEEDED(&ip6h->ip6_dst)) {
5474 ret = 1;
5475 }
5476 break;
5477 }
5478 }
5479
5480 return ret;
5481 }
5482 /*
5483 * @brief This routine translates IPv4 packet to IPv6 packet,
5484 * updates protocol checksum and also translates ICMP for code
5485 * along with inner header translation.
5486 *
5487 * @param ifp Pointer to the interface
5488 * @param proto_family pointer to protocol family. It is updated if function
5489 * performs the translation successfully.
5490 * @param m Pointer to the pointer pointing to the packet. Needed because this
5491 * routine can end up changing the mbuf to a different one.
5492 *
5493 * @return 0 on success or else a negative value.
5494 */
5495 static errno_t
5496 dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5497 {
5498 VERIFY(*proto_family == PF_INET);
5499 VERIFY(IS_INTF_CLAT46(ifp));
5500
5501 pbuf_t pbuf_store, *pbuf = NULL;
5502 struct ip *iph = NULL;
5503 struct in_addr osrc, odst;
5504 uint8_t proto = 0;
5505 struct in6_ifaddr *ia6_clat_src = NULL;
5506 struct in6_addr *src = NULL;
5507 struct in6_addr dst;
5508 int error = 0;
5509 uint32_t off = 0;
5510 uint64_t tot_len = 0;
5511 uint16_t ip_id_val = 0;
5512 uint16_t ip_frag_off = 0;
5513
5514 boolean_t is_frag = FALSE;
5515 boolean_t is_first_frag = TRUE;
5516 boolean_t is_last_frag = TRUE;
5517
5518 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5519 pbuf = &pbuf_store;
5520 iph = pbuf->pb_data;
5521
5522 osrc = iph->ip_src;
5523 odst = iph->ip_dst;
5524 proto = iph->ip_p;
5525 off = iph->ip_hl << 2;
5526 ip_id_val = iph->ip_id;
5527 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5528
5529 tot_len = ntohs(iph->ip_len);
5530
5531 /*
5532 * For packets that are not first frags
5533 * we only need to adjust CSUM.
5534 * For 4 to 6, Fragmentation header gets appended
5535 * after proto translation.
5536 */
5537 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5538 is_frag = TRUE;
5539
5540 /* If the offset is not zero, it is not first frag */
5541 if (ip_frag_off != 0) {
5542 is_first_frag = FALSE;
5543 }
5544
5545 /* If IP_MF is set, then it is not last frag */
5546 if (ntohs(iph->ip_off) & IP_MF) {
5547 is_last_frag = FALSE;
5548 }
5549 }
5550
5551 /*
5552 * Retrive the local IPv6 CLAT46 address reserved for stateless
5553 * translation.
5554 */
5555 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5556 if (ia6_clat_src == NULL) {
5557 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5558 error = -1;
5559 goto cleanup;
5560 }
5561
5562 src = &ia6_clat_src->ia_addr.sin6_addr;
5563
5564 /*
5565 * Translate IPv4 destination to IPv6 destination by using the
5566 * prefixes learned through prior PLAT discovery.
5567 */
5568 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5569 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5570 goto cleanup;
5571 }
5572
5573 /* Translate the IP header part first */
5574 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5575 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5576
5577 iph = NULL; /* Invalidate iph as pbuf has been modified */
5578
5579 if (error != 0) {
5580 ip6stat.ip6s_clat464_out_46transfail_drop++;
5581 goto cleanup;
5582 }
5583
5584 /*
5585 * Translate protocol header, update checksum, checksum flags
5586 * and related fields.
5587 */
5588 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5589 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5590
5591 if (error != 0) {
5592 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5593 goto cleanup;
5594 }
5595
5596 /* Now insert the IPv6 fragment header */
5597 if (is_frag) {
5598 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5599
5600 if (error != 0) {
5601 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5602 goto cleanup;
5603 }
5604 }
5605
5606 cleanup:
5607 if (ia6_clat_src != NULL) {
5608 IFA_REMREF(&ia6_clat_src->ia_ifa);
5609 }
5610
5611 if (pbuf_is_valid(pbuf)) {
5612 *m = pbuf->pb_mbuf;
5613 pbuf->pb_mbuf = NULL;
5614 pbuf_destroy(pbuf);
5615 } else {
5616 error = -1;
5617 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5618 }
5619
5620 if (error == 0) {
5621 *proto_family = PF_INET6;
5622 ip6stat.ip6s_clat464_out_success++;
5623 }
5624
5625 return error;
5626 }
5627
5628 /*
5629 * @brief This routine translates incoming IPv6 to IPv4 packet,
5630 * updates protocol checksum and also translates ICMPv6 outer
5631 * and inner headers
5632 *
5633 * @return 0 on success or else a negative value.
5634 */
5635 static errno_t
5636 dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5637 {
5638 VERIFY(*proto_family == PF_INET6);
5639 VERIFY(IS_INTF_CLAT46(ifp));
5640
5641 struct ip6_hdr *ip6h = NULL;
5642 struct in6_addr osrc, odst;
5643 uint8_t proto = 0;
5644 struct in6_ifaddr *ia6_clat_dst = NULL;
5645 struct in_ifaddr *ia4_clat_dst = NULL;
5646 struct in_addr *dst = NULL;
5647 struct in_addr src;
5648 int error = 0;
5649 uint32_t off = 0;
5650 u_int64_t tot_len = 0;
5651 uint8_t tos = 0;
5652 boolean_t is_first_frag = TRUE;
5653
5654 /* Incoming mbuf does not contain valid IP6 header */
5655 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5656 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5657 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5658 ip6stat.ip6s_clat464_in_tooshort_drop++;
5659 return -1;
5660 }
5661
5662 ip6h = mtod(*m, struct ip6_hdr *);
5663 /* Validate that mbuf contains IP payload equal to ip6_plen */
5664 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5665 ip6stat.ip6s_clat464_in_tooshort_drop++;
5666 return -1;
5667 }
5668
5669 osrc = ip6h->ip6_src;
5670 odst = ip6h->ip6_dst;
5671
5672 /*
5673 * Retrieve the local CLAT46 reserved IPv6 address.
5674 * Let the packet pass if we don't find one, as the flag
5675 * may get set before IPv6 configuration has taken place.
5676 */
5677 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5678 if (ia6_clat_dst == NULL) {
5679 goto done;
5680 }
5681
5682 /*
5683 * Check if the original dest in the packet is same as the reserved
5684 * CLAT46 IPv6 address
5685 */
5686 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5687 pbuf_t pbuf_store, *pbuf = NULL;
5688 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5689 pbuf = &pbuf_store;
5690
5691 /*
5692 * Retrive the local CLAT46 IPv4 address reserved for stateless
5693 * translation.
5694 */
5695 ia4_clat_dst = inifa_ifpclatv4(ifp);
5696 if (ia4_clat_dst == NULL) {
5697 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5698 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5699 error = -1;
5700 goto cleanup;
5701 }
5702 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5703
5704 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5705 dst = &ia4_clat_dst->ia_addr.sin_addr;
5706 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5707 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5708 error = -1;
5709 goto cleanup;
5710 }
5711
5712 ip6h = pbuf->pb_data;
5713 off = sizeof(struct ip6_hdr);
5714 proto = ip6h->ip6_nxt;
5715 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5716 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5717
5718 /*
5719 * Translate the IP header and update the fragmentation
5720 * header if needed
5721 */
5722 error = (nat464_translate_64(pbuf, off, tos, &proto,
5723 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5724 0 : -1;
5725
5726 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5727
5728 if (error != 0) {
5729 ip6stat.ip6s_clat464_in_64transfail_drop++;
5730 goto cleanup;
5731 }
5732
5733 /*
5734 * Translate protocol header, update checksum, checksum flags
5735 * and related fields.
5736 */
5737 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5738 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5739 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5740
5741 if (error != 0) {
5742 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5743 goto cleanup;
5744 }
5745
5746 cleanup:
5747 if (ia4_clat_dst != NULL) {
5748 IFA_REMREF(&ia4_clat_dst->ia_ifa);
5749 }
5750
5751 if (pbuf_is_valid(pbuf)) {
5752 *m = pbuf->pb_mbuf;
5753 pbuf->pb_mbuf = NULL;
5754 pbuf_destroy(pbuf);
5755 } else {
5756 error = -1;
5757 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5758 }
5759
5760 if (error == 0) {
5761 *proto_family = PF_INET;
5762 ip6stat.ip6s_clat464_in_success++;
5763 }
5764 } /* CLAT traffic */
5765
5766 done:
5767 return error;
5768 }
5769
5770 errno_t
5771 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5772 void *ioctl_arg)
5773 {
5774 struct ifnet_filter *filter;
5775 int retval = EOPNOTSUPP;
5776 int result = 0;
5777
5778 if (ifp == NULL || ioctl_code == 0) {
5779 return EINVAL;
5780 }
5781
5782 /* Get an io ref count if the interface is attached */
5783 if (!ifnet_is_attached(ifp, 1)) {
5784 return EOPNOTSUPP;
5785 }
5786
5787 /*
5788 * Run the interface filters first.
5789 * We want to run all filters before calling the protocol,
5790 * interface family, or interface.
5791 */
5792 lck_mtx_lock_spin(&ifp->if_flt_lock);
5793 /* prevent filter list from changing in case we drop the lock */
5794 if_flt_monitor_busy(ifp);
5795 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5796 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5797 filter->filt_protocol == proto_fam)) {
5798 lck_mtx_unlock(&ifp->if_flt_lock);
5799
5800 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5801 proto_fam, ioctl_code, ioctl_arg);
5802
5803 lck_mtx_lock_spin(&ifp->if_flt_lock);
5804
5805 /* Only update retval if no one has handled the ioctl */
5806 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5807 if (result == ENOTSUP) {
5808 result = EOPNOTSUPP;
5809 }
5810 retval = result;
5811 if (retval != 0 && retval != EOPNOTSUPP) {
5812 /* we're done with the filter list */
5813 if_flt_monitor_unbusy(ifp);
5814 lck_mtx_unlock(&ifp->if_flt_lock);
5815 goto cleanup;
5816 }
5817 }
5818 }
5819 }
5820 /* we're done with the filter list */
5821 if_flt_monitor_unbusy(ifp);
5822 lck_mtx_unlock(&ifp->if_flt_lock);
5823
5824 /* Allow the protocol to handle the ioctl */
5825 if (proto_fam != 0) {
5826 struct if_proto *proto;
5827
5828 /* callee holds a proto refcnt upon success */
5829 ifnet_lock_shared(ifp);
5830 proto = find_attached_proto(ifp, proto_fam);
5831 ifnet_lock_done(ifp);
5832 if (proto != NULL) {
5833 proto_media_ioctl ioctlp =
5834 (proto->proto_kpi == kProtoKPI_v1 ?
5835 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
5836 result = EOPNOTSUPP;
5837 if (ioctlp != NULL) {
5838 result = ioctlp(ifp, proto_fam, ioctl_code,
5839 ioctl_arg);
5840 }
5841 if_proto_free(proto);
5842
5843 /* Only update retval if no one has handled the ioctl */
5844 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5845 if (result == ENOTSUP) {
5846 result = EOPNOTSUPP;
5847 }
5848 retval = result;
5849 if (retval && retval != EOPNOTSUPP) {
5850 goto cleanup;
5851 }
5852 }
5853 }
5854 }
5855
5856 /* retval is either 0 or EOPNOTSUPP */
5857
5858 /*
5859 * Let the interface handle this ioctl.
5860 * If it returns EOPNOTSUPP, ignore that, we may have
5861 * already handled this in the protocol or family.
5862 */
5863 if (ifp->if_ioctl) {
5864 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
5865 }
5866
5867 /* Only update retval if no one has handled the ioctl */
5868 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5869 if (result == ENOTSUP) {
5870 result = EOPNOTSUPP;
5871 }
5872 retval = result;
5873 if (retval && retval != EOPNOTSUPP) {
5874 goto cleanup;
5875 }
5876 }
5877
5878 cleanup:
5879 if (retval == EJUSTRETURN) {
5880 retval = 0;
5881 }
5882
5883 ifnet_decr_iorefcnt(ifp);
5884
5885 return retval;
5886 }
5887
5888 __private_extern__ errno_t
5889 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
5890 {
5891 errno_t error = 0;
5892
5893
5894 if (ifp->if_set_bpf_tap) {
5895 /* Get an io reference on the interface if it is attached */
5896 if (!ifnet_is_attached(ifp, 1)) {
5897 return ENXIO;
5898 }
5899 error = ifp->if_set_bpf_tap(ifp, mode, callback);
5900 ifnet_decr_iorefcnt(ifp);
5901 }
5902 return error;
5903 }
5904
5905 errno_t
5906 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
5907 struct sockaddr *ll_addr, size_t ll_len)
5908 {
5909 errno_t result = EOPNOTSUPP;
5910 struct if_proto *proto;
5911 const struct sockaddr *verify;
5912 proto_media_resolve_multi resolvep;
5913
5914 if (!ifnet_is_attached(ifp, 1)) {
5915 return result;
5916 }
5917
5918 bzero(ll_addr, ll_len);
5919
5920 /* Call the protocol first; callee holds a proto refcnt upon success */
5921 ifnet_lock_shared(ifp);
5922 proto = find_attached_proto(ifp, proto_addr->sa_family);
5923 ifnet_lock_done(ifp);
5924 if (proto != NULL) {
5925 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
5926 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
5927 if (resolvep != NULL) {
5928 result = resolvep(ifp, proto_addr,
5929 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
5930 }
5931 if_proto_free(proto);
5932 }
5933
5934 /* Let the interface verify the multicast address */
5935 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
5936 if (result == 0) {
5937 verify = ll_addr;
5938 } else {
5939 verify = proto_addr;
5940 }
5941 result = ifp->if_check_multi(ifp, verify);
5942 }
5943
5944 ifnet_decr_iorefcnt(ifp);
5945 return result;
5946 }
5947
5948 __private_extern__ errno_t
5949 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
5950 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5951 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5952 {
5953 struct if_proto *proto;
5954 errno_t result = 0;
5955
5956 /* callee holds a proto refcnt upon success */
5957 ifnet_lock_shared(ifp);
5958 proto = find_attached_proto(ifp, target_proto->sa_family);
5959 ifnet_lock_done(ifp);
5960 if (proto == NULL) {
5961 result = ENOTSUP;
5962 } else {
5963 proto_media_send_arp arpp;
5964 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
5965 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
5966 if (arpp == NULL) {
5967 result = ENOTSUP;
5968 } else {
5969 switch (arpop) {
5970 case ARPOP_REQUEST:
5971 arpstat.txrequests++;
5972 if (target_hw != NULL) {
5973 arpstat.txurequests++;
5974 }
5975 break;
5976 case ARPOP_REPLY:
5977 arpstat.txreplies++;
5978 break;
5979 }
5980 result = arpp(ifp, arpop, sender_hw, sender_proto,
5981 target_hw, target_proto);
5982 }
5983 if_proto_free(proto);
5984 }
5985
5986 return result;
5987 }
5988
5989 struct net_thread_marks { };
5990 static const struct net_thread_marks net_thread_marks_base = { };
5991
5992 __private_extern__ const net_thread_marks_t net_thread_marks_none =
5993 &net_thread_marks_base;
5994
5995 __private_extern__ net_thread_marks_t
5996 net_thread_marks_push(u_int32_t push)
5997 {
5998 static const char *const base = (const void*)&net_thread_marks_base;
5999 u_int32_t pop = 0;
6000
6001 if (push != 0) {
6002 struct uthread *uth = get_bsdthread_info(current_thread());
6003
6004 pop = push & ~uth->uu_network_marks;
6005 if (pop != 0) {
6006 uth->uu_network_marks |= pop;
6007 }
6008 }
6009
6010 return (net_thread_marks_t)&base[pop];
6011 }
6012
6013 __private_extern__ net_thread_marks_t
6014 net_thread_unmarks_push(u_int32_t unpush)
6015 {
6016 static const char *const base = (const void*)&net_thread_marks_base;
6017 u_int32_t unpop = 0;
6018
6019 if (unpush != 0) {
6020 struct uthread *uth = get_bsdthread_info(current_thread());
6021
6022 unpop = unpush & uth->uu_network_marks;
6023 if (unpop != 0) {
6024 uth->uu_network_marks &= ~unpop;
6025 }
6026 }
6027
6028 return (net_thread_marks_t)&base[unpop];
6029 }
6030
6031 __private_extern__ void
6032 net_thread_marks_pop(net_thread_marks_t popx)
6033 {
6034 static const char *const base = (const void*)&net_thread_marks_base;
6035 const ptrdiff_t pop = (const char *)popx - (const char *)base;
6036
6037 if (pop != 0) {
6038 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6039 struct uthread *uth = get_bsdthread_info(current_thread());
6040
6041 VERIFY((pop & ones) == pop);
6042 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6043 uth->uu_network_marks &= ~pop;
6044 }
6045 }
6046
6047 __private_extern__ void
6048 net_thread_unmarks_pop(net_thread_marks_t unpopx)
6049 {
6050 static const char *const base = (const void*)&net_thread_marks_base;
6051 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
6052
6053 if (unpop != 0) {
6054 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6055 struct uthread *uth = get_bsdthread_info(current_thread());
6056
6057 VERIFY((unpop & ones) == unpop);
6058 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6059 uth->uu_network_marks |= unpop;
6060 }
6061 }
6062
6063 __private_extern__ u_int32_t
6064 net_thread_is_marked(u_int32_t check)
6065 {
6066 if (check != 0) {
6067 struct uthread *uth = get_bsdthread_info(current_thread());
6068 return uth->uu_network_marks & check;
6069 } else {
6070 return 0;
6071 }
6072 }
6073
6074 __private_extern__ u_int32_t
6075 net_thread_is_unmarked(u_int32_t check)
6076 {
6077 if (check != 0) {
6078 struct uthread *uth = get_bsdthread_info(current_thread());
6079 return ~uth->uu_network_marks & check;
6080 } else {
6081 return 0;
6082 }
6083 }
6084
6085 static __inline__ int
6086 _is_announcement(const struct sockaddr_in * sender_sin,
6087 const struct sockaddr_in * target_sin)
6088 {
6089 if (target_sin == NULL || sender_sin == NULL) {
6090 return FALSE;
6091 }
6092
6093 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
6094 }
6095
6096 __private_extern__ errno_t
6097 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6098 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6099 const struct sockaddr *target_proto0, u_int32_t rtflags)
6100 {
6101 errno_t result = 0;
6102 const struct sockaddr_in * sender_sin;
6103 const struct sockaddr_in * target_sin;
6104 struct sockaddr_inarp target_proto_sinarp;
6105 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6106
6107 if (target_proto == NULL || sender_proto == NULL) {
6108 return EINVAL;
6109 }
6110
6111 if (sender_proto->sa_family != target_proto->sa_family) {
6112 return EINVAL;
6113 }
6114
6115 /*
6116 * If the target is a (default) router, provide that
6117 * information to the send_arp callback routine.
6118 */
6119 if (rtflags & RTF_ROUTER) {
6120 bcopy(target_proto, &target_proto_sinarp,
6121 sizeof(struct sockaddr_in));
6122 target_proto_sinarp.sin_other |= SIN_ROUTER;
6123 target_proto = (struct sockaddr *)&target_proto_sinarp;
6124 }
6125
6126 /*
6127 * If this is an ARP request and the target IP is IPv4LL,
6128 * send the request on all interfaces. The exception is
6129 * an announcement, which must only appear on the specific
6130 * interface.
6131 */
6132 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6133 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6134 if (target_proto->sa_family == AF_INET &&
6135 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6136 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
6137 !_is_announcement(sender_sin, target_sin)) {
6138 ifnet_t *ifp_list;
6139 u_int32_t count;
6140 u_int32_t ifp_on;
6141
6142 result = ENOTSUP;
6143
6144 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6145 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6146 errno_t new_result;
6147 ifaddr_t source_hw = NULL;
6148 ifaddr_t source_ip = NULL;
6149 struct sockaddr_in source_ip_copy;
6150 struct ifnet *cur_ifp = ifp_list[ifp_on];
6151
6152 /*
6153 * Only arp on interfaces marked for IPv4LL
6154 * ARPing. This may mean that we don't ARP on
6155 * the interface the subnet route points to.
6156 */
6157 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
6158 continue;
6159 }
6160
6161 /* Find the source IP address */
6162 ifnet_lock_shared(cur_ifp);
6163 source_hw = cur_ifp->if_lladdr;
6164 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6165 ifa_link) {
6166 IFA_LOCK(source_ip);
6167 if (source_ip->ifa_addr != NULL &&
6168 source_ip->ifa_addr->sa_family ==
6169 AF_INET) {
6170 /* Copy the source IP address */
6171 source_ip_copy =
6172 *(struct sockaddr_in *)
6173 (void *)source_ip->ifa_addr;
6174 IFA_UNLOCK(source_ip);
6175 break;
6176 }
6177 IFA_UNLOCK(source_ip);
6178 }
6179
6180 /* No IP Source, don't arp */
6181 if (source_ip == NULL) {
6182 ifnet_lock_done(cur_ifp);
6183 continue;
6184 }
6185
6186 IFA_ADDREF(source_hw);
6187 ifnet_lock_done(cur_ifp);
6188
6189 /* Send the ARP */
6190 new_result = dlil_send_arp_internal(cur_ifp,
6191 arpop, (struct sockaddr_dl *)(void *)
6192 source_hw->ifa_addr,
6193 (struct sockaddr *)&source_ip_copy, NULL,
6194 target_proto);
6195
6196 IFA_REMREF(source_hw);
6197 if (result == ENOTSUP) {
6198 result = new_result;
6199 }
6200 }
6201 ifnet_list_free(ifp_list);
6202 }
6203 } else {
6204 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6205 sender_proto, target_hw, target_proto);
6206 }
6207
6208 return result;
6209 }
6210
6211 /*
6212 * Caller must hold ifnet head lock.
6213 */
6214 static int
6215 ifnet_lookup(struct ifnet *ifp)
6216 {
6217 struct ifnet *_ifp;
6218
6219 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6220 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
6221 if (_ifp == ifp) {
6222 break;
6223 }
6224 }
6225 return _ifp != NULL;
6226 }
6227
6228 /*
6229 * Caller has to pass a non-zero refio argument to get a
6230 * IO reference count. This will prevent ifnet_detach from
6231 * being called when there are outstanding io reference counts.
6232 */
6233 int
6234 ifnet_is_attached(struct ifnet *ifp, int refio)
6235 {
6236 int ret;
6237
6238 lck_mtx_lock_spin(&ifp->if_ref_lock);
6239 if ((ret = IF_FULLY_ATTACHED(ifp))) {
6240 if (refio > 0) {
6241 ifp->if_refio++;
6242 }
6243 }
6244 lck_mtx_unlock(&ifp->if_ref_lock);
6245
6246 return ret;
6247 }
6248
6249 void
6250 ifnet_incr_pending_thread_count(struct ifnet *ifp)
6251 {
6252 lck_mtx_lock_spin(&ifp->if_ref_lock);
6253 ifp->if_threads_pending++;
6254 lck_mtx_unlock(&ifp->if_ref_lock);
6255 }
6256
6257 void
6258 ifnet_decr_pending_thread_count(struct ifnet *ifp)
6259 {
6260 lck_mtx_lock_spin(&ifp->if_ref_lock);
6261 VERIFY(ifp->if_threads_pending > 0);
6262 ifp->if_threads_pending--;
6263 if (ifp->if_threads_pending == 0) {
6264 wakeup(&ifp->if_threads_pending);
6265 }
6266 lck_mtx_unlock(&ifp->if_ref_lock);
6267 }
6268
6269 /*
6270 * Caller must ensure the interface is attached; the assumption is that
6271 * there is at least an outstanding IO reference count held already.
6272 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6273 */
6274 void
6275 ifnet_incr_iorefcnt(struct ifnet *ifp)
6276 {
6277 lck_mtx_lock_spin(&ifp->if_ref_lock);
6278 VERIFY(IF_FULLY_ATTACHED(ifp));
6279 VERIFY(ifp->if_refio > 0);
6280 ifp->if_refio++;
6281 lck_mtx_unlock(&ifp->if_ref_lock);
6282 }
6283
6284 __attribute__((always_inline))
6285 static void
6286 ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6287 {
6288 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6289
6290 VERIFY(ifp->if_refio > 0);
6291 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6292
6293 ifp->if_refio--;
6294 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6295
6296 /*
6297 * if there are no more outstanding io references, wakeup the
6298 * ifnet_detach thread if detaching flag is set.
6299 */
6300 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6301 wakeup(&(ifp->if_refio));
6302 }
6303 }
6304
6305 void
6306 ifnet_decr_iorefcnt(struct ifnet *ifp)
6307 {
6308 lck_mtx_lock_spin(&ifp->if_ref_lock);
6309 ifnet_decr_iorefcnt_locked(ifp);
6310 lck_mtx_unlock(&ifp->if_ref_lock);
6311 }
6312
6313 boolean_t
6314 ifnet_datamov_begin(struct ifnet *ifp)
6315 {
6316 boolean_t ret;
6317
6318 lck_mtx_lock_spin(&ifp->if_ref_lock);
6319 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6320 ifp->if_refio++;
6321 ifp->if_datamov++;
6322 }
6323 lck_mtx_unlock(&ifp->if_ref_lock);
6324
6325 return ret;
6326 }
6327
6328 void
6329 ifnet_datamov_end(struct ifnet *ifp)
6330 {
6331 lck_mtx_lock_spin(&ifp->if_ref_lock);
6332 VERIFY(ifp->if_datamov > 0);
6333 /*
6334 * if there's no more thread moving data, wakeup any
6335 * drainers that's blocked waiting for this.
6336 */
6337 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6338 wakeup(&(ifp->if_datamov));
6339 }
6340 ifnet_decr_iorefcnt_locked(ifp);
6341 lck_mtx_unlock(&ifp->if_ref_lock);
6342 }
6343
6344 void
6345 ifnet_datamov_suspend(struct ifnet *ifp)
6346 {
6347 lck_mtx_lock_spin(&ifp->if_ref_lock);
6348 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6349 ifp->if_refio++;
6350 if (ifp->if_suspend++ == 0) {
6351 VERIFY(ifp->if_refflags & IFRF_READY);
6352 ifp->if_refflags &= ~IFRF_READY;
6353 }
6354 lck_mtx_unlock(&ifp->if_ref_lock);
6355 }
6356
6357 void
6358 ifnet_datamov_drain(struct ifnet *ifp)
6359 {
6360 lck_mtx_lock(&ifp->if_ref_lock);
6361 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6362 /* data movement must already be suspended */
6363 VERIFY(ifp->if_suspend > 0);
6364 VERIFY(!(ifp->if_refflags & IFRF_READY));
6365 ifp->if_drainers++;
6366 while (ifp->if_datamov != 0) {
6367 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6368 (PZERO - 1), __func__, NULL);
6369 }
6370 VERIFY(!(ifp->if_refflags & IFRF_READY));
6371 VERIFY(ifp->if_drainers > 0);
6372 ifp->if_drainers--;
6373 lck_mtx_unlock(&ifp->if_ref_lock);
6374
6375 /* purge the interface queues */
6376 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6377 if_qflush(ifp, 0);
6378 }
6379 }
6380
6381 void
6382 ifnet_datamov_resume(struct ifnet *ifp)
6383 {
6384 lck_mtx_lock(&ifp->if_ref_lock);
6385 /* data movement must already be suspended */
6386 VERIFY(ifp->if_suspend > 0);
6387 if (--ifp->if_suspend == 0) {
6388 VERIFY(!(ifp->if_refflags & IFRF_READY));
6389 ifp->if_refflags |= IFRF_READY;
6390 }
6391 ifnet_decr_iorefcnt_locked(ifp);
6392 lck_mtx_unlock(&ifp->if_ref_lock);
6393 }
6394
6395 static void
6396 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6397 {
6398 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6399 ctrace_t *tr;
6400 u_int32_t idx;
6401 u_int16_t *cnt;
6402
6403 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6404 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6405 /* NOTREACHED */
6406 }
6407
6408 if (refhold) {
6409 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6410 tr = dl_if_dbg->dldbg_if_refhold;
6411 } else {
6412 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6413 tr = dl_if_dbg->dldbg_if_refrele;
6414 }
6415
6416 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6417 ctrace_record(&tr[idx]);
6418 }
6419
6420 errno_t
6421 dlil_if_ref(struct ifnet *ifp)
6422 {
6423 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6424
6425 if (dl_if == NULL) {
6426 return EINVAL;
6427 }
6428
6429 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6430 ++dl_if->dl_if_refcnt;
6431 if (dl_if->dl_if_refcnt == 0) {
6432 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6433 /* NOTREACHED */
6434 }
6435 if (dl_if->dl_if_trace != NULL) {
6436 (*dl_if->dl_if_trace)(dl_if, TRUE);
6437 }
6438 lck_mtx_unlock(&dl_if->dl_if_lock);
6439
6440 return 0;
6441 }
6442
6443 errno_t
6444 dlil_if_free(struct ifnet *ifp)
6445 {
6446 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6447 bool need_release = FALSE;
6448
6449 if (dl_if == NULL) {
6450 return EINVAL;
6451 }
6452
6453 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6454 switch (dl_if->dl_if_refcnt) {
6455 case 0:
6456 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6457 /* NOTREACHED */
6458 break;
6459 case 1:
6460 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6461 need_release = TRUE;
6462 }
6463 break;
6464 default:
6465 break;
6466 }
6467 --dl_if->dl_if_refcnt;
6468 if (dl_if->dl_if_trace != NULL) {
6469 (*dl_if->dl_if_trace)(dl_if, FALSE);
6470 }
6471 lck_mtx_unlock(&dl_if->dl_if_lock);
6472 if (need_release) {
6473 dlil_if_release(ifp);
6474 }
6475 return 0;
6476 }
6477
6478 static errno_t
6479 dlil_attach_protocol_internal(struct if_proto *proto,
6480 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6481 uint32_t * proto_count)
6482 {
6483 struct kev_dl_proto_data ev_pr_data;
6484 struct ifnet *ifp = proto->ifp;
6485 int retval = 0;
6486 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6487 struct if_proto *prev_proto;
6488 struct if_proto *_proto;
6489
6490 /* callee holds a proto refcnt upon success */
6491 ifnet_lock_exclusive(ifp);
6492 _proto = find_attached_proto(ifp, proto->protocol_family);
6493 if (_proto != NULL) {
6494 ifnet_lock_done(ifp);
6495 if_proto_free(_proto);
6496 return EEXIST;
6497 }
6498
6499 /*
6500 * Call family module add_proto routine so it can refine the
6501 * demux descriptors as it wishes.
6502 */
6503 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6504 demux_count);
6505 if (retval) {
6506 ifnet_lock_done(ifp);
6507 return retval;
6508 }
6509
6510 /*
6511 * Insert the protocol in the hash
6512 */
6513 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
6514 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6515 prev_proto = SLIST_NEXT(prev_proto, next_hash);
6516 }
6517 if (prev_proto) {
6518 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
6519 } else {
6520 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6521 proto, next_hash);
6522 }
6523
6524 /* hold a proto refcnt for attach */
6525 if_proto_ref(proto);
6526
6527 /*
6528 * The reserved field carries the number of protocol still attached
6529 * (subject to change)
6530 */
6531 ev_pr_data.proto_family = proto->protocol_family;
6532 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6533
6534 ifnet_lock_done(ifp);
6535
6536 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6537 (struct net_event_data *)&ev_pr_data,
6538 sizeof(struct kev_dl_proto_data));
6539 if (proto_count != NULL) {
6540 *proto_count = ev_pr_data.proto_remaining_count;
6541 }
6542 return retval;
6543 }
6544
6545 errno_t
6546 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6547 const struct ifnet_attach_proto_param *proto_details)
6548 {
6549 int retval = 0;
6550 struct if_proto *ifproto = NULL;
6551 uint32_t proto_count = 0;
6552
6553 ifnet_head_lock_shared();
6554 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6555 retval = EINVAL;
6556 goto end;
6557 }
6558 /* Check that the interface is in the global list */
6559 if (!ifnet_lookup(ifp)) {
6560 retval = ENXIO;
6561 goto end;
6562 }
6563
6564 ifproto = zalloc(dlif_proto_zone);
6565 if (ifproto == NULL) {
6566 retval = ENOMEM;
6567 goto end;
6568 }
6569 bzero(ifproto, dlif_proto_size);
6570
6571 /* refcnt held above during lookup */
6572 ifproto->ifp = ifp;
6573 ifproto->protocol_family = protocol;
6574 ifproto->proto_kpi = kProtoKPI_v1;
6575 ifproto->kpi.v1.input = proto_details->input;
6576 ifproto->kpi.v1.pre_output = proto_details->pre_output;
6577 ifproto->kpi.v1.event = proto_details->event;
6578 ifproto->kpi.v1.ioctl = proto_details->ioctl;
6579 ifproto->kpi.v1.detached = proto_details->detached;
6580 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6581 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6582
6583 retval = dlil_attach_protocol_internal(ifproto,
6584 proto_details->demux_list, proto_details->demux_count,
6585 &proto_count);
6586
6587 end:
6588 if (retval != 0 && retval != EEXIST) {
6589 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
6590 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6591 } else {
6592 if (dlil_verbose) {
6593 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6594 ifp != NULL ? if_name(ifp) : "N/A",
6595 protocol, proto_count);
6596 }
6597 }
6598 ifnet_head_done();
6599 if (retval == 0) {
6600 /*
6601 * A protocol has been attached, mark the interface up.
6602 * This used to be done by configd.KernelEventMonitor, but that
6603 * is inherently prone to races (rdar://problem/30810208).
6604 */
6605 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6606 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6607 dlil_post_sifflags_msg(ifp);
6608 } else if (ifproto != NULL) {
6609 zfree(dlif_proto_zone, ifproto);
6610 }
6611 return retval;
6612 }
6613
6614 errno_t
6615 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6616 const struct ifnet_attach_proto_param_v2 *proto_details)
6617 {
6618 int retval = 0;
6619 struct if_proto *ifproto = NULL;
6620 uint32_t proto_count = 0;
6621
6622 ifnet_head_lock_shared();
6623 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6624 retval = EINVAL;
6625 goto end;
6626 }
6627 /* Check that the interface is in the global list */
6628 if (!ifnet_lookup(ifp)) {
6629 retval = ENXIO;
6630 goto end;
6631 }
6632
6633 ifproto = zalloc(dlif_proto_zone);
6634 if (ifproto == NULL) {
6635 retval = ENOMEM;
6636 goto end;
6637 }
6638 bzero(ifproto, sizeof(*ifproto));
6639
6640 /* refcnt held above during lookup */
6641 ifproto->ifp = ifp;
6642 ifproto->protocol_family = protocol;
6643 ifproto->proto_kpi = kProtoKPI_v2;
6644 ifproto->kpi.v2.input = proto_details->input;
6645 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6646 ifproto->kpi.v2.event = proto_details->event;
6647 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6648 ifproto->kpi.v2.detached = proto_details->detached;
6649 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6650 ifproto->kpi.v2.send_arp = proto_details->send_arp;
6651
6652 retval = dlil_attach_protocol_internal(ifproto,
6653 proto_details->demux_list, proto_details->demux_count,
6654 &proto_count);
6655
6656 end:
6657 if (retval != 0 && retval != EEXIST) {
6658 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6659 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6660 } else {
6661 if (dlil_verbose) {
6662 DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
6663 ifp != NULL ? if_name(ifp) : "N/A",
6664 protocol, proto_count);
6665 }
6666 }
6667 ifnet_head_done();
6668 if (retval == 0) {
6669 /*
6670 * A protocol has been attached, mark the interface up.
6671 * This used to be done by configd.KernelEventMonitor, but that
6672 * is inherently prone to races (rdar://problem/30810208).
6673 */
6674 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6675 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6676 dlil_post_sifflags_msg(ifp);
6677 } else if (ifproto != NULL) {
6678 zfree(dlif_proto_zone, ifproto);
6679 }
6680 return retval;
6681 }
6682
6683 errno_t
6684 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
6685 {
6686 struct if_proto *proto = NULL;
6687 int retval = 0;
6688
6689 if (ifp == NULL || proto_family == 0) {
6690 retval = EINVAL;
6691 goto end;
6692 }
6693
6694 ifnet_lock_exclusive(ifp);
6695 /* callee holds a proto refcnt upon success */
6696 proto = find_attached_proto(ifp, proto_family);
6697 if (proto == NULL) {
6698 retval = ENXIO;
6699 ifnet_lock_done(ifp);
6700 goto end;
6701 }
6702
6703 /* call family module del_proto */
6704 if (ifp->if_del_proto) {
6705 ifp->if_del_proto(ifp, proto->protocol_family);
6706 }
6707
6708 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6709 proto, if_proto, next_hash);
6710
6711 if (proto->proto_kpi == kProtoKPI_v1) {
6712 proto->kpi.v1.input = ifproto_media_input_v1;
6713 proto->kpi.v1.pre_output = ifproto_media_preout;
6714 proto->kpi.v1.event = ifproto_media_event;
6715 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6716 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6717 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6718 } else {
6719 proto->kpi.v2.input = ifproto_media_input_v2;
6720 proto->kpi.v2.pre_output = ifproto_media_preout;
6721 proto->kpi.v2.event = ifproto_media_event;
6722 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6723 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6724 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6725 }
6726 proto->detached = 1;
6727 ifnet_lock_done(ifp);
6728
6729 if (dlil_verbose) {
6730 DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
6731 (proto->proto_kpi == kProtoKPI_v1) ?
6732 "v1" : "v2", proto_family);
6733 }
6734
6735 /* release proto refcnt held during protocol attach */
6736 if_proto_free(proto);
6737
6738 /*
6739 * Release proto refcnt held during lookup; the rest of
6740 * protocol detach steps will happen when the last proto
6741 * reference is released.
6742 */
6743 if_proto_free(proto);
6744
6745 end:
6746 return retval;
6747 }
6748
6749
6750 static errno_t
6751 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6752 struct mbuf *packet, char *header)
6753 {
6754 #pragma unused(ifp, protocol, packet, header)
6755 return ENXIO;
6756 }
6757
6758 static errno_t
6759 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6760 struct mbuf *packet)
6761 {
6762 #pragma unused(ifp, protocol, packet)
6763 return ENXIO;
6764 }
6765
6766 static errno_t
6767 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6768 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6769 char *link_layer_dest)
6770 {
6771 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
6772 return ENXIO;
6773 }
6774
6775 static void
6776 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6777 const struct kev_msg *event)
6778 {
6779 #pragma unused(ifp, protocol, event)
6780 }
6781
6782 static errno_t
6783 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6784 unsigned long command, void *argument)
6785 {
6786 #pragma unused(ifp, protocol, command, argument)
6787 return ENXIO;
6788 }
6789
6790 static errno_t
6791 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6792 struct sockaddr_dl *out_ll, size_t ll_len)
6793 {
6794 #pragma unused(ifp, proto_addr, out_ll, ll_len)
6795 return ENXIO;
6796 }
6797
6798 static errno_t
6799 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6800 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6801 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6802 {
6803 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
6804 return ENXIO;
6805 }
6806
6807 extern int if_next_index(void);
6808 extern int tcp_ecn_outbound;
6809
6810 errno_t
6811 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
6812 {
6813 struct ifnet *tmp_if;
6814 struct ifaddr *ifa;
6815 struct if_data_internal if_data_saved;
6816 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6817 struct dlil_threading_info *dl_inp;
6818 u_int32_t sflags = 0;
6819 int err;
6820
6821 if (ifp == NULL) {
6822 return EINVAL;
6823 }
6824
6825 /*
6826 * Serialize ifnet attach using dlil_ifnet_lock, in order to
6827 * prevent the interface from being configured while it is
6828 * embryonic, as ifnet_head_lock is dropped and reacquired
6829 * below prior to marking the ifnet with IFRF_ATTACHED.
6830 */
6831 dlil_if_lock();
6832 ifnet_head_lock_exclusive();
6833 /* Verify we aren't already on the list */
6834 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
6835 if (tmp_if == ifp) {
6836 ifnet_head_done();
6837 dlil_if_unlock();
6838 return EEXIST;
6839 }
6840 }
6841
6842 lck_mtx_lock_spin(&ifp->if_ref_lock);
6843 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
6844 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6845 __func__, ifp);
6846 /* NOTREACHED */
6847 }
6848 lck_mtx_unlock(&ifp->if_ref_lock);
6849
6850 ifnet_lock_exclusive(ifp);
6851
6852 /* Sanity check */
6853 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6854 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6855 VERIFY(ifp->if_threads_pending == 0);
6856
6857 if (ll_addr != NULL) {
6858 if (ifp->if_addrlen == 0) {
6859 ifp->if_addrlen = ll_addr->sdl_alen;
6860 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
6861 ifnet_lock_done(ifp);
6862 ifnet_head_done();
6863 dlil_if_unlock();
6864 return EINVAL;
6865 }
6866 }
6867
6868 /*
6869 * Allow interfaces without protocol families to attach
6870 * only if they have the necessary fields filled out.
6871 */
6872 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
6873 DLIL_PRINTF("%s: Attempt to attach interface without "
6874 "family module - %d\n", __func__, ifp->if_family);
6875 ifnet_lock_done(ifp);
6876 ifnet_head_done();
6877 dlil_if_unlock();
6878 return ENODEV;
6879 }
6880
6881 /* Allocate protocol hash table */
6882 VERIFY(ifp->if_proto_hash == NULL);
6883 ifp->if_proto_hash = zalloc(dlif_phash_zone);
6884 if (ifp->if_proto_hash == NULL) {
6885 ifnet_lock_done(ifp);
6886 ifnet_head_done();
6887 dlil_if_unlock();
6888 return ENOBUFS;
6889 }
6890 bzero(ifp->if_proto_hash, dlif_phash_size);
6891
6892 lck_mtx_lock_spin(&ifp->if_flt_lock);
6893 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6894 TAILQ_INIT(&ifp->if_flt_head);
6895 VERIFY(ifp->if_flt_busy == 0);
6896 VERIFY(ifp->if_flt_waiters == 0);
6897 lck_mtx_unlock(&ifp->if_flt_lock);
6898
6899 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
6900 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
6901 LIST_INIT(&ifp->if_multiaddrs);
6902 }
6903
6904 VERIFY(ifp->if_allhostsinm == NULL);
6905 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6906 TAILQ_INIT(&ifp->if_addrhead);
6907
6908 if (ifp->if_index == 0) {
6909 int idx = if_next_index();
6910
6911 if (idx == -1) {
6912 ifp->if_index = 0;
6913 ifnet_lock_done(ifp);
6914 ifnet_head_done();
6915 dlil_if_unlock();
6916 return ENOBUFS;
6917 }
6918 ifp->if_index = idx;
6919 }
6920 /* There should not be anything occupying this slot */
6921 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6922
6923 /* allocate (if needed) and initialize a link address */
6924 ifa = dlil_alloc_lladdr(ifp, ll_addr);
6925 if (ifa == NULL) {
6926 ifnet_lock_done(ifp);
6927 ifnet_head_done();
6928 dlil_if_unlock();
6929 return ENOBUFS;
6930 }
6931
6932 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
6933 ifnet_addrs[ifp->if_index - 1] = ifa;
6934
6935 /* make this address the first on the list */
6936 IFA_LOCK(ifa);
6937 /* hold a reference for ifnet_addrs[] */
6938 IFA_ADDREF_LOCKED(ifa);
6939 /* if_attach_link_ifa() holds a reference for ifa_link */
6940 if_attach_link_ifa(ifp, ifa);
6941 IFA_UNLOCK(ifa);
6942
6943 #if CONFIG_MACF_NET
6944 mac_ifnet_label_associate(ifp);
6945 #endif
6946
6947 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
6948 ifindex2ifnet[ifp->if_index] = ifp;
6949
6950 /* Hold a reference to the underlying dlil_ifnet */
6951 ifnet_reference(ifp);
6952
6953 /* Clear stats (save and restore other fields that we care) */
6954 if_data_saved = ifp->if_data;
6955 bzero(&ifp->if_data, sizeof(ifp->if_data));
6956 ifp->if_data.ifi_type = if_data_saved.ifi_type;
6957 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
6958 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
6959 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
6960 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
6961 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
6962 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
6963 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
6964 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
6965 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
6966 ifnet_touch_lastchange(ifp);
6967
6968 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
6969 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
6970 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
6971
6972 /* By default, use SFB and enable flow advisory */
6973 sflags = PKTSCHEDF_QALG_SFB;
6974 if (if_flowadv) {
6975 sflags |= PKTSCHEDF_QALG_FLOWCTL;
6976 }
6977
6978 if (if_delaybased_queue) {
6979 sflags |= PKTSCHEDF_QALG_DELAYBASED;
6980 }
6981
6982 if (ifp->if_output_sched_model ==
6983 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
6984 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
6985 }
6986
6987 /* Initialize transmit queue(s) */
6988 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
6989 if (err != 0) {
6990 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
6991 "err=%d", __func__, ifp, err);
6992 /* NOTREACHED */
6993 }
6994
6995 /* Sanity checks on the input thread storage */
6996 dl_inp = &dl_if->dl_if_inpstorage;
6997 bzero(&dl_inp->stats, sizeof(dl_inp->stats));
6998 VERIFY(dl_inp->input_waiting == 0);
6999 VERIFY(dl_inp->wtot == 0);
7000 VERIFY(dl_inp->ifp == NULL);
7001 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
7002 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
7003 VERIFY(!dl_inp->net_affinity);
7004 VERIFY(ifp->if_inp == NULL);
7005 VERIFY(dl_inp->input_thr == THREAD_NULL);
7006 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
7007 VERIFY(dl_inp->poll_thr == THREAD_NULL);
7008 VERIFY(dl_inp->tag == 0);
7009
7010 #if IFNET_INPUT_SANITY_CHK
7011 VERIFY(dl_inp->input_mbuf_cnt == 0);
7012 #endif /* IFNET_INPUT_SANITY_CHK */
7013
7014 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7015 dlil_reset_rxpoll_params(ifp);
7016 /*
7017 * A specific DLIL input thread is created per non-loopback interface.
7018 */
7019 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
7020 ifp->if_inp = dl_inp;
7021 ifnet_incr_pending_thread_count(ifp);
7022 err = dlil_create_input_thread(ifp, ifp->if_inp);
7023 if (err != 0) {
7024 panic_plain("%s: ifp=%p couldn't get an input thread; "
7025 "err=%d", __func__, ifp, err);
7026 /* NOTREACHED */
7027 }
7028 }
7029 /*
7030 * If the driver supports the new transmit model, calculate flow hash
7031 * and create a workloop starter thread to invoke the if_start callback
7032 * where the packets may be dequeued and transmitted.
7033 */
7034 if (ifp->if_eflags & IFEF_TXSTART) {
7035 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
7036 VERIFY(ifp->if_flowhash != 0);
7037 VERIFY(ifp->if_start_thread == THREAD_NULL);
7038
7039 ifnet_set_start_cycle(ifp, NULL);
7040 ifp->if_start_active = 0;
7041 ifp->if_start_req = 0;
7042 ifp->if_start_flags = 0;
7043 VERIFY(ifp->if_start != NULL);
7044 ifnet_incr_pending_thread_count(ifp);
7045 if ((err = kernel_thread_start(ifnet_start_thread_func,
7046 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
7047 panic_plain("%s: "
7048 "ifp=%p couldn't get a start thread; "
7049 "err=%d", __func__, ifp, err);
7050 /* NOTREACHED */
7051 }
7052 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
7053 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
7054 } else {
7055 ifp->if_flowhash = 0;
7056 }
7057
7058 /* Reset polling parameters */
7059 ifnet_set_poll_cycle(ifp, NULL);
7060 ifp->if_poll_update = 0;
7061 ifp->if_poll_flags = 0;
7062 ifp->if_poll_req = 0;
7063 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7064
7065 /*
7066 * If the driver supports the new receive model, create a poller
7067 * thread to invoke if_input_poll callback where the packets may
7068 * be dequeued from the driver and processed for reception.
7069 * if the interface is netif compat then the poller thread is managed by netif.
7070 */
7071 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) &&
7072 (ifp->if_xflags & IFXF_LEGACY)) {
7073 VERIFY(ifp->if_input_poll != NULL);
7074 VERIFY(ifp->if_input_ctl != NULL);
7075 ifnet_incr_pending_thread_count(ifp);
7076 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
7077 &ifp->if_poll_thread)) != KERN_SUCCESS) {
7078 panic_plain("%s: ifp=%p couldn't get a poll thread; "
7079 "err=%d", __func__, ifp, err);
7080 /* NOTREACHED */
7081 }
7082 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
7083 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
7084 }
7085
7086 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7087 VERIFY(ifp->if_desc.ifd_len == 0);
7088 VERIFY(ifp->if_desc.ifd_desc != NULL);
7089
7090 /* Record attach PC stacktrace */
7091 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
7092
7093 ifp->if_updatemcasts = 0;
7094 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
7095 struct ifmultiaddr *ifma;
7096 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
7097 IFMA_LOCK(ifma);
7098 if (ifma->ifma_addr->sa_family == AF_LINK ||
7099 ifma->ifma_addr->sa_family == AF_UNSPEC) {
7100 ifp->if_updatemcasts++;
7101 }
7102 IFMA_UNLOCK(ifma);
7103 }
7104
7105 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
7106 "membership(s)\n", if_name(ifp),
7107 ifp->if_updatemcasts);
7108 }
7109
7110 /* Clear logging parameters */
7111 bzero(&ifp->if_log, sizeof(ifp->if_log));
7112
7113 /* Clear foreground/realtime activity timestamps */
7114 ifp->if_fg_sendts = 0;
7115 ifp->if_rt_sendts = 0;
7116
7117 VERIFY(ifp->if_delegated.ifp == NULL);
7118 VERIFY(ifp->if_delegated.type == 0);
7119 VERIFY(ifp->if_delegated.family == 0);
7120 VERIFY(ifp->if_delegated.subfamily == 0);
7121 VERIFY(ifp->if_delegated.expensive == 0);
7122 VERIFY(ifp->if_delegated.constrained == 0);
7123
7124 VERIFY(ifp->if_agentids == NULL);
7125 VERIFY(ifp->if_agentcount == 0);
7126
7127 /* Reset interface state */
7128 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7129 ifp->if_interface_state.valid_bitmask |=
7130 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7131 ifp->if_interface_state.interface_availability =
7132 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
7133
7134 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7135 if (ifp == lo_ifp) {
7136 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
7137 ifp->if_interface_state.valid_bitmask |=
7138 IF_INTERFACE_STATE_LQM_STATE_VALID;
7139 } else {
7140 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
7141 }
7142
7143 /*
7144 * Enable ECN capability on this interface depending on the
7145 * value of ECN global setting
7146 */
7147 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
7148 ifp->if_eflags |= IFEF_ECN_ENABLE;
7149 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
7150 }
7151
7152 /*
7153 * Built-in Cyclops always on policy for WiFi infra
7154 */
7155 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
7156 errno_t error;
7157
7158 error = if_set_qosmarking_mode(ifp,
7159 IFRTYPE_QOSMARKING_FASTLANE);
7160 if (error != 0) {
7161 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
7162 __func__, ifp->if_xname, error);
7163 } else {
7164 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
7165 #if (DEVELOPMENT || DEBUG)
7166 DLIL_PRINTF("%s fastlane enabled on %s\n",
7167 __func__, ifp->if_xname);
7168 #endif /* (DEVELOPMENT || DEBUG) */
7169 }
7170 }
7171
7172 ifnet_lock_done(ifp);
7173 ifnet_head_done();
7174
7175
7176 lck_mtx_lock(&ifp->if_cached_route_lock);
7177 /* Enable forwarding cached route */
7178 ifp->if_fwd_cacheok = 1;
7179 /* Clean up any existing cached routes */
7180 ROUTE_RELEASE(&ifp->if_fwd_route);
7181 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7182 ROUTE_RELEASE(&ifp->if_src_route);
7183 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7184 ROUTE_RELEASE(&ifp->if_src_route6);
7185 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7186 lck_mtx_unlock(&ifp->if_cached_route_lock);
7187
7188 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
7189
7190 /*
7191 * Allocate and attach IGMPv3/MLDv2 interface specific variables
7192 * and trees; do this before the ifnet is marked as attached.
7193 * The ifnet keeps the reference to the info structures even after
7194 * the ifnet is detached, since the network-layer records still
7195 * refer to the info structures even after that. This also
7196 * makes it possible for them to still function after the ifnet
7197 * is recycled or reattached.
7198 */
7199 #if INET
7200 if (IGMP_IFINFO(ifp) == NULL) {
7201 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
7202 VERIFY(IGMP_IFINFO(ifp) != NULL);
7203 } else {
7204 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
7205 igmp_domifreattach(IGMP_IFINFO(ifp));
7206 }
7207 #endif /* INET */
7208 #if INET6
7209 if (MLD_IFINFO(ifp) == NULL) {
7210 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
7211 VERIFY(MLD_IFINFO(ifp) != NULL);
7212 } else {
7213 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
7214 mld_domifreattach(MLD_IFINFO(ifp));
7215 }
7216 #endif /* INET6 */
7217
7218 VERIFY(ifp->if_data_threshold == 0);
7219 VERIFY(ifp->if_dt_tcall != NULL);
7220
7221 /*
7222 * Wait for the created kernel threads for I/O to get
7223 * scheduled and run at least once before we proceed
7224 * to mark interface as attached.
7225 */
7226 lck_mtx_lock(&ifp->if_ref_lock);
7227 while (ifp->if_threads_pending != 0) {
7228 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7229 "interface %s to get scheduled at least once.\n",
7230 __func__, ifp->if_xname);
7231 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
7232 __func__, NULL);
7233 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
7234 }
7235 lck_mtx_unlock(&ifp->if_ref_lock);
7236 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7237 "at least once. Proceeding.\n", __func__, ifp->if_xname);
7238
7239 /* Final mark this ifnet as attached. */
7240 lck_mtx_lock(rnh_lock);
7241 ifnet_lock_exclusive(ifp);
7242 lck_mtx_lock_spin(&ifp->if_ref_lock);
7243 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
7244 lck_mtx_unlock(&ifp->if_ref_lock);
7245 if (net_rtref) {
7246 /* boot-args override; enable idle notification */
7247 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
7248 IFRF_IDLE_NOTIFY);
7249 } else {
7250 /* apply previous request(s) to set the idle flags, if any */
7251 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
7252 ifp->if_idle_new_flags_mask);
7253 }
7254 ifnet_lock_done(ifp);
7255 lck_mtx_unlock(rnh_lock);
7256 dlil_if_unlock();
7257
7258 #if PF
7259 /*
7260 * Attach packet filter to this interface, if enabled.
7261 */
7262 pf_ifnet_hook(ifp, 1);
7263 #endif /* PF */
7264
7265 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
7266
7267 if (dlil_verbose) {
7268 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
7269 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
7270 }
7271
7272 return 0;
7273 }
7274
7275 /*
7276 * Prepare the storage for the first/permanent link address, which must
7277 * must have the same lifetime as the ifnet itself. Although the link
7278 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
7279 * its location in memory must never change as it may still be referred
7280 * to by some parts of the system afterwards (unfortunate implementation
7281 * artifacts inherited from BSD.)
7282 *
7283 * Caller must hold ifnet lock as writer.
7284 */
7285 static struct ifaddr *
7286 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
7287 {
7288 struct ifaddr *ifa, *oifa;
7289 struct sockaddr_dl *asdl, *msdl;
7290 char workbuf[IFNAMSIZ * 2];
7291 int namelen, masklen, socksize;
7292 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7293
7294 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
7295 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
7296
7297 namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
7298 if_name(ifp));
7299 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
7300 + ((namelen > 0) ? namelen : 0);
7301 socksize = masklen + ifp->if_addrlen;
7302 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
7303 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
7304 socksize = sizeof(struct sockaddr_dl);
7305 }
7306 socksize = ROUNDUP(socksize);
7307 #undef ROUNDUP
7308
7309 ifa = ifp->if_lladdr;
7310 if (socksize > DLIL_SDLMAXLEN ||
7311 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
7312 /*
7313 * Rare, but in the event that the link address requires
7314 * more storage space than DLIL_SDLMAXLEN, allocate the
7315 * largest possible storages for address and mask, such
7316 * that we can reuse the same space when if_addrlen grows.
7317 * This same space will be used when if_addrlen shrinks.
7318 */
7319 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
7320 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
7321 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
7322 if (ifa == NULL) {
7323 return NULL;
7324 }
7325 ifa_lock_init(ifa);
7326 /* Don't set IFD_ALLOC, as this is permanent */
7327 ifa->ifa_debug = IFD_LINK;
7328 }
7329 IFA_LOCK(ifa);
7330 /* address and mask sockaddr_dl locations */
7331 asdl = (struct sockaddr_dl *)(ifa + 1);
7332 bzero(asdl, SOCK_MAXADDRLEN);
7333 msdl = (struct sockaddr_dl *)(void *)
7334 ((char *)asdl + SOCK_MAXADDRLEN);
7335 bzero(msdl, SOCK_MAXADDRLEN);
7336 } else {
7337 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
7338 /*
7339 * Use the storage areas for address and mask within the
7340 * dlil_ifnet structure. This is the most common case.
7341 */
7342 if (ifa == NULL) {
7343 ifa = &dl_if->dl_if_lladdr.ifa;
7344 ifa_lock_init(ifa);
7345 /* Don't set IFD_ALLOC, as this is permanent */
7346 ifa->ifa_debug = IFD_LINK;
7347 }
7348 IFA_LOCK(ifa);
7349 /* address and mask sockaddr_dl locations */
7350 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
7351 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
7352 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
7353 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
7354 }
7355
7356 /* hold a permanent reference for the ifnet itself */
7357 IFA_ADDREF_LOCKED(ifa);
7358 oifa = ifp->if_lladdr;
7359 ifp->if_lladdr = ifa;
7360
7361 VERIFY(ifa->ifa_debug == IFD_LINK);
7362 ifa->ifa_ifp = ifp;
7363 ifa->ifa_rtrequest = link_rtrequest;
7364 ifa->ifa_addr = (struct sockaddr *)asdl;
7365 asdl->sdl_len = socksize;
7366 asdl->sdl_family = AF_LINK;
7367 if (namelen > 0) {
7368 bcopy(workbuf, asdl->sdl_data, min(namelen,
7369 sizeof(asdl->sdl_data)));
7370 asdl->sdl_nlen = namelen;
7371 } else {
7372 asdl->sdl_nlen = 0;
7373 }
7374 asdl->sdl_index = ifp->if_index;
7375 asdl->sdl_type = ifp->if_type;
7376 if (ll_addr != NULL) {
7377 asdl->sdl_alen = ll_addr->sdl_alen;
7378 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
7379 } else {
7380 asdl->sdl_alen = 0;
7381 }
7382 ifa->ifa_netmask = (struct sockaddr *)msdl;
7383 msdl->sdl_len = masklen;
7384 while (namelen > 0) {
7385 msdl->sdl_data[--namelen] = 0xff;
7386 }
7387 IFA_UNLOCK(ifa);
7388
7389 if (oifa != NULL) {
7390 IFA_REMREF(oifa);
7391 }
7392
7393 return ifa;
7394 }
7395
7396 static void
7397 if_purgeaddrs(struct ifnet *ifp)
7398 {
7399 #if INET
7400 in_purgeaddrs(ifp);
7401 #endif /* INET */
7402 #if INET6
7403 in6_purgeaddrs(ifp);
7404 #endif /* INET6 */
7405 }
7406
7407 errno_t
7408 ifnet_detach(ifnet_t ifp)
7409 {
7410 struct ifnet *delegated_ifp;
7411 struct nd_ifinfo *ndi = NULL;
7412
7413 if (ifp == NULL) {
7414 return EINVAL;
7415 }
7416
7417 ndi = ND_IFINFO(ifp);
7418 if (NULL != ndi) {
7419 ndi->cga_initialized = FALSE;
7420 }
7421
7422 lck_mtx_lock(rnh_lock);
7423 ifnet_head_lock_exclusive();
7424 ifnet_lock_exclusive(ifp);
7425
7426 if (ifp->if_output_netem != NULL) {
7427 netem_destroy(ifp->if_output_netem);
7428 ifp->if_output_netem = NULL;
7429 }
7430
7431 /*
7432 * Check to see if this interface has previously triggered
7433 * aggressive protocol draining; if so, decrement the global
7434 * refcnt and clear PR_AGGDRAIN on the route domain if
7435 * there are no more of such an interface around.
7436 */
7437 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
7438
7439 lck_mtx_lock_spin(&ifp->if_ref_lock);
7440 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
7441 lck_mtx_unlock(&ifp->if_ref_lock);
7442 ifnet_lock_done(ifp);
7443 ifnet_head_done();
7444 lck_mtx_unlock(rnh_lock);
7445 return EINVAL;
7446 } else if (ifp->if_refflags & IFRF_DETACHING) {
7447 /* Interface has already been detached */
7448 lck_mtx_unlock(&ifp->if_ref_lock);
7449 ifnet_lock_done(ifp);
7450 ifnet_head_done();
7451 lck_mtx_unlock(rnh_lock);
7452 return ENXIO;
7453 }
7454 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
7455 /* Indicate this interface is being detached */
7456 ifp->if_refflags &= ~IFRF_ATTACHED;
7457 ifp->if_refflags |= IFRF_DETACHING;
7458 lck_mtx_unlock(&ifp->if_ref_lock);
7459
7460 if (dlil_verbose) {
7461 DLIL_PRINTF("%s: detaching\n", if_name(ifp));
7462 }
7463
7464 /* clean up flow control entry object if there's any */
7465 if (ifp->if_eflags & IFEF_TXSTART) {
7466 ifnet_flowadv(ifp->if_flowhash);
7467 }
7468
7469 /* Reset ECN enable/disable flags */
7470 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
7471 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
7472
7473 /* Reset CLAT46 flag */
7474 ifp->if_eflags &= ~IFEF_CLAT46;
7475
7476 /*
7477 * We do not reset the TCP keep alive counters in case
7478 * a TCP connection stays connection after the interface
7479 * went down
7480 */
7481 if (ifp->if_tcp_kao_cnt > 0) {
7482 os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
7483 __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
7484 }
7485 ifp->if_tcp_kao_max = 0;
7486
7487 /*
7488 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7489 * no longer be visible during lookups from this point.
7490 */
7491 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
7492 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
7493 ifp->if_link.tqe_next = NULL;
7494 ifp->if_link.tqe_prev = NULL;
7495 if (ifp->if_ordered_link.tqe_next != NULL ||
7496 ifp->if_ordered_link.tqe_prev != NULL) {
7497 ifnet_remove_from_ordered_list(ifp);
7498 }
7499 ifindex2ifnet[ifp->if_index] = NULL;
7500
7501 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
7502 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
7503
7504 /* Record detach PC stacktrace */
7505 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
7506
7507 /* Clear logging parameters */
7508 bzero(&ifp->if_log, sizeof(ifp->if_log));
7509
7510 /* Clear delegated interface info (reference released below) */
7511 delegated_ifp = ifp->if_delegated.ifp;
7512 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
7513
7514 /* Reset interface state */
7515 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7516
7517 ifnet_lock_done(ifp);
7518 ifnet_head_done();
7519 lck_mtx_unlock(rnh_lock);
7520
7521
7522 /* Release reference held on the delegated interface */
7523 if (delegated_ifp != NULL) {
7524 ifnet_release(delegated_ifp);
7525 }
7526
7527 /* Reset Link Quality Metric (unless loopback [lo0]) */
7528 if (ifp != lo_ifp) {
7529 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
7530 }
7531
7532 /* Reset TCP local statistics */
7533 if (ifp->if_tcp_stat != NULL) {
7534 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
7535 }
7536
7537 /* Reset UDP local statistics */
7538 if (ifp->if_udp_stat != NULL) {
7539 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
7540 }
7541
7542 /* Reset ifnet IPv4 stats */
7543 if (ifp->if_ipv4_stat != NULL) {
7544 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
7545 }
7546
7547 /* Reset ifnet IPv6 stats */
7548 if (ifp->if_ipv6_stat != NULL) {
7549 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
7550 }
7551
7552 /* Release memory held for interface link status report */
7553 if (ifp->if_link_status != NULL) {
7554 FREE(ifp->if_link_status, M_TEMP);
7555 ifp->if_link_status = NULL;
7556 }
7557
7558 /* Clear agent IDs */
7559 if (ifp->if_agentids != NULL) {
7560 FREE(ifp->if_agentids, M_NETAGENT);
7561 ifp->if_agentids = NULL;
7562 }
7563 ifp->if_agentcount = 0;
7564
7565
7566 /* Let BPF know we're detaching */
7567 bpfdetach(ifp);
7568
7569 /* Mark the interface as DOWN */
7570 if_down(ifp);
7571
7572 /* Disable forwarding cached route */
7573 lck_mtx_lock(&ifp->if_cached_route_lock);
7574 ifp->if_fwd_cacheok = 0;
7575 lck_mtx_unlock(&ifp->if_cached_route_lock);
7576
7577 /* Disable data threshold and wait for any pending event posting */
7578 ifp->if_data_threshold = 0;
7579 VERIFY(ifp->if_dt_tcall != NULL);
7580 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
7581
7582 /*
7583 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7584 * references to the info structures and leave them attached to
7585 * this ifnet.
7586 */
7587 #if INET
7588 igmp_domifdetach(ifp);
7589 #endif /* INET */
7590 #if INET6
7591 mld_domifdetach(ifp);
7592 #endif /* INET6 */
7593
7594 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
7595
7596 /* Let worker thread take care of the rest, to avoid reentrancy */
7597 dlil_if_lock();
7598 ifnet_detaching_enqueue(ifp);
7599 dlil_if_unlock();
7600
7601 return 0;
7602 }
7603
7604 static void
7605 ifnet_detaching_enqueue(struct ifnet *ifp)
7606 {
7607 dlil_if_lock_assert();
7608
7609 ++ifnet_detaching_cnt;
7610 VERIFY(ifnet_detaching_cnt != 0);
7611 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
7612 wakeup((caddr_t)&ifnet_delayed_run);
7613 }
7614
7615 static struct ifnet *
7616 ifnet_detaching_dequeue(void)
7617 {
7618 struct ifnet *ifp;
7619
7620 dlil_if_lock_assert();
7621
7622 ifp = TAILQ_FIRST(&ifnet_detaching_head);
7623 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
7624 if (ifp != NULL) {
7625 VERIFY(ifnet_detaching_cnt != 0);
7626 --ifnet_detaching_cnt;
7627 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
7628 ifp->if_detaching_link.tqe_next = NULL;
7629 ifp->if_detaching_link.tqe_prev = NULL;
7630 }
7631 return ifp;
7632 }
7633
7634 static int
7635 ifnet_detacher_thread_cont(int err)
7636 {
7637 #pragma unused(err)
7638 struct ifnet *ifp;
7639
7640 for (;;) {
7641 dlil_if_lock_assert();
7642 while (ifnet_detaching_cnt == 0) {
7643 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7644 (PZERO - 1), "ifnet_detacher_cont", 0,
7645 ifnet_detacher_thread_cont);
7646 /* NOTREACHED */
7647 }
7648
7649 net_update_uptime();
7650
7651 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7652
7653 /* Take care of detaching ifnet */
7654 ifp = ifnet_detaching_dequeue();
7655 if (ifp != NULL) {
7656 dlil_if_unlock();
7657 ifnet_detach_final(ifp);
7658 dlil_if_lock();
7659 }
7660 }
7661 }
7662
7663 __dead2
7664 static void
7665 ifnet_detacher_thread_func(void *v, wait_result_t w)
7666 {
7667 #pragma unused(v, w)
7668 dlil_decr_pending_thread_count();
7669 dlil_if_lock();
7670 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7671 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
7672 /*
7673 * msleep0() shouldn't have returned as PCATCH was not set;
7674 * therefore assert in this case.
7675 */
7676 dlil_if_unlock();
7677 VERIFY(0);
7678 }
7679
7680 static void
7681 ifnet_detach_final(struct ifnet *ifp)
7682 {
7683 struct ifnet_filter *filter, *filter_next;
7684 struct ifnet_filter_head fhead;
7685 struct dlil_threading_info *inp;
7686 struct ifaddr *ifa;
7687 ifnet_detached_func if_free;
7688 int i;
7689
7690 lck_mtx_lock(&ifp->if_ref_lock);
7691 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7692 panic("%s: flags mismatch (detaching not set) ifp=%p",
7693 __func__, ifp);
7694 /* NOTREACHED */
7695 }
7696
7697 /*
7698 * Wait until the existing IO references get released
7699 * before we proceed with ifnet_detach. This is not a
7700 * common case, so block without using a continuation.
7701 */
7702 while (ifp->if_refio > 0) {
7703 DLIL_PRINTF("%s: Waiting for IO references on %s interface "
7704 "to be released\n", __func__, if_name(ifp));
7705 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
7706 (PZERO - 1), "ifnet_ioref_wait", NULL);
7707 }
7708
7709 VERIFY(ifp->if_datamov == 0);
7710 VERIFY(ifp->if_drainers == 0);
7711 VERIFY(ifp->if_suspend == 0);
7712 ifp->if_refflags &= ~IFRF_READY;
7713 lck_mtx_unlock(&ifp->if_ref_lock);
7714
7715 /* Drain and destroy send queue */
7716 ifclassq_teardown(ifp);
7717
7718 /* Detach interface filters */
7719 lck_mtx_lock(&ifp->if_flt_lock);
7720 if_flt_monitor_enter(ifp);
7721
7722 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
7723 fhead = ifp->if_flt_head;
7724 TAILQ_INIT(&ifp->if_flt_head);
7725
7726 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7727 filter_next = TAILQ_NEXT(filter, filt_next);
7728 lck_mtx_unlock(&ifp->if_flt_lock);
7729
7730 dlil_detach_filter_internal(filter, 1);
7731 lck_mtx_lock(&ifp->if_flt_lock);
7732 }
7733 if_flt_monitor_leave(ifp);
7734 lck_mtx_unlock(&ifp->if_flt_lock);
7735
7736 /* Tell upper layers to drop their network addresses */
7737 if_purgeaddrs(ifp);
7738
7739 ifnet_lock_exclusive(ifp);
7740
7741 /* Uplumb all protocols */
7742 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7743 struct if_proto *proto;
7744
7745 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7746 while (proto != NULL) {
7747 protocol_family_t family = proto->protocol_family;
7748 ifnet_lock_done(ifp);
7749 proto_unplumb(family, ifp);
7750 ifnet_lock_exclusive(ifp);
7751 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7752 }
7753 /* There should not be any protocols left */
7754 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7755 }
7756 zfree(dlif_phash_zone, ifp->if_proto_hash);
7757 ifp->if_proto_hash = NULL;
7758
7759 /* Detach (permanent) link address from if_addrhead */
7760 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7761 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7762 IFA_LOCK(ifa);
7763 if_detach_link_ifa(ifp, ifa);
7764 IFA_UNLOCK(ifa);
7765
7766 /* Remove (permanent) link address from ifnet_addrs[] */
7767 IFA_REMREF(ifa);
7768 ifnet_addrs[ifp->if_index - 1] = NULL;
7769
7770 /* This interface should not be on {ifnet_head,detaching} */
7771 VERIFY(ifp->if_link.tqe_next == NULL);
7772 VERIFY(ifp->if_link.tqe_prev == NULL);
7773 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7774 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7775 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
7776 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
7777
7778 /* The slot should have been emptied */
7779 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7780
7781 /* There should not be any addresses left */
7782 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7783
7784 /*
7785 * Signal the starter thread to terminate itself.
7786 */
7787 if (ifp->if_start_thread != THREAD_NULL) {
7788 lck_mtx_lock_spin(&ifp->if_start_lock);
7789 ifp->if_start_flags = 0;
7790 ifp->if_start_thread = THREAD_NULL;
7791 wakeup_one((caddr_t)&ifp->if_start_thread);
7792 lck_mtx_unlock(&ifp->if_start_lock);
7793 }
7794
7795 /*
7796 * Signal the poller thread to terminate itself.
7797 */
7798 if (ifp->if_poll_thread != THREAD_NULL) {
7799 lck_mtx_lock_spin(&ifp->if_poll_lock);
7800 ifp->if_poll_thread = THREAD_NULL;
7801 wakeup_one((caddr_t)&ifp->if_poll_thread);
7802 lck_mtx_unlock(&ifp->if_poll_lock);
7803 }
7804
7805 /*
7806 * If thread affinity was set for the workloop thread, we will need
7807 * to tear down the affinity and release the extra reference count
7808 * taken at attach time. Does not apply to lo0 or other interfaces
7809 * without dedicated input threads.
7810 */
7811 if ((inp = ifp->if_inp) != NULL) {
7812 VERIFY(inp != dlil_main_input_thread);
7813
7814 if (inp->net_affinity) {
7815 struct thread *tp, *wtp, *ptp;
7816
7817 lck_mtx_lock_spin(&inp->input_lck);
7818 wtp = inp->wloop_thr;
7819 inp->wloop_thr = THREAD_NULL;
7820 ptp = inp->poll_thr;
7821 inp->poll_thr = THREAD_NULL;
7822 tp = inp->input_thr; /* don't nullify now */
7823 inp->tag = 0;
7824 inp->net_affinity = FALSE;
7825 lck_mtx_unlock(&inp->input_lck);
7826
7827 /* Tear down poll thread affinity */
7828 if (ptp != NULL) {
7829 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
7830 VERIFY(ifp->if_xflags & IFXF_LEGACY);
7831 (void) dlil_affinity_set(ptp,
7832 THREAD_AFFINITY_TAG_NULL);
7833 thread_deallocate(ptp);
7834 }
7835
7836 /* Tear down workloop thread affinity */
7837 if (wtp != NULL) {
7838 (void) dlil_affinity_set(wtp,
7839 THREAD_AFFINITY_TAG_NULL);
7840 thread_deallocate(wtp);
7841 }
7842
7843 /* Tear down DLIL input thread affinity */
7844 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
7845 thread_deallocate(tp);
7846 }
7847
7848 /* disassociate ifp DLIL input thread */
7849 ifp->if_inp = NULL;
7850
7851 /* tell the input thread to terminate */
7852 lck_mtx_lock_spin(&inp->input_lck);
7853 inp->input_waiting |= DLIL_INPUT_TERMINATE;
7854 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
7855 wakeup_one((caddr_t)&inp->input_waiting);
7856 }
7857 lck_mtx_unlock(&inp->input_lck);
7858 ifnet_lock_done(ifp);
7859
7860 /* wait for the input thread to terminate */
7861 lck_mtx_lock_spin(&inp->input_lck);
7862 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
7863 == 0) {
7864 (void) msleep(&inp->input_waiting, &inp->input_lck,
7865 (PZERO - 1) | PSPIN, inp->input_name, NULL);
7866 }
7867 lck_mtx_unlock(&inp->input_lck);
7868 ifnet_lock_exclusive(ifp);
7869
7870 /* clean-up input thread state */
7871 dlil_clean_threading_info(inp);
7872 /* clean-up poll parameters */
7873 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7874 dlil_reset_rxpoll_params(ifp);
7875 }
7876
7877 /* The driver might unload, so point these to ourselves */
7878 if_free = ifp->if_free;
7879 ifp->if_output_dlil = ifp_if_output;
7880 ifp->if_output = ifp_if_output;
7881 ifp->if_pre_enqueue = ifp_if_output;
7882 ifp->if_start = ifp_if_start;
7883 ifp->if_output_ctl = ifp_if_ctl;
7884 ifp->if_input_dlil = ifp_if_input;
7885 ifp->if_input_poll = ifp_if_input_poll;
7886 ifp->if_input_ctl = ifp_if_ctl;
7887 ifp->if_ioctl = ifp_if_ioctl;
7888 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
7889 ifp->if_free = ifp_if_free;
7890 ifp->if_demux = ifp_if_demux;
7891 ifp->if_event = ifp_if_event;
7892 ifp->if_framer_legacy = ifp_if_framer;
7893 ifp->if_framer = ifp_if_framer_extended;
7894 ifp->if_add_proto = ifp_if_add_proto;
7895 ifp->if_del_proto = ifp_if_del_proto;
7896 ifp->if_check_multi = ifp_if_check_multi;
7897
7898 /* wipe out interface description */
7899 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7900 ifp->if_desc.ifd_len = 0;
7901 VERIFY(ifp->if_desc.ifd_desc != NULL);
7902 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
7903
7904 /* there shouldn't be any delegation by now */
7905 VERIFY(ifp->if_delegated.ifp == NULL);
7906 VERIFY(ifp->if_delegated.type == 0);
7907 VERIFY(ifp->if_delegated.family == 0);
7908 VERIFY(ifp->if_delegated.subfamily == 0);
7909 VERIFY(ifp->if_delegated.expensive == 0);
7910 VERIFY(ifp->if_delegated.constrained == 0);
7911
7912 /* QoS marking get cleared */
7913 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
7914 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
7915
7916
7917 ifnet_lock_done(ifp);
7918
7919 #if PF
7920 /*
7921 * Detach this interface from packet filter, if enabled.
7922 */
7923 pf_ifnet_hook(ifp, 0);
7924 #endif /* PF */
7925
7926 /* Filter list should be empty */
7927 lck_mtx_lock_spin(&ifp->if_flt_lock);
7928 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7929 VERIFY(ifp->if_flt_busy == 0);
7930 VERIFY(ifp->if_flt_waiters == 0);
7931 lck_mtx_unlock(&ifp->if_flt_lock);
7932
7933 /* Last chance to drain send queue */
7934 if_qflush(ifp, 0);
7935
7936 /* Last chance to cleanup any cached route */
7937 lck_mtx_lock(&ifp->if_cached_route_lock);
7938 VERIFY(!ifp->if_fwd_cacheok);
7939 ROUTE_RELEASE(&ifp->if_fwd_route);
7940 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7941 ROUTE_RELEASE(&ifp->if_src_route);
7942 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7943 ROUTE_RELEASE(&ifp->if_src_route6);
7944 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7945 lck_mtx_unlock(&ifp->if_cached_route_lock);
7946
7947 VERIFY(ifp->if_data_threshold == 0);
7948 VERIFY(ifp->if_dt_tcall != NULL);
7949 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
7950
7951 ifnet_llreach_ifdetach(ifp);
7952
7953 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
7954
7955 /*
7956 * Finally, mark this ifnet as detached.
7957 */
7958 lck_mtx_lock_spin(&ifp->if_ref_lock);
7959 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7960 panic("%s: flags mismatch (detaching not set) ifp=%p",
7961 __func__, ifp);
7962 /* NOTREACHED */
7963 }
7964 ifp->if_refflags &= ~IFRF_DETACHING;
7965 lck_mtx_unlock(&ifp->if_ref_lock);
7966 if (if_free != NULL) {
7967 if_free(ifp);
7968 }
7969
7970 if (dlil_verbose) {
7971 DLIL_PRINTF("%s: detached\n", if_name(ifp));
7972 }
7973
7974 /* Release reference held during ifnet attach */
7975 ifnet_release(ifp);
7976 }
7977
7978 errno_t
7979 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
7980 {
7981 #pragma unused(ifp)
7982 m_freem_list(m);
7983 return 0;
7984 }
7985
7986 void
7987 ifp_if_start(struct ifnet *ifp)
7988 {
7989 ifnet_purge(ifp);
7990 }
7991
7992 static errno_t
7993 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
7994 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
7995 boolean_t poll, struct thread *tp)
7996 {
7997 #pragma unused(ifp, m_tail, s, poll, tp)
7998 m_freem_list(m_head);
7999 return ENXIO;
8000 }
8001
8002 static void
8003 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
8004 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
8005 {
8006 #pragma unused(ifp, flags, max_cnt)
8007 if (m_head != NULL) {
8008 *m_head = NULL;
8009 }
8010 if (m_tail != NULL) {
8011 *m_tail = NULL;
8012 }
8013 if (cnt != NULL) {
8014 *cnt = 0;
8015 }
8016 if (len != NULL) {
8017 *len = 0;
8018 }
8019 }
8020
8021 static errno_t
8022 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
8023 {
8024 #pragma unused(ifp, cmd, arglen, arg)
8025 return EOPNOTSUPP;
8026 }
8027
8028 static errno_t
8029 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
8030 {
8031 #pragma unused(ifp, fh, pf)
8032 m_freem(m);
8033 return EJUSTRETURN;
8034 }
8035
8036 static errno_t
8037 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
8038 const struct ifnet_demux_desc *da, u_int32_t dc)
8039 {
8040 #pragma unused(ifp, pf, da, dc)
8041 return EINVAL;
8042 }
8043
8044 static errno_t
8045 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
8046 {
8047 #pragma unused(ifp, pf)
8048 return EINVAL;
8049 }
8050
8051 static errno_t
8052 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
8053 {
8054 #pragma unused(ifp, sa)
8055 return EOPNOTSUPP;
8056 }
8057
8058 #if CONFIG_EMBEDDED
8059 static errno_t
8060 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8061 const struct sockaddr *sa, const char *ll, const char *t,
8062 u_int32_t *pre, u_int32_t *post)
8063 #else
8064 static errno_t
8065 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8066 const struct sockaddr *sa, const char *ll, const char *t)
8067 #endif /* !CONFIG_EMBEDDED */
8068 {
8069 #pragma unused(ifp, m, sa, ll, t)
8070 #if CONFIG_EMBEDDED
8071 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
8072 #else
8073 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
8074 #endif /* !CONFIG_EMBEDDED */
8075 }
8076
8077 static errno_t
8078 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
8079 const struct sockaddr *sa, const char *ll, const char *t,
8080 u_int32_t *pre, u_int32_t *post)
8081 {
8082 #pragma unused(ifp, sa, ll, t)
8083 m_freem(*m);
8084 *m = NULL;
8085
8086 if (pre != NULL) {
8087 *pre = 0;
8088 }
8089 if (post != NULL) {
8090 *post = 0;
8091 }
8092
8093 return EJUSTRETURN;
8094 }
8095
8096 errno_t
8097 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
8098 {
8099 #pragma unused(ifp, cmd, arg)
8100 return EOPNOTSUPP;
8101 }
8102
8103 static errno_t
8104 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
8105 {
8106 #pragma unused(ifp, tm, f)
8107 /* XXX not sure what to do here */
8108 return 0;
8109 }
8110
8111 static void
8112 ifp_if_free(struct ifnet *ifp)
8113 {
8114 #pragma unused(ifp)
8115 }
8116
8117 static void
8118 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
8119 {
8120 #pragma unused(ifp, e)
8121 }
8122
8123 int
8124 dlil_if_acquire(u_int32_t family, const void *uniqueid,
8125 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
8126 {
8127 struct ifnet *ifp1 = NULL;
8128 struct dlil_ifnet *dlifp1 = NULL;
8129 struct dlil_ifnet *dlifp1_saved = NULL;
8130 void *buf, *base, **pbuf;
8131 int ret = 0;
8132
8133 VERIFY(*ifp == NULL);
8134 dlil_if_lock();
8135 /*
8136 * We absolutely can't have an interface with the same name
8137 * in in-use state.
8138 * To make sure of that list has to be traversed completely
8139 */
8140 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
8141 ifp1 = (struct ifnet *)dlifp1;
8142
8143 if (ifp1->if_family != family) {
8144 continue;
8145 }
8146
8147 /*
8148 * If interface is in use, return EBUSY if either unique id
8149 * or interface extended names are the same
8150 */
8151 lck_mtx_lock(&dlifp1->dl_if_lock);
8152 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
8153 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8154 lck_mtx_unlock(&dlifp1->dl_if_lock);
8155 ret = EBUSY;
8156 goto end;
8157 }
8158 }
8159
8160 if (uniqueid_len) {
8161 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
8162 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
8163 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8164 lck_mtx_unlock(&dlifp1->dl_if_lock);
8165 ret = EBUSY;
8166 goto end;
8167 } else {
8168 /* Cache the first interface that can be recycled */
8169 if (*ifp == NULL) {
8170 *ifp = ifp1;
8171 dlifp1_saved = dlifp1;
8172 }
8173 /*
8174 * XXX Do not break or jump to end as we have to traverse
8175 * the whole list to ensure there are no name collisions
8176 */
8177 }
8178 }
8179 }
8180 lck_mtx_unlock(&dlifp1->dl_if_lock);
8181 }
8182
8183 /* If there's an interface that can be recycled, use that */
8184 if (*ifp != NULL) {
8185 if (dlifp1_saved != NULL) {
8186 lck_mtx_lock(&dlifp1_saved->dl_if_lock);
8187 dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
8188 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
8189 dlifp1_saved = NULL;
8190 }
8191 goto end;
8192 }
8193
8194 /* no interface found, allocate a new one */
8195 buf = zalloc(dlif_zone);
8196 if (buf == NULL) {
8197 ret = ENOMEM;
8198 goto end;
8199 }
8200 bzero(buf, dlif_bufsize);
8201
8202 /* Get the 64-bit aligned base address for this object */
8203 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
8204 sizeof(u_int64_t));
8205 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
8206
8207 /*
8208 * Wind back a pointer size from the aligned base and
8209 * save the original address so we can free it later.
8210 */
8211 pbuf = (void **)((intptr_t)base - sizeof(void *));
8212 *pbuf = buf;
8213 dlifp1 = base;
8214
8215 if (uniqueid_len) {
8216 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
8217 M_NKE, M_WAITOK);
8218 if (dlifp1->dl_if_uniqueid == NULL) {
8219 zfree(dlif_zone, buf);
8220 ret = ENOMEM;
8221 goto end;
8222 }
8223 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
8224 dlifp1->dl_if_uniqueid_len = uniqueid_len;
8225 }
8226
8227 ifp1 = (struct ifnet *)dlifp1;
8228 dlifp1->dl_if_flags = DLIF_INUSE;
8229 if (ifnet_debug) {
8230 dlifp1->dl_if_flags |= DLIF_DEBUG;
8231 dlifp1->dl_if_trace = dlil_if_trace;
8232 }
8233 ifp1->if_name = dlifp1->dl_if_namestorage;
8234 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
8235
8236 /* initialize interface description */
8237 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
8238 ifp1->if_desc.ifd_len = 0;
8239 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
8240
8241
8242 #if CONFIG_MACF_NET
8243 mac_ifnet_label_init(ifp1);
8244 #endif
8245
8246 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
8247 DLIL_PRINTF("%s: failed to allocate if local stats, "
8248 "error: %d\n", __func__, ret);
8249 /* This probably shouldn't be fatal */
8250 ret = 0;
8251 }
8252
8253 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
8254 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
8255 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
8256 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
8257 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
8258 ifnet_lock_attr);
8259 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
8260 #if INET
8261 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
8262 ifnet_lock_attr);
8263 ifp1->if_inetdata = NULL;
8264 #endif
8265 #if INET6
8266 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
8267 ifnet_lock_attr);
8268 ifp1->if_inet6data = NULL;
8269 #endif
8270 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
8271 ifnet_lock_attr);
8272 ifp1->if_link_status = NULL;
8273
8274 /* for send data paths */
8275 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
8276 ifnet_lock_attr);
8277 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
8278 ifnet_lock_attr);
8279 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
8280 ifnet_lock_attr);
8281
8282 /* for receive data paths */
8283 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
8284 ifnet_lock_attr);
8285
8286 /* thread call allocation is done with sleeping zalloc */
8287 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
8288 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
8289 if (ifp1->if_dt_tcall == NULL) {
8290 panic_plain("%s: couldn't create if_dt_tcall", __func__);
8291 /* NOTREACHED */
8292 }
8293
8294 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
8295
8296 *ifp = ifp1;
8297
8298 end:
8299 dlil_if_unlock();
8300
8301 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
8302 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
8303
8304 return ret;
8305 }
8306
8307 __private_extern__ void
8308 dlil_if_release(ifnet_t ifp)
8309 {
8310 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8311
8312 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8313 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8314 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8315 }
8316
8317 ifnet_lock_exclusive(ifp);
8318 lck_mtx_lock(&dlifp->dl_if_lock);
8319 dlifp->dl_if_flags &= ~DLIF_INUSE;
8320 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
8321 ifp->if_name = dlifp->dl_if_namestorage;
8322 /* Reset external name (name + unit) */
8323 ifp->if_xname = dlifp->dl_if_xnamestorage;
8324 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
8325 "%s?", ifp->if_name);
8326 lck_mtx_unlock(&dlifp->dl_if_lock);
8327 #if CONFIG_MACF_NET
8328 /*
8329 * We can either recycle the MAC label here or in dlil_if_acquire().
8330 * It seems logical to do it here but this means that anything that
8331 * still has a handle on ifp will now see it as unlabeled.
8332 * Since the interface is "dead" that may be OK. Revisit later.
8333 */
8334 mac_ifnet_label_recycle(ifp);
8335 #endif
8336 ifnet_lock_done(ifp);
8337 }
8338
8339 __private_extern__ void
8340 dlil_if_lock(void)
8341 {
8342 lck_mtx_lock(&dlil_ifnet_lock);
8343 }
8344
8345 __private_extern__ void
8346 dlil_if_unlock(void)
8347 {
8348 lck_mtx_unlock(&dlil_ifnet_lock);
8349 }
8350
8351 __private_extern__ void
8352 dlil_if_lock_assert(void)
8353 {
8354 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
8355 }
8356
8357 __private_extern__ void
8358 dlil_proto_unplumb_all(struct ifnet *ifp)
8359 {
8360 /*
8361 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8362 * each bucket contains exactly one entry; PF_VLAN does not need an
8363 * explicit unplumb.
8364 *
8365 * if_proto_hash[3] is for other protocols; we expect anything
8366 * in this bucket to respond to the DETACHING event (which would
8367 * have happened by now) and do the unplumb then.
8368 */
8369 (void) proto_unplumb(PF_INET, ifp);
8370 #if INET6
8371 (void) proto_unplumb(PF_INET6, ifp);
8372 #endif /* INET6 */
8373 }
8374
8375 static void
8376 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8377 {
8378 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8379 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8380
8381 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
8382
8383 lck_mtx_unlock(&ifp->if_cached_route_lock);
8384 }
8385
8386 static void
8387 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8388 {
8389 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8390 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8391
8392 if (ifp->if_fwd_cacheok) {
8393 route_copyin(src, &ifp->if_src_route, sizeof(*src));
8394 } else {
8395 ROUTE_RELEASE(src);
8396 }
8397 lck_mtx_unlock(&ifp->if_cached_route_lock);
8398 }
8399
8400 #if INET6
8401 static void
8402 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8403 {
8404 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8405 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8406
8407 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
8408 sizeof(*dst));
8409
8410 lck_mtx_unlock(&ifp->if_cached_route_lock);
8411 }
8412
8413 static void
8414 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8415 {
8416 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8417 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8418
8419 if (ifp->if_fwd_cacheok) {
8420 route_copyin((struct route *)src,
8421 (struct route *)&ifp->if_src_route6, sizeof(*src));
8422 } else {
8423 ROUTE_RELEASE(src);
8424 }
8425 lck_mtx_unlock(&ifp->if_cached_route_lock);
8426 }
8427 #endif /* INET6 */
8428
8429 struct rtentry *
8430 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
8431 {
8432 struct route src_rt;
8433 struct sockaddr_in *dst;
8434
8435 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
8436
8437 ifp_src_route_copyout(ifp, &src_rt);
8438
8439 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8440 ROUTE_RELEASE(&src_rt);
8441 if (dst->sin_family != AF_INET) {
8442 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8443 dst->sin_len = sizeof(src_rt.ro_dst);
8444 dst->sin_family = AF_INET;
8445 }
8446 dst->sin_addr = src_ip;
8447
8448 VERIFY(src_rt.ro_rt == NULL);
8449 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8450 0, 0, ifp->if_index);
8451
8452 if (src_rt.ro_rt != NULL) {
8453 /* retain a ref, copyin consumes one */
8454 struct rtentry *rte = src_rt.ro_rt;
8455 RT_ADDREF(rte);
8456 ifp_src_route_copyin(ifp, &src_rt);
8457 src_rt.ro_rt = rte;
8458 }
8459 }
8460
8461 return src_rt.ro_rt;
8462 }
8463
8464 #if INET6
8465 struct rtentry *
8466 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8467 {
8468 struct route_in6 src_rt;
8469
8470 ifp_src_route6_copyout(ifp, &src_rt);
8471
8472 if (ROUTE_UNUSABLE(&src_rt) ||
8473 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8474 ROUTE_RELEASE(&src_rt);
8475 if (src_rt.ro_dst.sin6_family != AF_INET6) {
8476 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8477 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
8478 src_rt.ro_dst.sin6_family = AF_INET6;
8479 }
8480 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
8481 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
8482 sizeof(src_rt.ro_dst.sin6_addr));
8483
8484 if (src_rt.ro_rt == NULL) {
8485 src_rt.ro_rt = rtalloc1_scoped(
8486 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8487 ifp->if_index);
8488
8489 if (src_rt.ro_rt != NULL) {
8490 /* retain a ref, copyin consumes one */
8491 struct rtentry *rte = src_rt.ro_rt;
8492 RT_ADDREF(rte);
8493 ifp_src_route6_copyin(ifp, &src_rt);
8494 src_rt.ro_rt = rte;
8495 }
8496 }
8497 }
8498
8499 return src_rt.ro_rt;
8500 }
8501 #endif /* INET6 */
8502
8503 void
8504 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
8505 {
8506 struct kev_dl_link_quality_metric_data ev_lqm_data;
8507
8508 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8509
8510 /* Normalize to edge */
8511 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8512 lqm = IFNET_LQM_THRESH_ABORT;
8513 atomic_bitset_32(&tcbinfo.ipi_flags,
8514 INPCBINFO_HANDLE_LQM_ABORT);
8515 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8516 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8517 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8518 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8519 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8520 lqm <= IFNET_LQM_THRESH_POOR) {
8521 lqm = IFNET_LQM_THRESH_POOR;
8522 } else if (lqm > IFNET_LQM_THRESH_POOR &&
8523 lqm <= IFNET_LQM_THRESH_GOOD) {
8524 lqm = IFNET_LQM_THRESH_GOOD;
8525 }
8526
8527 /*
8528 * Take the lock if needed
8529 */
8530 if (!locked) {
8531 ifnet_lock_exclusive(ifp);
8532 }
8533
8534 if (lqm == ifp->if_interface_state.lqm_state &&
8535 (ifp->if_interface_state.valid_bitmask &
8536 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8537 /*
8538 * Release the lock if was not held by the caller
8539 */
8540 if (!locked) {
8541 ifnet_lock_done(ifp);
8542 }
8543 return; /* nothing to update */
8544 }
8545 ifp->if_interface_state.valid_bitmask |=
8546 IF_INTERFACE_STATE_LQM_STATE_VALID;
8547 ifp->if_interface_state.lqm_state = lqm;
8548
8549 /*
8550 * Don't want to hold the lock when issuing kernel events
8551 */
8552 ifnet_lock_done(ifp);
8553
8554 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
8555 ev_lqm_data.link_quality_metric = lqm;
8556
8557 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
8558 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
8559
8560 /*
8561 * Reacquire the lock for the caller
8562 */
8563 if (locked) {
8564 ifnet_lock_exclusive(ifp);
8565 }
8566 }
8567
8568 static void
8569 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8570 {
8571 struct kev_dl_rrc_state kev;
8572
8573 if (rrc_state == ifp->if_interface_state.rrc_state &&
8574 (ifp->if_interface_state.valid_bitmask &
8575 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8576 return;
8577 }
8578
8579 ifp->if_interface_state.valid_bitmask |=
8580 IF_INTERFACE_STATE_RRC_STATE_VALID;
8581
8582 ifp->if_interface_state.rrc_state = rrc_state;
8583
8584 /*
8585 * Don't want to hold the lock when issuing kernel events
8586 */
8587 ifnet_lock_done(ifp);
8588
8589 bzero(&kev, sizeof(struct kev_dl_rrc_state));
8590 kev.rrc_state = rrc_state;
8591
8592 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8593 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8594
8595 ifnet_lock_exclusive(ifp);
8596 }
8597
8598 errno_t
8599 if_state_update(struct ifnet *ifp,
8600 struct if_interface_state *if_interface_state)
8601 {
8602 u_short if_index_available = 0;
8603
8604 ifnet_lock_exclusive(ifp);
8605
8606 if ((ifp->if_type != IFT_CELLULAR) &&
8607 (if_interface_state->valid_bitmask &
8608 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8609 ifnet_lock_done(ifp);
8610 return ENOTSUP;
8611 }
8612 if ((if_interface_state->valid_bitmask &
8613 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8614 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8615 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8616 ifnet_lock_done(ifp);
8617 return EINVAL;
8618 }
8619 if ((if_interface_state->valid_bitmask &
8620 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8621 if_interface_state->rrc_state !=
8622 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8623 if_interface_state->rrc_state !=
8624 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8625 ifnet_lock_done(ifp);
8626 return EINVAL;
8627 }
8628
8629 if (if_interface_state->valid_bitmask &
8630 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8631 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8632 }
8633 if (if_interface_state->valid_bitmask &
8634 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8635 if_rrc_state_update(ifp, if_interface_state->rrc_state);
8636 }
8637 if (if_interface_state->valid_bitmask &
8638 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8639 ifp->if_interface_state.valid_bitmask |=
8640 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8641 ifp->if_interface_state.interface_availability =
8642 if_interface_state->interface_availability;
8643
8644 if (ifp->if_interface_state.interface_availability ==
8645 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
8646 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8647 __func__, if_name(ifp), ifp->if_index);
8648 if_index_available = ifp->if_index;
8649 } else {
8650 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
8651 __func__, if_name(ifp), ifp->if_index);
8652 }
8653 }
8654 ifnet_lock_done(ifp);
8655
8656 /*
8657 * Check if the TCP connections going on this interface should be
8658 * forced to send probe packets instead of waiting for TCP timers
8659 * to fire. This is done on an explicit notification such as
8660 * SIOCSIFINTERFACESTATE which marks the interface as available.
8661 */
8662 if (if_index_available > 0) {
8663 tcp_interface_send_probe(if_index_available);
8664 }
8665
8666 return 0;
8667 }
8668
8669 void
8670 if_get_state(struct ifnet *ifp,
8671 struct if_interface_state *if_interface_state)
8672 {
8673 ifnet_lock_shared(ifp);
8674
8675 if_interface_state->valid_bitmask = 0;
8676
8677 if (ifp->if_interface_state.valid_bitmask &
8678 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8679 if_interface_state->valid_bitmask |=
8680 IF_INTERFACE_STATE_RRC_STATE_VALID;
8681 if_interface_state->rrc_state =
8682 ifp->if_interface_state.rrc_state;
8683 }
8684 if (ifp->if_interface_state.valid_bitmask &
8685 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8686 if_interface_state->valid_bitmask |=
8687 IF_INTERFACE_STATE_LQM_STATE_VALID;
8688 if_interface_state->lqm_state =
8689 ifp->if_interface_state.lqm_state;
8690 }
8691 if (ifp->if_interface_state.valid_bitmask &
8692 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8693 if_interface_state->valid_bitmask |=
8694 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8695 if_interface_state->interface_availability =
8696 ifp->if_interface_state.interface_availability;
8697 }
8698
8699 ifnet_lock_done(ifp);
8700 }
8701
8702 errno_t
8703 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8704 {
8705 ifnet_lock_exclusive(ifp);
8706 if (conn_probe > 1) {
8707 ifnet_lock_done(ifp);
8708 return EINVAL;
8709 }
8710 if (conn_probe == 0) {
8711 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
8712 } else {
8713 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
8714 }
8715 ifnet_lock_done(ifp);
8716
8717 #if NECP
8718 necp_update_all_clients();
8719 #endif /* NECP */
8720
8721 tcp_probe_connectivity(ifp, conn_probe);
8722 return 0;
8723 }
8724
8725 /* for uuid.c */
8726 static int
8727 get_ether_index(int * ret_other_index)
8728 {
8729 struct ifnet *ifp;
8730 int en0_index = 0;
8731 int other_en_index = 0;
8732 int any_ether_index = 0;
8733 short best_unit = 0;
8734
8735 *ret_other_index = 0;
8736 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8737 /*
8738 * find en0, or if not en0, the lowest unit en*, and if not
8739 * that, any ethernet
8740 */
8741 ifnet_lock_shared(ifp);
8742 if (strcmp(ifp->if_name, "en") == 0) {
8743 if (ifp->if_unit == 0) {
8744 /* found en0, we're done */
8745 en0_index = ifp->if_index;
8746 ifnet_lock_done(ifp);
8747 break;
8748 }
8749 if (other_en_index == 0 || ifp->if_unit < best_unit) {
8750 other_en_index = ifp->if_index;
8751 best_unit = ifp->if_unit;
8752 }
8753 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8754 any_ether_index = ifp->if_index;
8755 }
8756 ifnet_lock_done(ifp);
8757 }
8758 if (en0_index == 0) {
8759 if (other_en_index != 0) {
8760 *ret_other_index = other_en_index;
8761 } else if (any_ether_index != 0) {
8762 *ret_other_index = any_ether_index;
8763 }
8764 }
8765 return en0_index;
8766 }
8767
8768 int
8769 uuid_get_ethernet(u_int8_t *node)
8770 {
8771 static int en0_index;
8772 struct ifnet *ifp;
8773 int other_index = 0;
8774 int the_index = 0;
8775 int ret;
8776
8777 ifnet_head_lock_shared();
8778 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8779 en0_index = get_ether_index(&other_index);
8780 }
8781 if (en0_index != 0) {
8782 the_index = en0_index;
8783 } else if (other_index != 0) {
8784 the_index = other_index;
8785 }
8786 if (the_index != 0) {
8787 ifp = ifindex2ifnet[the_index];
8788 VERIFY(ifp != NULL);
8789 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
8790 ret = 0;
8791 } else {
8792 ret = -1;
8793 }
8794 ifnet_head_done();
8795 return ret;
8796 }
8797
8798 static int
8799 sysctl_rxpoll SYSCTL_HANDLER_ARGS
8800 {
8801 #pragma unused(arg1, arg2)
8802 uint32_t i;
8803 int err;
8804
8805 i = if_rxpoll;
8806
8807 err = sysctl_handle_int(oidp, &i, 0, req);
8808 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8809 return err;
8810 }
8811
8812 if (net_rxpoll == 0) {
8813 return ENXIO;
8814 }
8815
8816 if_rxpoll = i;
8817 return err;
8818 }
8819
8820 static int
8821 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
8822 {
8823 #pragma unused(arg1, arg2)
8824 uint64_t q;
8825 int err;
8826
8827 q = if_rxpoll_mode_holdtime;
8828
8829 err = sysctl_handle_quad(oidp, &q, 0, req);
8830 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8831 return err;
8832 }
8833
8834 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
8835 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
8836 }
8837
8838 if_rxpoll_mode_holdtime = q;
8839
8840 return err;
8841 }
8842
8843 static int
8844 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
8845 {
8846 #pragma unused(arg1, arg2)
8847 uint64_t q;
8848 int err;
8849
8850 q = if_rxpoll_sample_holdtime;
8851
8852 err = sysctl_handle_quad(oidp, &q, 0, req);
8853 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8854 return err;
8855 }
8856
8857 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
8858 q = IF_RXPOLL_SAMPLETIME_MIN;
8859 }
8860
8861 if_rxpoll_sample_holdtime = q;
8862
8863 return err;
8864 }
8865
8866 static int
8867 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
8868 {
8869 #pragma unused(arg1, arg2)
8870 uint64_t q;
8871 int err;
8872
8873 q = if_rxpoll_interval_time;
8874
8875 err = sysctl_handle_quad(oidp, &q, 0, req);
8876 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8877 return err;
8878 }
8879
8880 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
8881 q = IF_RXPOLL_INTERVALTIME_MIN;
8882 }
8883
8884 if_rxpoll_interval_time = q;
8885
8886 return err;
8887 }
8888
8889 static int
8890 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
8891 {
8892 #pragma unused(arg1, arg2)
8893 uint32_t i;
8894 int err;
8895
8896 i = if_sysctl_rxpoll_wlowat;
8897
8898 err = sysctl_handle_int(oidp, &i, 0, req);
8899 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8900 return err;
8901 }
8902
8903 if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
8904 return EINVAL;
8905 }
8906
8907 if_sysctl_rxpoll_wlowat = i;
8908 return err;
8909 }
8910
8911 static int
8912 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
8913 {
8914 #pragma unused(arg1, arg2)
8915 uint32_t i;
8916 int err;
8917
8918 i = if_sysctl_rxpoll_whiwat;
8919
8920 err = sysctl_handle_int(oidp, &i, 0, req);
8921 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8922 return err;
8923 }
8924
8925 if (i <= if_sysctl_rxpoll_wlowat) {
8926 return EINVAL;
8927 }
8928
8929 if_sysctl_rxpoll_whiwat = i;
8930 return err;
8931 }
8932
8933 static int
8934 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
8935 {
8936 #pragma unused(arg1, arg2)
8937 int i, err;
8938
8939 i = if_sndq_maxlen;
8940
8941 err = sysctl_handle_int(oidp, &i, 0, req);
8942 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8943 return err;
8944 }
8945
8946 if (i < IF_SNDQ_MINLEN) {
8947 i = IF_SNDQ_MINLEN;
8948 }
8949
8950 if_sndq_maxlen = i;
8951 return err;
8952 }
8953
8954 static int
8955 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
8956 {
8957 #pragma unused(arg1, arg2)
8958 int i, err;
8959
8960 i = if_rcvq_maxlen;
8961
8962 err = sysctl_handle_int(oidp, &i, 0, req);
8963 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8964 return err;
8965 }
8966
8967 if (i < IF_RCVQ_MINLEN) {
8968 i = IF_RCVQ_MINLEN;
8969 }
8970
8971 if_rcvq_maxlen = i;
8972 return err;
8973 }
8974
8975 int
8976 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
8977 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
8978 {
8979 struct kev_dl_node_presence kev;
8980 struct sockaddr_dl *sdl;
8981 struct sockaddr_in6 *sin6;
8982 int ret = 0;
8983
8984 VERIFY(ifp);
8985 VERIFY(sa);
8986 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8987
8988 bzero(&kev, sizeof(kev));
8989 sin6 = &kev.sin6_node_address;
8990 sdl = &kev.sdl_node_address;
8991 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8992 kev.rssi = rssi;
8993 kev.link_quality_metric = lqm;
8994 kev.node_proximity_metric = npm;
8995 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
8996
8997 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
8998 if (ret == 0) {
8999 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9000 &kev.link_data, sizeof(kev));
9001 if (err != 0) {
9002 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
9003 "error %d\n", __func__, err);
9004 }
9005 }
9006 return ret;
9007 }
9008
9009 void
9010 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9011 {
9012 struct kev_dl_node_absence kev = {};
9013 struct sockaddr_in6 *kev_sin6 = NULL;
9014 struct sockaddr_dl *kev_sdl = NULL;
9015
9016 VERIFY(ifp != NULL);
9017 VERIFY(sa != NULL);
9018 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9019
9020 kev_sin6 = &kev.sin6_node_address;
9021 kev_sdl = &kev.sdl_node_address;
9022
9023 if (sa->sa_family == AF_INET6) {
9024 /*
9025 * If IPv6 address is given, get the link layer
9026 * address from what was cached in the neighbor cache
9027 */
9028 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9029 bcopy(sa, kev_sin6, sa->sa_len);
9030 nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9031 } else {
9032 /*
9033 * If passed address is AF_LINK type, derive the address
9034 * based on the link address.
9035 */
9036 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9037 nd6_alt_node_absent(ifp, kev_sin6, NULL);
9038 }
9039
9040 kev_sdl->sdl_type = ifp->if_type;
9041 kev_sdl->sdl_index = ifp->if_index;
9042
9043 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
9044 &kev.link_data, sizeof(kev));
9045 }
9046
9047 int
9048 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9049 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9050 {
9051 struct kev_dl_node_presence kev = {};
9052 struct sockaddr_dl *kev_sdl = NULL;
9053 struct sockaddr_in6 *kev_sin6 = NULL;
9054 int ret = 0;
9055
9056 VERIFY(ifp != NULL);
9057 VERIFY(sa != NULL && sdl != NULL);
9058 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9059
9060 kev_sin6 = &kev.sin6_node_address;
9061 kev_sdl = &kev.sdl_node_address;
9062
9063 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9064 bcopy(sdl, kev_sdl, sdl->sdl_len);
9065 kev_sdl->sdl_type = ifp->if_type;
9066 kev_sdl->sdl_index = ifp->if_index;
9067
9068 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9069 bcopy(sa, kev_sin6, sa->sa_len);
9070
9071 kev.rssi = rssi;
9072 kev.link_quality_metric = lqm;
9073 kev.node_proximity_metric = npm;
9074 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9075
9076 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9077 if (ret == 0) {
9078 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9079 &kev.link_data, sizeof(kev));
9080 if (err != 0) {
9081 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with",
9082 "error %d\n", __func__, err);
9083 }
9084 }
9085 return ret;
9086 }
9087
9088 const void *
9089 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
9090 kauth_cred_t *credp)
9091 {
9092 const u_int8_t *bytes;
9093 size_t size;
9094
9095 bytes = CONST_LLADDR(sdl);
9096 size = sdl->sdl_alen;
9097
9098 #if CONFIG_MACF
9099 if (dlil_lladdr_ckreq) {
9100 switch (sdl->sdl_type) {
9101 case IFT_ETHER:
9102 case IFT_IEEE1394:
9103 break;
9104 default:
9105 credp = NULL;
9106 break;
9107 }
9108 ;
9109
9110 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9111 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
9112 [0] = 2
9113 };
9114
9115 bytes = unspec;
9116 }
9117 }
9118 #else
9119 #pragma unused(credp)
9120 #endif
9121
9122 if (sizep != NULL) {
9123 *sizep = size;
9124 }
9125 return bytes;
9126 }
9127
9128 void
9129 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9130 u_int8_t info[DLIL_MODARGLEN])
9131 {
9132 struct kev_dl_issues kev;
9133 struct timeval tv;
9134
9135 VERIFY(ifp != NULL);
9136 VERIFY(modid != NULL);
9137 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9138 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
9139
9140 bzero(&kev, sizeof(kev));
9141
9142 microtime(&tv);
9143 kev.timestamp = tv.tv_sec;
9144 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
9145 if (info != NULL) {
9146 bcopy(info, &kev.info, DLIL_MODARGLEN);
9147 }
9148
9149 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
9150 &kev.link_data, sizeof(kev));
9151 }
9152
9153 errno_t
9154 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9155 struct proc *p)
9156 {
9157 u_int32_t level = IFNET_THROTTLE_OFF;
9158 errno_t result = 0;
9159
9160 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9161
9162 if (cmd == SIOCSIFOPPORTUNISTIC) {
9163 /*
9164 * XXX: Use priv_check_cred() instead of root check?
9165 */
9166 if ((result = proc_suser(p)) != 0) {
9167 return result;
9168 }
9169
9170 if (ifr->ifr_opportunistic.ifo_flags ==
9171 IFRIFOF_BLOCK_OPPORTUNISTIC) {
9172 level = IFNET_THROTTLE_OPPORTUNISTIC;
9173 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
9174 level = IFNET_THROTTLE_OFF;
9175 } else {
9176 result = EINVAL;
9177 }
9178
9179 if (result == 0) {
9180 result = ifnet_set_throttle(ifp, level);
9181 }
9182 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9183 ifr->ifr_opportunistic.ifo_flags = 0;
9184 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9185 ifr->ifr_opportunistic.ifo_flags |=
9186 IFRIFOF_BLOCK_OPPORTUNISTIC;
9187 }
9188 }
9189
9190 /*
9191 * Return the count of current opportunistic connections
9192 * over the interface.
9193 */
9194 if (result == 0) {
9195 uint32_t flags = 0;
9196 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
9197 INPCB_OPPORTUNISTIC_SETCMD : 0;
9198 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
9199 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
9200 ifr->ifr_opportunistic.ifo_inuse =
9201 udp_count_opportunistic(ifp->if_index, flags) +
9202 tcp_count_opportunistic(ifp->if_index, flags);
9203 }
9204
9205 if (result == EALREADY) {
9206 result = 0;
9207 }
9208
9209 return result;
9210 }
9211
9212 int
9213 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9214 {
9215 struct ifclassq *ifq;
9216 int err = 0;
9217
9218 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9219 return ENXIO;
9220 }
9221
9222 *level = IFNET_THROTTLE_OFF;
9223
9224 ifq = &ifp->if_snd;
9225 IFCQ_LOCK(ifq);
9226 /* Throttling works only for IFCQ, not ALTQ instances */
9227 if (IFCQ_IS_ENABLED(ifq)) {
9228 IFCQ_GET_THROTTLE(ifq, *level, err);
9229 }
9230 IFCQ_UNLOCK(ifq);
9231
9232 return err;
9233 }
9234
9235 int
9236 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9237 {
9238 struct ifclassq *ifq;
9239 int err = 0;
9240
9241 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9242 return ENXIO;
9243 }
9244
9245 ifq = &ifp->if_snd;
9246
9247 switch (level) {
9248 case IFNET_THROTTLE_OFF:
9249 case IFNET_THROTTLE_OPPORTUNISTIC:
9250 break;
9251 default:
9252 return EINVAL;
9253 }
9254
9255 IFCQ_LOCK(ifq);
9256 if (IFCQ_IS_ENABLED(ifq)) {
9257 IFCQ_SET_THROTTLE(ifq, level, err);
9258 }
9259 IFCQ_UNLOCK(ifq);
9260
9261 if (err == 0) {
9262 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
9263 level);
9264 #if NECP
9265 necp_update_all_clients();
9266 #endif /* NECP */
9267 if (level == IFNET_THROTTLE_OFF) {
9268 ifnet_start(ifp);
9269 }
9270 }
9271
9272 return err;
9273 }
9274
9275 errno_t
9276 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9277 struct proc *p)
9278 {
9279 #pragma unused(p)
9280 errno_t result = 0;
9281 uint32_t flags;
9282 int level, category, subcategory;
9283
9284 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9285
9286 if (cmd == SIOCSIFLOG) {
9287 if ((result = priv_check_cred(kauth_cred_get(),
9288 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9289 return result;
9290 }
9291
9292 level = ifr->ifr_log.ifl_level;
9293 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
9294 result = EINVAL;
9295 }
9296
9297 flags = ifr->ifr_log.ifl_flags;
9298 if ((flags &= IFNET_LOGF_MASK) == 0) {
9299 result = EINVAL;
9300 }
9301
9302 category = ifr->ifr_log.ifl_category;
9303 subcategory = ifr->ifr_log.ifl_subcategory;
9304
9305 if (result == 0) {
9306 result = ifnet_set_log(ifp, level, flags,
9307 category, subcategory);
9308 }
9309 } else {
9310 result = ifnet_get_log(ifp, &level, &flags, &category,
9311 &subcategory);
9312 if (result == 0) {
9313 ifr->ifr_log.ifl_level = level;
9314 ifr->ifr_log.ifl_flags = flags;
9315 ifr->ifr_log.ifl_category = category;
9316 ifr->ifr_log.ifl_subcategory = subcategory;
9317 }
9318 }
9319
9320 return result;
9321 }
9322
9323 int
9324 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9325 int32_t category, int32_t subcategory)
9326 {
9327 int err = 0;
9328
9329 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9330 VERIFY(flags & IFNET_LOGF_MASK);
9331
9332 /*
9333 * The logging level applies to all facilities; make sure to
9334 * update them all with the most current level.
9335 */
9336 flags |= ifp->if_log.flags;
9337
9338 if (ifp->if_output_ctl != NULL) {
9339 struct ifnet_log_params l;
9340
9341 bzero(&l, sizeof(l));
9342 l.level = level;
9343 l.flags = flags;
9344 l.flags &= ~IFNET_LOGF_DLIL;
9345 l.category = category;
9346 l.subcategory = subcategory;
9347
9348 /* Send this request to lower layers */
9349 if (l.flags != 0) {
9350 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
9351 sizeof(l), &l);
9352 }
9353 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9354 /*
9355 * If targeted to the lower layers without an output
9356 * control callback registered on the interface, just
9357 * silently ignore facilities other than ours.
9358 */
9359 flags &= IFNET_LOGF_DLIL;
9360 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
9361 level = 0;
9362 }
9363 }
9364
9365 if (err == 0) {
9366 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
9367 ifp->if_log.flags = 0;
9368 } else {
9369 ifp->if_log.flags |= flags;
9370 }
9371
9372 log(LOG_INFO, "%s: logging level set to %d flags=%b "
9373 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9374 ifp->if_log.level, ifp->if_log.flags,
9375 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9376 category, subcategory);
9377 }
9378
9379 return err;
9380 }
9381
9382 int
9383 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9384 int32_t *category, int32_t *subcategory)
9385 {
9386 if (level != NULL) {
9387 *level = ifp->if_log.level;
9388 }
9389 if (flags != NULL) {
9390 *flags = ifp->if_log.flags;
9391 }
9392 if (category != NULL) {
9393 *category = ifp->if_log.category;
9394 }
9395 if (subcategory != NULL) {
9396 *subcategory = ifp->if_log.subcategory;
9397 }
9398
9399 return 0;
9400 }
9401
9402 int
9403 ifnet_notify_address(struct ifnet *ifp, int af)
9404 {
9405 struct ifnet_notify_address_params na;
9406
9407 #if PF
9408 (void) pf_ifaddr_hook(ifp);
9409 #endif /* PF */
9410
9411 if (ifp->if_output_ctl == NULL) {
9412 return EOPNOTSUPP;
9413 }
9414
9415 bzero(&na, sizeof(na));
9416 na.address_family = af;
9417
9418 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9419 sizeof(na), &na);
9420 }
9421
9422 errno_t
9423 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9424 {
9425 if (ifp == NULL || flowid == NULL) {
9426 return EINVAL;
9427 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9428 !IF_FULLY_ATTACHED(ifp)) {
9429 return ENXIO;
9430 }
9431
9432 *flowid = ifp->if_flowhash;
9433
9434 return 0;
9435 }
9436
9437 errno_t
9438 ifnet_disable_output(struct ifnet *ifp)
9439 {
9440 int err;
9441
9442 if (ifp == NULL) {
9443 return EINVAL;
9444 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9445 !IF_FULLY_ATTACHED(ifp)) {
9446 return ENXIO;
9447 }
9448
9449 if ((err = ifnet_fc_add(ifp)) == 0) {
9450 lck_mtx_lock_spin(&ifp->if_start_lock);
9451 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9452 lck_mtx_unlock(&ifp->if_start_lock);
9453 }
9454 return err;
9455 }
9456
9457 errno_t
9458 ifnet_enable_output(struct ifnet *ifp)
9459 {
9460 if (ifp == NULL) {
9461 return EINVAL;
9462 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9463 !IF_FULLY_ATTACHED(ifp)) {
9464 return ENXIO;
9465 }
9466
9467 ifnet_start_common(ifp, TRUE);
9468 return 0;
9469 }
9470
9471 void
9472 ifnet_flowadv(uint32_t flowhash)
9473 {
9474 struct ifnet_fc_entry *ifce;
9475 struct ifnet *ifp;
9476
9477 ifce = ifnet_fc_get(flowhash);
9478 if (ifce == NULL) {
9479 return;
9480 }
9481
9482 VERIFY(ifce->ifce_ifp != NULL);
9483 ifp = ifce->ifce_ifp;
9484
9485 /* flow hash gets recalculated per attach, so check */
9486 if (ifnet_is_attached(ifp, 1)) {
9487 if (ifp->if_flowhash == flowhash) {
9488 (void) ifnet_enable_output(ifp);
9489 }
9490 ifnet_decr_iorefcnt(ifp);
9491 }
9492 ifnet_fc_entry_free(ifce);
9493 }
9494
9495 /*
9496 * Function to compare ifnet_fc_entries in ifnet flow control tree
9497 */
9498 static inline int
9499 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9500 {
9501 return fc1->ifce_flowhash - fc2->ifce_flowhash;
9502 }
9503
9504 static int
9505 ifnet_fc_add(struct ifnet *ifp)
9506 {
9507 struct ifnet_fc_entry keyfc, *ifce;
9508 uint32_t flowhash;
9509
9510 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9511 VERIFY(ifp->if_flowhash != 0);
9512 flowhash = ifp->if_flowhash;
9513
9514 bzero(&keyfc, sizeof(keyfc));
9515 keyfc.ifce_flowhash = flowhash;
9516
9517 lck_mtx_lock_spin(&ifnet_fc_lock);
9518 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9519 if (ifce != NULL && ifce->ifce_ifp == ifp) {
9520 /* Entry is already in ifnet_fc_tree, return */
9521 lck_mtx_unlock(&ifnet_fc_lock);
9522 return 0;
9523 }
9524
9525 if (ifce != NULL) {
9526 /*
9527 * There is a different fc entry with the same flow hash
9528 * but different ifp pointer. There can be a collision
9529 * on flow hash but the probability is low. Let's just
9530 * avoid adding a second one when there is a collision.
9531 */
9532 lck_mtx_unlock(&ifnet_fc_lock);
9533 return EAGAIN;
9534 }
9535
9536 /* become regular mutex */
9537 lck_mtx_convert_spin(&ifnet_fc_lock);
9538
9539 ifce = zalloc(ifnet_fc_zone);
9540 if (ifce == NULL) {
9541 /* memory allocation failed */
9542 lck_mtx_unlock(&ifnet_fc_lock);
9543 return ENOMEM;
9544 }
9545 bzero(ifce, ifnet_fc_zone_size);
9546
9547 ifce->ifce_flowhash = flowhash;
9548 ifce->ifce_ifp = ifp;
9549
9550 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9551 lck_mtx_unlock(&ifnet_fc_lock);
9552 return 0;
9553 }
9554
9555 static struct ifnet_fc_entry *
9556 ifnet_fc_get(uint32_t flowhash)
9557 {
9558 struct ifnet_fc_entry keyfc, *ifce;
9559 struct ifnet *ifp;
9560
9561 bzero(&keyfc, sizeof(keyfc));
9562 keyfc.ifce_flowhash = flowhash;
9563
9564 lck_mtx_lock_spin(&ifnet_fc_lock);
9565 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9566 if (ifce == NULL) {
9567 /* Entry is not present in ifnet_fc_tree, return */
9568 lck_mtx_unlock(&ifnet_fc_lock);
9569 return NULL;
9570 }
9571
9572 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9573
9574 VERIFY(ifce->ifce_ifp != NULL);
9575 ifp = ifce->ifce_ifp;
9576
9577 /* become regular mutex */
9578 lck_mtx_convert_spin(&ifnet_fc_lock);
9579
9580 if (!ifnet_is_attached(ifp, 0)) {
9581 /*
9582 * This ifp is not attached or in the process of being
9583 * detached; just don't process it.
9584 */
9585 ifnet_fc_entry_free(ifce);
9586 ifce = NULL;
9587 }
9588 lck_mtx_unlock(&ifnet_fc_lock);
9589
9590 return ifce;
9591 }
9592
9593 static void
9594 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9595 {
9596 zfree(ifnet_fc_zone, ifce);
9597 }
9598
9599 static uint32_t
9600 ifnet_calc_flowhash(struct ifnet *ifp)
9601 {
9602 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9603 uint32_t flowhash = 0;
9604
9605 if (ifnet_flowhash_seed == 0) {
9606 ifnet_flowhash_seed = RandomULong();
9607 }
9608
9609 bzero(&fh, sizeof(fh));
9610
9611 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
9612 fh.ifk_unit = ifp->if_unit;
9613 fh.ifk_flags = ifp->if_flags;
9614 fh.ifk_eflags = ifp->if_eflags;
9615 fh.ifk_capabilities = ifp->if_capabilities;
9616 fh.ifk_capenable = ifp->if_capenable;
9617 fh.ifk_output_sched_model = ifp->if_output_sched_model;
9618 fh.ifk_rand1 = RandomULong();
9619 fh.ifk_rand2 = RandomULong();
9620
9621 try_again:
9622 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
9623 if (flowhash == 0) {
9624 /* try to get a non-zero flowhash */
9625 ifnet_flowhash_seed = RandomULong();
9626 goto try_again;
9627 }
9628
9629 return flowhash;
9630 }
9631
9632 int
9633 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9634 uint16_t flags, uint8_t *data)
9635 {
9636 #pragma unused(flags)
9637 int error = 0;
9638
9639 switch (family) {
9640 case AF_INET:
9641 if_inetdata_lock_exclusive(ifp);
9642 if (IN_IFEXTRA(ifp) != NULL) {
9643 if (len == 0) {
9644 /* Allow clearing the signature */
9645 IN_IFEXTRA(ifp)->netsig_len = 0;
9646 bzero(IN_IFEXTRA(ifp)->netsig,
9647 sizeof(IN_IFEXTRA(ifp)->netsig));
9648 if_inetdata_lock_done(ifp);
9649 break;
9650 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
9651 error = EINVAL;
9652 if_inetdata_lock_done(ifp);
9653 break;
9654 }
9655 IN_IFEXTRA(ifp)->netsig_len = len;
9656 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9657 } else {
9658 error = ENOMEM;
9659 }
9660 if_inetdata_lock_done(ifp);
9661 break;
9662
9663 case AF_INET6:
9664 if_inet6data_lock_exclusive(ifp);
9665 if (IN6_IFEXTRA(ifp) != NULL) {
9666 if (len == 0) {
9667 /* Allow clearing the signature */
9668 IN6_IFEXTRA(ifp)->netsig_len = 0;
9669 bzero(IN6_IFEXTRA(ifp)->netsig,
9670 sizeof(IN6_IFEXTRA(ifp)->netsig));
9671 if_inet6data_lock_done(ifp);
9672 break;
9673 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
9674 error = EINVAL;
9675 if_inet6data_lock_done(ifp);
9676 break;
9677 }
9678 IN6_IFEXTRA(ifp)->netsig_len = len;
9679 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9680 } else {
9681 error = ENOMEM;
9682 }
9683 if_inet6data_lock_done(ifp);
9684 break;
9685
9686 default:
9687 error = EINVAL;
9688 break;
9689 }
9690
9691 return error;
9692 }
9693
9694 int
9695 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9696 uint16_t *flags, uint8_t *data)
9697 {
9698 int error = 0;
9699
9700 if (ifp == NULL || len == NULL || data == NULL) {
9701 return EINVAL;
9702 }
9703
9704 switch (family) {
9705 case AF_INET:
9706 if_inetdata_lock_shared(ifp);
9707 if (IN_IFEXTRA(ifp) != NULL) {
9708 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9709 error = EINVAL;
9710 if_inetdata_lock_done(ifp);
9711 break;
9712 }
9713 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
9714 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
9715 } else {
9716 error = ENOENT;
9717 }
9718 } else {
9719 error = ENOMEM;
9720 }
9721 if_inetdata_lock_done(ifp);
9722 break;
9723
9724 case AF_INET6:
9725 if_inet6data_lock_shared(ifp);
9726 if (IN6_IFEXTRA(ifp) != NULL) {
9727 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9728 error = EINVAL;
9729 if_inet6data_lock_done(ifp);
9730 break;
9731 }
9732 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
9733 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
9734 } else {
9735 error = ENOENT;
9736 }
9737 } else {
9738 error = ENOMEM;
9739 }
9740 if_inet6data_lock_done(ifp);
9741 break;
9742
9743 default:
9744 error = EINVAL;
9745 break;
9746 }
9747
9748 if (error == 0 && flags != NULL) {
9749 *flags = 0;
9750 }
9751
9752 return error;
9753 }
9754
9755 #if INET6
9756 int
9757 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9758 {
9759 int i, error = 0, one_set = 0;
9760
9761 if_inet6data_lock_exclusive(ifp);
9762
9763 if (IN6_IFEXTRA(ifp) == NULL) {
9764 error = ENOMEM;
9765 goto out;
9766 }
9767
9768 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9769 uint32_t prefix_len =
9770 prefixes[i].prefix_len;
9771 struct in6_addr *prefix =
9772 &prefixes[i].ipv6_prefix;
9773
9774 if (prefix_len == 0) {
9775 clat_log0((LOG_DEBUG,
9776 "NAT64 prefixes purged from Interface %s\n",
9777 if_name(ifp)));
9778 /* Allow clearing the signature */
9779 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9780 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9781 sizeof(struct in6_addr));
9782
9783 continue;
9784 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
9785 prefix_len != NAT64_PREFIX_LEN_40 &&
9786 prefix_len != NAT64_PREFIX_LEN_48 &&
9787 prefix_len != NAT64_PREFIX_LEN_56 &&
9788 prefix_len != NAT64_PREFIX_LEN_64 &&
9789 prefix_len != NAT64_PREFIX_LEN_96) {
9790 clat_log0((LOG_DEBUG,
9791 "NAT64 prefixlen is incorrect %d\n", prefix_len));
9792 error = EINVAL;
9793 goto out;
9794 }
9795
9796 if (IN6_IS_SCOPE_EMBED(prefix)) {
9797 clat_log0((LOG_DEBUG,
9798 "NAT64 prefix has interface/link local scope.\n"));
9799 error = EINVAL;
9800 goto out;
9801 }
9802
9803 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
9804 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9805 sizeof(struct in6_addr));
9806 clat_log0((LOG_DEBUG,
9807 "NAT64 prefix set to %s with prefixlen: %d\n",
9808 ip6_sprintf(prefix), prefix_len));
9809 one_set = 1;
9810 }
9811
9812 out:
9813 if_inet6data_lock_done(ifp);
9814
9815 if (error == 0 && one_set != 0) {
9816 necp_update_all_clients();
9817 }
9818
9819 return error;
9820 }
9821
9822 int
9823 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9824 {
9825 int i, found_one = 0, error = 0;
9826
9827 if (ifp == NULL) {
9828 return EINVAL;
9829 }
9830
9831 if_inet6data_lock_shared(ifp);
9832
9833 if (IN6_IFEXTRA(ifp) == NULL) {
9834 error = ENOMEM;
9835 goto out;
9836 }
9837
9838 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9839 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
9840 found_one = 1;
9841 }
9842 }
9843
9844 if (found_one == 0) {
9845 error = ENOENT;
9846 goto out;
9847 }
9848
9849 if (prefixes) {
9850 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
9851 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
9852 }
9853
9854 out:
9855 if_inet6data_lock_done(ifp);
9856
9857 return error;
9858 }
9859 #endif
9860
9861 static void
9862 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
9863 protocol_family_t pf)
9864 {
9865 #pragma unused(ifp)
9866 uint32_t did_sw;
9867
9868 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
9869 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
9870 return;
9871 }
9872
9873 switch (pf) {
9874 case PF_INET:
9875 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
9876 if (did_sw & CSUM_DELAY_IP) {
9877 hwcksum_dbg_finalized_hdr++;
9878 }
9879 if (did_sw & CSUM_DELAY_DATA) {
9880 hwcksum_dbg_finalized_data++;
9881 }
9882 break;
9883 #if INET6
9884 case PF_INET6:
9885 /*
9886 * Checksum offload should not have been enabled when
9887 * extension headers exist; that also means that we
9888 * cannot force-finalize packets with extension headers.
9889 * Indicate to the callee should it skip such case by
9890 * setting optlen to -1.
9891 */
9892 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
9893 m->m_pkthdr.csum_flags);
9894 if (did_sw & CSUM_DELAY_IPV6_DATA) {
9895 hwcksum_dbg_finalized_data++;
9896 }
9897 break;
9898 #endif /* INET6 */
9899 default:
9900 return;
9901 }
9902 }
9903
9904 static void
9905 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
9906 protocol_family_t pf)
9907 {
9908 uint16_t sum = 0;
9909 uint32_t hlen;
9910
9911 if (frame_header == NULL ||
9912 frame_header < (char *)mbuf_datastart(m) ||
9913 frame_header > (char *)m->m_data) {
9914 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
9915 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
9916 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
9917 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
9918 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
9919 (uint64_t)VM_KERNEL_ADDRPERM(m));
9920 return;
9921 }
9922 hlen = (m->m_data - frame_header);
9923
9924 switch (pf) {
9925 case PF_INET:
9926 #if INET6
9927 case PF_INET6:
9928 #endif /* INET6 */
9929 break;
9930 default:
9931 return;
9932 }
9933
9934 /*
9935 * Force partial checksum offload; useful to simulate cases
9936 * where the hardware does not support partial checksum offload,
9937 * in order to validate correctness throughout the layers above.
9938 */
9939 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
9940 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
9941
9942 if (foff > (uint32_t)m->m_pkthdr.len) {
9943 return;
9944 }
9945
9946 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
9947
9948 /* Compute 16-bit 1's complement sum from forced offset */
9949 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
9950
9951 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
9952 m->m_pkthdr.csum_rx_val = sum;
9953 m->m_pkthdr.csum_rx_start = (foff + hlen);
9954
9955 hwcksum_dbg_partial_forced++;
9956 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
9957 }
9958
9959 /*
9960 * Partial checksum offload verification (and adjustment);
9961 * useful to validate and test cases where the hardware
9962 * supports partial checksum offload.
9963 */
9964 if ((m->m_pkthdr.csum_flags &
9965 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
9966 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
9967 uint32_t rxoff;
9968
9969 /* Start offset must begin after frame header */
9970 rxoff = m->m_pkthdr.csum_rx_start;
9971 if (hlen > rxoff) {
9972 hwcksum_dbg_bad_rxoff++;
9973 if (dlil_verbose) {
9974 DLIL_PRINTF("%s: partial cksum start offset %d "
9975 "is less than frame header length %d for "
9976 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
9977 (uint64_t)VM_KERNEL_ADDRPERM(m));
9978 }
9979 return;
9980 }
9981 rxoff -= hlen;
9982
9983 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
9984 /*
9985 * Compute the expected 16-bit 1's complement sum;
9986 * skip this if we've already computed it above
9987 * when partial checksum offload is forced.
9988 */
9989 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
9990
9991 /* Hardware or driver is buggy */
9992 if (sum != m->m_pkthdr.csum_rx_val) {
9993 hwcksum_dbg_bad_cksum++;
9994 if (dlil_verbose) {
9995 DLIL_PRINTF("%s: bad partial cksum value "
9996 "0x%x (expected 0x%x) for mbuf "
9997 "0x%llx [rx_start %d]\n",
9998 if_name(ifp),
9999 m->m_pkthdr.csum_rx_val, sum,
10000 (uint64_t)VM_KERNEL_ADDRPERM(m),
10001 m->m_pkthdr.csum_rx_start);
10002 }
10003 return;
10004 }
10005 }
10006 hwcksum_dbg_verified++;
10007
10008 /*
10009 * This code allows us to emulate various hardwares that
10010 * perform 16-bit 1's complement sum beginning at various
10011 * start offset values.
10012 */
10013 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
10014 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
10015
10016 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
10017 return;
10018 }
10019
10020 sum = m_adj_sum16(m, rxoff, aoff,
10021 m_pktlen(m) - aoff, sum);
10022
10023 m->m_pkthdr.csum_rx_val = sum;
10024 m->m_pkthdr.csum_rx_start = (aoff + hlen);
10025
10026 hwcksum_dbg_adjusted++;
10027 }
10028 }
10029 }
10030
10031 static int
10032 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10033 {
10034 #pragma unused(arg1, arg2)
10035 u_int32_t i;
10036 int err;
10037
10038 i = hwcksum_dbg_mode;
10039
10040 err = sysctl_handle_int(oidp, &i, 0, req);
10041 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10042 return err;
10043 }
10044
10045 if (hwcksum_dbg == 0) {
10046 return ENODEV;
10047 }
10048
10049 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
10050 return EINVAL;
10051 }
10052
10053 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
10054
10055 return err;
10056 }
10057
10058 static int
10059 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10060 {
10061 #pragma unused(arg1, arg2)
10062 u_int32_t i;
10063 int err;
10064
10065 i = hwcksum_dbg_partial_rxoff_forced;
10066
10067 err = sysctl_handle_int(oidp, &i, 0, req);
10068 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10069 return err;
10070 }
10071
10072 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10073 return ENODEV;
10074 }
10075
10076 hwcksum_dbg_partial_rxoff_forced = i;
10077
10078 return err;
10079 }
10080
10081 static int
10082 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10083 {
10084 #pragma unused(arg1, arg2)
10085 u_int32_t i;
10086 int err;
10087
10088 i = hwcksum_dbg_partial_rxoff_adj;
10089
10090 err = sysctl_handle_int(oidp, &i, 0, req);
10091 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10092 return err;
10093 }
10094
10095 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
10096 return ENODEV;
10097 }
10098
10099 hwcksum_dbg_partial_rxoff_adj = i;
10100
10101 return err;
10102 }
10103
10104 static int
10105 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10106 {
10107 #pragma unused(oidp, arg1, arg2)
10108 int err;
10109
10110 if (req->oldptr == USER_ADDR_NULL) {
10111 }
10112 if (req->newptr != USER_ADDR_NULL) {
10113 return EPERM;
10114 }
10115 err = SYSCTL_OUT(req, &tx_chain_len_stats,
10116 sizeof(struct chain_len_stats));
10117
10118 return err;
10119 }
10120
10121
10122 #if DEBUG || DEVELOPMENT
10123 /* Blob for sum16 verification */
10124 static uint8_t sumdata[] = {
10125 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
10126 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
10127 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
10128 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
10129 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
10130 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
10131 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
10132 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
10133 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
10134 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
10135 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
10136 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
10137 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
10138 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
10139 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
10140 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
10141 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
10142 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
10143 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
10144 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
10145 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
10146 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
10147 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
10148 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
10149 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
10150 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
10151 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
10152 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
10153 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
10154 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
10155 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
10156 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
10157 0xc8, 0x28, 0x02, 0x00, 0x00
10158 };
10159
10160 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
10161 static struct {
10162 boolean_t init;
10163 uint16_t len;
10164 uint16_t sumr; /* reference */
10165 uint16_t sumrp; /* reference, precomputed */
10166 } sumtbl[] = {
10167 { FALSE, 0, 0, 0x0000 },
10168 { FALSE, 1, 0, 0x001f },
10169 { FALSE, 2, 0, 0x8b1f },
10170 { FALSE, 3, 0, 0x8b27 },
10171 { FALSE, 7, 0, 0x790e },
10172 { FALSE, 11, 0, 0xcb6d },
10173 { FALSE, 20, 0, 0x20dd },
10174 { FALSE, 27, 0, 0xbabd },
10175 { FALSE, 32, 0, 0xf3e8 },
10176 { FALSE, 37, 0, 0x197d },
10177 { FALSE, 43, 0, 0x9eae },
10178 { FALSE, 64, 0, 0x4678 },
10179 { FALSE, 127, 0, 0x9399 },
10180 { FALSE, 256, 0, 0xd147 },
10181 { FALSE, 325, 0, 0x0358 },
10182 };
10183 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
10184
10185 static void
10186 dlil_verify_sum16(void)
10187 {
10188 struct mbuf *m;
10189 uint8_t *buf;
10190 int n;
10191
10192 /* Make sure test data plus extra room for alignment fits in cluster */
10193 _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
10194
10195 kprintf("DLIL: running SUM16 self-tests ... ");
10196
10197 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
10198 m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
10199
10200 buf = mtod(m, uint8_t *); /* base address */
10201
10202 for (n = 0; n < SUMTBL_MAX; n++) {
10203 uint16_t len = sumtbl[n].len;
10204 int i;
10205
10206 /* Verify for all possible alignments */
10207 for (i = 0; i < (int)sizeof(uint64_t); i++) {
10208 uint16_t sum, sumr;
10209 uint8_t *c;
10210
10211 /* Copy over test data to mbuf */
10212 VERIFY(len <= sizeof(sumdata));
10213 c = buf + i;
10214 bcopy(sumdata, c, len);
10215
10216 /* Zero-offset test (align by data pointer) */
10217 m->m_data = (caddr_t)c;
10218 m->m_len = len;
10219 sum = m_sum16(m, 0, len);
10220
10221 if (!sumtbl[n].init) {
10222 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
10223 sumtbl[n].sumr = sumr;
10224 sumtbl[n].init = TRUE;
10225 } else {
10226 sumr = sumtbl[n].sumr;
10227 }
10228
10229 /* Something is horribly broken; stop now */
10230 if (sumr != sumtbl[n].sumrp) {
10231 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
10232 "for len=%d align=%d sum=0x%04x "
10233 "[expected=0x%04x]\n", __func__,
10234 len, i, sum, sumr);
10235 /* NOTREACHED */
10236 } else if (sum != sumr) {
10237 panic_plain("\n%s: broken m_sum16() for len=%d "
10238 "align=%d sum=0x%04x [expected=0x%04x]\n",
10239 __func__, len, i, sum, sumr);
10240 /* NOTREACHED */
10241 }
10242
10243 /* Alignment test by offset (fixed data pointer) */
10244 m->m_data = (caddr_t)buf;
10245 m->m_len = i + len;
10246 sum = m_sum16(m, i, len);
10247
10248 /* Something is horribly broken; stop now */
10249 if (sum != sumr) {
10250 panic_plain("\n%s: broken m_sum16() for len=%d "
10251 "offset=%d sum=0x%04x [expected=0x%04x]\n",
10252 __func__, len, i, sum, sumr);
10253 /* NOTREACHED */
10254 }
10255 #if INET
10256 /* Simple sum16 contiguous buffer test by aligment */
10257 sum = b_sum16(c, len);
10258
10259 /* Something is horribly broken; stop now */
10260 if (sum != sumr) {
10261 panic_plain("\n%s: broken b_sum16() for len=%d "
10262 "align=%d sum=0x%04x [expected=0x%04x]\n",
10263 __func__, len, i, sum, sumr);
10264 /* NOTREACHED */
10265 }
10266 #endif /* INET */
10267 }
10268 }
10269 m_freem(m);
10270
10271 kprintf("PASSED\n");
10272 }
10273 #endif /* DEBUG || DEVELOPMENT */
10274
10275 #define CASE_STRINGIFY(x) case x: return #x
10276
10277 __private_extern__ const char *
10278 dlil_kev_dl_code_str(u_int32_t event_code)
10279 {
10280 switch (event_code) {
10281 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
10282 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
10283 CASE_STRINGIFY(KEV_DL_SIFMTU);
10284 CASE_STRINGIFY(KEV_DL_SIFPHYS);
10285 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
10286 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
10287 CASE_STRINGIFY(KEV_DL_ADDMULTI);
10288 CASE_STRINGIFY(KEV_DL_DELMULTI);
10289 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
10290 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
10291 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
10292 CASE_STRINGIFY(KEV_DL_LINK_OFF);
10293 CASE_STRINGIFY(KEV_DL_LINK_ON);
10294 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
10295 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
10296 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
10297 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
10298 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
10299 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
10300 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
10301 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
10302 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
10303 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
10304 CASE_STRINGIFY(KEV_DL_ISSUES);
10305 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
10306 default:
10307 break;
10308 }
10309 return "";
10310 }
10311
10312 static void
10313 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
10314 {
10315 #pragma unused(arg1)
10316 struct ifnet *ifp = arg0;
10317
10318 if (ifnet_is_attached(ifp, 1)) {
10319 nstat_ifnet_threshold_reached(ifp->if_index);
10320 ifnet_decr_iorefcnt(ifp);
10321 }
10322 }
10323
10324 void
10325 ifnet_notify_data_threshold(struct ifnet *ifp)
10326 {
10327 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
10328 uint64_t oldbytes = ifp->if_dt_bytes;
10329
10330 ASSERT(ifp->if_dt_tcall != NULL);
10331
10332 /*
10333 * If we went over the threshold, notify NetworkStatistics.
10334 * We rate-limit it based on the threshold interval value.
10335 */
10336 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
10337 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
10338 !thread_call_isactive(ifp->if_dt_tcall)) {
10339 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
10340 uint64_t now = mach_absolute_time(), deadline = now;
10341 uint64_t ival;
10342
10343 if (tival != 0) {
10344 nanoseconds_to_absolutetime(tival, &ival);
10345 clock_deadline_for_periodic_event(ival, now, &deadline);
10346 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
10347 deadline);
10348 } else {
10349 (void) thread_call_enter(ifp->if_dt_tcall);
10350 }
10351 }
10352 }
10353
10354 #if (DEVELOPMENT || DEBUG)
10355 /*
10356 * The sysctl variable name contains the input parameters of
10357 * ifnet_get_keepalive_offload_frames()
10358 * ifp (interface index): name[0]
10359 * frames_array_count: name[1]
10360 * frame_data_offset: name[2]
10361 * The return length gives used_frames_count
10362 */
10363 static int
10364 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
10365 {
10366 #pragma unused(oidp)
10367 int *name = (int *)arg1;
10368 u_int namelen = arg2;
10369 int idx;
10370 ifnet_t ifp = NULL;
10371 u_int32_t frames_array_count;
10372 size_t frame_data_offset;
10373 u_int32_t used_frames_count;
10374 struct ifnet_keepalive_offload_frame *frames_array = NULL;
10375 int error = 0;
10376 u_int32_t i;
10377
10378 /*
10379 * Only root can get look at other people TCP frames
10380 */
10381 error = proc_suser(current_proc());
10382 if (error != 0) {
10383 goto done;
10384 }
10385 /*
10386 * Validate the input parameters
10387 */
10388 if (req->newptr != USER_ADDR_NULL) {
10389 error = EPERM;
10390 goto done;
10391 }
10392 if (namelen != 3) {
10393 error = EINVAL;
10394 goto done;
10395 }
10396 if (req->oldptr == USER_ADDR_NULL) {
10397 error = EINVAL;
10398 goto done;
10399 }
10400 if (req->oldlen == 0) {
10401 error = EINVAL;
10402 goto done;
10403 }
10404 idx = name[0];
10405 frames_array_count = name[1];
10406 frame_data_offset = name[2];
10407
10408 /* Make sure the passed buffer is large enough */
10409 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
10410 req->oldlen) {
10411 error = ENOMEM;
10412 goto done;
10413 }
10414
10415 ifnet_head_lock_shared();
10416 if (!IF_INDEX_IN_RANGE(idx)) {
10417 ifnet_head_done();
10418 error = ENOENT;
10419 goto done;
10420 }
10421 ifp = ifindex2ifnet[idx];
10422 ifnet_head_done();
10423
10424 frames_array = _MALLOC(frames_array_count *
10425 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
10426 if (frames_array == NULL) {
10427 error = ENOMEM;
10428 goto done;
10429 }
10430
10431 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
10432 frames_array_count, frame_data_offset, &used_frames_count);
10433 if (error != 0) {
10434 DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
10435 __func__, error);
10436 goto done;
10437 }
10438
10439 for (i = 0; i < used_frames_count; i++) {
10440 error = SYSCTL_OUT(req, frames_array + i,
10441 sizeof(struct ifnet_keepalive_offload_frame));
10442 if (error != 0) {
10443 goto done;
10444 }
10445 }
10446 done:
10447 if (frames_array != NULL) {
10448 _FREE(frames_array, M_TEMP);
10449 }
10450 return error;
10451 }
10452 #endif /* DEVELOPMENT || DEBUG */
10453
10454 void
10455 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
10456 struct ifnet *ifp)
10457 {
10458 tcp_update_stats_per_flow(ifs, ifp);
10459 }