1 /*
2 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_ipsec.h>
71 #include <net/if_llreach.h>
72 #include <net/if_utun.h>
73 #include <net/kpi_interfacefilter.h>
74 #include <net/classq/classq.h>
75 #include <net/classq/classq_sfb.h>
76 #include <net/flowhash.h>
77 #include <net/ntstat.h>
78 #include <net/if_llatbl.h>
79 #include <net/net_api_stats.h>
80 #include <net/if_ports_used.h>
81 #include <netinet/in.h>
82 #if INET
83 #include <netinet/in_var.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/tcp.h>
87 #include <netinet/tcp_var.h>
88 #include <netinet/udp.h>
89 #include <netinet/udp_var.h>
90 #include <netinet/if_ether.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_tclass.h>
93 #include <netinet/ip.h>
94 #include <netinet/ip_icmp.h>
95 #include <netinet/icmp_var.h>
96 #endif /* INET */
97
98 #if INET6
99 #include <net/nat464_utils.h>
100 #include <netinet6/in6_var.h>
101 #include <netinet6/nd6.h>
102 #include <netinet6/mld6_var.h>
103 #include <netinet6/scope6_var.h>
104 #include <netinet/ip6.h>
105 #include <netinet/icmp6.h>
106 #endif /* INET6 */
107 #include <net/pf_pbuf.h>
108 #include <libkern/OSAtomic.h>
109 #include <libkern/tree.h>
110
111 #include <dev/random/randomdev.h>
112 #include <machine/machine_routines.h>
113
114 #include <mach/thread_act.h>
115 #include <mach/sdt.h>
116
117 #if CONFIG_MACF
118 #include <sys/kauth.h>
119 #include <security/mac_framework.h>
120 #include <net/ethernet.h>
121 #include <net/firewire.h>
122 #endif
123
124 #if PF
125 #include <net/pfvar.h>
126 #endif /* PF */
127 #include <net/pktsched/pktsched.h>
128 #include <net/pktsched/pktsched_netem.h>
129
130 #if NECP
131 #include <net/necp.h>
132 #endif /* NECP */
133
134
135 #include <os/log.h>
136
137 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
138 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
139 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
140 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
141 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
142
143 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
144 #define MAX_LINKADDR 4 /* LONGWORDS */
145 #define M_NKE M_IFADDR
146
147 #if 1
148 #define DLIL_PRINTF printf
149 #else
150 #define DLIL_PRINTF kprintf
151 #endif
152
153 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
154 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
155
156 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
157 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
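/*
 * For example (illustrative), IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets)
 * expands to a compile-time assertion that
 * offsetof(struct if_data_internal, ifi_ipackets) is a multiple of 8,
 * so the 64-bit counter can be updated atomically; dlil_init() below
 * applies these checks to every 64-bit counter field.
 */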
158
159 enum {
160 kProtoKPI_v1 = 1,
161 kProtoKPI_v2 = 2
162 };
163
164 /*
165 * List of if_proto structures in if_proto_hash[] is protected by
166 * the ifnet lock. The rest of the fields are initialized at protocol
167 * attach time and never change, thus no lock required as long as
168 * a reference to it is valid, via if_proto_ref().
169 */
170 struct if_proto {
171 SLIST_ENTRY(if_proto) next_hash;
172 u_int32_t refcount;
173 u_int32_t detached;
174 struct ifnet *ifp;
175 protocol_family_t protocol_family;
176 int proto_kpi;
177 union {
178 struct {
179 proto_media_input input;
180 proto_media_preout pre_output;
181 proto_media_event event;
182 proto_media_ioctl ioctl;
183 proto_media_detached detached;
184 proto_media_resolve_multi resolve_multi;
185 proto_media_send_arp send_arp;
186 } v1;
187 struct {
188 proto_media_input_v2 input;
189 proto_media_preout pre_output;
190 proto_media_event event;
191 proto_media_ioctl ioctl;
192 proto_media_detached detached;
193 proto_media_resolve_multi resolve_multi;
194 proto_media_send_arp send_arp;
195 } v2;
196 } kpi;
197 };
198
199 SLIST_HEAD(proto_hash_entry, if_proto);
200
201 #define DLIL_SDLDATALEN \
202 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
203
204 struct dlil_ifnet {
205 struct ifnet dl_if; /* public ifnet */
206 /*
207 * DLIL private fields, protected by dl_if_lock
208 */
209 decl_lck_mtx_data(, dl_if_lock);
210 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
211 u_int32_t dl_if_flags; /* flags (below) */
212 u_int32_t dl_if_refcnt; /* refcnt */
213 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
214 void *dl_if_uniqueid; /* unique interface id */
215 size_t dl_if_uniqueid_len; /* length of the unique id */
216 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
217 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
218 struct {
219 struct ifaddr ifa; /* lladdr ifa */
220 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
221 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
222 } dl_if_lladdr;
223 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
224 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
225 ctrace_t dl_if_attach; /* attach PC stacktrace */
226 ctrace_t dl_if_detach; /* detach PC stacktrace */
227 };
228
229 /* Values for dl_if_flags (private to DLIL) */
230 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
231 #define DLIF_REUSE 0x2 /* DLIL ifnet recycler, ifnet is not new */
232 #define DLIF_DEBUG 0x4 /* has debugging info */
233
234 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
235
236 /* For gdb */
237 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
238
239 struct dlil_ifnet_dbg {
240 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
241 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
242 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
243 /*
244 * Circular lists of ifnet_{reference,release} callers.
245 */
246 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
247 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
248 };
249
250 #define DLIL_TO_IFP(s) (&s->dl_if)
251 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
252
253 struct ifnet_filter {
254 TAILQ_ENTRY(ifnet_filter) filt_next;
255 u_int32_t filt_skip;
256 u_int32_t filt_flags;
257 ifnet_t filt_ifp;
258 const char *filt_name;
259 void *filt_cookie;
260 protocol_family_t filt_protocol;
261 iff_input_func filt_input;
262 iff_output_func filt_output;
263 iff_event_func filt_event;
264 iff_ioctl_func filt_ioctl;
265 iff_detached_func filt_detached;
266 };
267
268 struct proto_input_entry;
269
270 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
271 static lck_grp_t *dlil_lock_group;
272 lck_grp_t *ifnet_lock_group;
273 static lck_grp_t *ifnet_head_lock_group;
274 static lck_grp_t *ifnet_snd_lock_group;
275 static lck_grp_t *ifnet_rcv_lock_group;
276 lck_attr_t *ifnet_lock_attr;
277 decl_lck_rw_data(static, ifnet_head_lock);
278 decl_lck_mtx_data(static, dlil_ifnet_lock);
279 u_int32_t dlil_filter_disable_tso_count = 0;
280
281 #if DEBUG
282 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
283 #else
284 static unsigned int ifnet_debug; /* debugging (disabled) */
285 #endif /* !DEBUG */
286 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
287 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
288 static struct zone *dlif_zone; /* zone for dlil_ifnet */
289
290 #define DLIF_ZONE_MAX IFNETS_MAX /* maximum elements in zone */
291 #define DLIF_ZONE_NAME "ifnet" /* zone name */
292
293 static unsigned int dlif_filt_size; /* size of ifnet_filter */
294 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
295
296 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
297 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
298
299 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
300 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
301
302 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
303 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
304
305 static unsigned int dlif_proto_size; /* size of if_proto */
306 static struct zone *dlif_proto_zone; /* zone for if_proto */
307
308 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
309 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
310
311 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
312 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
313 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
314
315 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
316 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
317
318 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
319 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
320 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
321
322 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
323 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
324
325 static u_int32_t net_rtref;
326
327 static struct dlil_main_threading_info dlil_main_input_thread_info;
328 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
329 (struct dlil_threading_info *)&dlil_main_input_thread_info;
330
331 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
332 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
333 static void dlil_if_trace(struct dlil_ifnet *, int);
334 static void if_proto_ref(struct if_proto *);
335 static void if_proto_free(struct if_proto *);
336 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
337 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
338 u_int32_t list_count);
339 static void if_flt_monitor_busy(struct ifnet *);
340 static void if_flt_monitor_unbusy(struct ifnet *);
341 static void if_flt_monitor_enter(struct ifnet *);
342 static void if_flt_monitor_leave(struct ifnet *);
343 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
344 char **, protocol_family_t);
345 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
346 protocol_family_t);
347 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
348 const struct sockaddr_dl *);
349 static int ifnet_lookup(struct ifnet *);
350 static void if_purgeaddrs(struct ifnet *);
351
352 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
353 struct mbuf *, char *);
354 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
355 struct mbuf *);
356 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
357 mbuf_t *, const struct sockaddr *, void *, char *, char *);
358 static void ifproto_media_event(struct ifnet *, protocol_family_t,
359 const struct kev_msg *);
360 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
361 unsigned long, void *);
362 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
363 struct sockaddr_dl *, size_t);
364 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
365 const struct sockaddr_dl *, const struct sockaddr *,
366 const struct sockaddr_dl *, const struct sockaddr *);
367
368 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
369 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
370 boolean_t poll, struct thread *tp);
371 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
372 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
373 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
374 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
375 protocol_family_t *);
376 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
377 const struct ifnet_demux_desc *, u_int32_t);
378 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
379 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
380 #if CONFIG_EMBEDDED
381 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
382 const struct sockaddr *, const char *, const char *,
383 u_int32_t *, u_int32_t *);
384 #else
385 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
386 const struct sockaddr *, const char *, const char *);
387 #endif /* CONFIG_EMBEDDED */
388 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
389 const struct sockaddr *, const char *, const char *,
390 u_int32_t *, u_int32_t *);
391 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
392 static void ifp_if_free(struct ifnet *);
393 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
394 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
395 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
396
397 static void dlil_main_input_thread_func(void *, wait_result_t);
398 static void dlil_main_input_thread_cont(void *, wait_result_t);
399
400 static void dlil_input_thread_func(void *, wait_result_t);
401 static void dlil_input_thread_cont(void *, wait_result_t);
402
403 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
404 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
405
406 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
407 static void dlil_terminate_input_thread(struct dlil_threading_info *);
408 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
409 struct dlil_threading_info *, struct ifnet *, boolean_t);
410 static boolean_t dlil_input_stats_sync(struct ifnet *,
411 struct dlil_threading_info *);
412 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
413 u_int32_t, ifnet_model_t, boolean_t);
414 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
415 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
416 static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
417 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
418 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
419 #if DEBUG || DEVELOPMENT
420 static void dlil_verify_sum16(void);
421 #endif /* DEBUG || DEVELOPMENT */
422 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
423 protocol_family_t);
424 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
425 protocol_family_t);
426
427 static void dlil_incr_pending_thread_count(void);
428 static void dlil_decr_pending_thread_count(void);
429
430 static void ifnet_detacher_thread_func(void *, wait_result_t);
431 static int ifnet_detacher_thread_cont(int);
432 static void ifnet_detach_final(struct ifnet *);
433 static void ifnet_detaching_enqueue(struct ifnet *);
434 static struct ifnet *ifnet_detaching_dequeue(void);
435
436 static void ifnet_start_thread_func(void *, wait_result_t);
437 static void ifnet_start_thread_cont(void *, wait_result_t);
438
439 static void ifnet_poll_thread_func(void *, wait_result_t);
440 static void ifnet_poll_thread_cont(void *, wait_result_t);
441
442 static errno_t ifnet_enqueue_common(struct ifnet *, classq_pkt_t *,
443 boolean_t, boolean_t *);
444
445 static void ifp_src_route_copyout(struct ifnet *, struct route *);
446 static void ifp_src_route_copyin(struct ifnet *, struct route *);
447 #if INET6
448 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
449 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
450 #endif /* INET6 */
451
452 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
453 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
454 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
455 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
456 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
457 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
458 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
459 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
460 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
461 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
462 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
463
464 struct chain_len_stats tx_chain_len_stats;
465 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
466
467 #if TEST_INPUT_THREAD_TERMINATION
468 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
469 #endif /* TEST_INPUT_THREAD_TERMINATION */
470
471 /* The following are protected by dlil_ifnet_lock */
472 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
473 static u_int32_t ifnet_detaching_cnt;
474 static void *ifnet_delayed_run; /* wait channel for detaching thread */
475
476 decl_lck_mtx_data(static, ifnet_fc_lock);
477
478 static uint32_t ifnet_flowhash_seed;
479
480 struct ifnet_flowhash_key {
481 char ifk_name[IFNAMSIZ];
482 uint32_t ifk_unit;
483 uint32_t ifk_flags;
484 uint32_t ifk_eflags;
485 uint32_t ifk_capabilities;
486 uint32_t ifk_capenable;
487 uint32_t ifk_output_sched_model;
488 uint32_t ifk_rand1;
489 uint32_t ifk_rand2;
490 };
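/*
 * Sketch (assumption): ifnet_calc_flowhash() is expected to fill this key
 * from the correspondingly named ifnet fields plus two random words and
 * hash it with the seed ifnet_flowhash_seed; the resulting value is what
 * ifnet_fc_entry stores in ifce_flowhash for lookups in ifnet_fc_tree.
 */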
491
492 /* Flow control entry per interface */
493 struct ifnet_fc_entry {
494 RB_ENTRY(ifnet_fc_entry) ifce_entry;
495 u_int32_t ifce_flowhash;
496 struct ifnet *ifce_ifp;
497 };
498
499 static uint32_t ifnet_calc_flowhash(struct ifnet *);
500 static int ifce_cmp(const struct ifnet_fc_entry *,
501 const struct ifnet_fc_entry *);
502 static int ifnet_fc_add(struct ifnet *);
503 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
504 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
505
506 /* protected by ifnet_fc_lock */
507 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
508 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
509 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
510
511 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
512 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
513
514 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
515 #define IFNET_FC_ZONE_MAX 32
516
517 extern void bpfdetach(struct ifnet *);
518 extern void proto_input_run(void);
519
520 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
521 u_int32_t flags);
522 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
523 u_int32_t flags);
524
525 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
526
527 #if CONFIG_MACF
528 #ifdef CONFIG_EMBEDDED
529 int dlil_lladdr_ckreq = 1;
530 #else
531 int dlil_lladdr_ckreq = 0;
532 #endif
533 #endif
534
535 #if DEBUG
536 int dlil_verbose = 1;
537 #else
538 int dlil_verbose = 0;
539 #endif /* DEBUG */
540 #if IFNET_INPUT_SANITY_CHK
541 /* sanity checking of input packet lists received */
542 static u_int32_t dlil_input_sanity_check = 0;
543 #endif /* IFNET_INPUT_SANITY_CHK */
544 /* rate limit debug messages */
545 struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
546
547 SYSCTL_DECL(_net_link_generic_system);
548
549 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
550 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
551
552 #define IF_SNDQ_MINLEN 32
553 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
554 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
555 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
556 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
557
558 #define IF_RCVQ_MINLEN 32
559 #define IF_RCVQ_MAXLEN 256
560 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
561 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
562 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
563 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
564
565 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
566 u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
567 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
568 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
569 "ilog2 of EWMA decay rate of avg inbound packets");
570
571 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
572 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
573 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
574 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
575 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
576 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
577 "Q", "input poll mode freeze time");
578
579 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
580 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
581 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
582 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
583 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
584 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
585 "Q", "input poll sampling time");
586
587 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
588 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
589 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
590 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
591 "Q", "input poll interval (time)");
592
593 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
594 u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
595 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
596 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
597 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
598
599 #define IF_RXPOLL_WLOWAT 10
600 static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
601 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
602 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
603 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
604 "I", "input poll wakeup low watermark");
605
606 #define IF_RXPOLL_WHIWAT 100
607 static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
608 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
609 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
610 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
611 "I", "input poll wakeup high watermark");
612
613 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
614 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
615 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
616 "max packets per poll call");
617
618 u_int32_t if_rxpoll = 1;
619 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
620 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
621 sysctl_rxpoll, "I", "enable opportunistic input polling");
622
623 #if TEST_INPUT_THREAD_TERMINATION
624 static u_int32_t if_input_thread_termination_spin = 0;
625 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
626 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
627 &if_input_thread_termination_spin, 0,
628 sysctl_input_thread_termination_spin,
629 "I", "input thread termination spin limit");
630 #endif /* TEST_INPUT_THREAD_TERMINATION */
631
632 static u_int32_t cur_dlil_input_threads = 0;
633 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
634 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
635 "Current number of DLIL input threads");
636
637 #if IFNET_INPUT_SANITY_CHK
638 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
639 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
640 "Turn on sanity checking in DLIL input");
641 #endif /* IFNET_INPUT_SANITY_CHK */
642
643 static u_int32_t if_flowadv = 1;
644 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
645 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
646 "enable flow-advisory mechanism");
647
648 static u_int32_t if_delaybased_queue = 1;
649 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
650 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
651 "enable delay based dynamic queue sizing");
652
653 static uint64_t hwcksum_in_invalidated = 0;
654 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
655 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
656 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
657
658 uint32_t hwcksum_dbg = 0;
659 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
660 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
661 "enable hardware cksum debugging");
662
663 u_int32_t ifnet_start_delayed = 0;
664 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
665 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
666 "number of times start was delayed");
667
668 u_int32_t ifnet_delay_start_disabled = 0;
669 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
670 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
671 "number of times start was delayed");
672
673 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
674 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
675 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
676 #define HWCKSUM_DBG_MASK \
677 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
678 HWCKSUM_DBG_FINALIZE_FORCED)
679
680 static uint32_t hwcksum_dbg_mode = 0;
681 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
682 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
683 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
684
685 static uint64_t hwcksum_dbg_partial_forced = 0;
686 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
687 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
688 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
689
690 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
691 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
692 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
693 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
694
695 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
696 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
697 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
698 &hwcksum_dbg_partial_rxoff_forced, 0,
699 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
700 "forced partial cksum rx offset");
701
702 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
703 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
704 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
705 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
706 "adjusted partial cksum rx offset");
707
708 static uint64_t hwcksum_dbg_verified = 0;
709 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
710 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
711 &hwcksum_dbg_verified, "packets verified for having good checksum");
712
713 static uint64_t hwcksum_dbg_bad_cksum = 0;
714 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
715 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
716 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
717
718 static uint64_t hwcksum_dbg_bad_rxoff = 0;
719 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
720 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
721 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
722
723 static uint64_t hwcksum_dbg_adjusted = 0;
724 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
725 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
726 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
727
728 static uint64_t hwcksum_dbg_finalized_hdr = 0;
729 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
730 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
731 &hwcksum_dbg_finalized_hdr, "finalized headers");
732
733 static uint64_t hwcksum_dbg_finalized_data = 0;
734 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
735 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
736 &hwcksum_dbg_finalized_data, "finalized payloads");
737
738 uint32_t hwcksum_tx = 1;
739 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
740 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
741 "enable transmit hardware checksum offload");
742
743 uint32_t hwcksum_rx = 1;
744 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
745 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
746 "enable receive hardware checksum offload");
747
748 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
749 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
750 sysctl_tx_chain_len_stats, "S", "");
751
752 uint32_t tx_chain_len_count = 0;
753 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
754 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
755
756 static uint32_t threshold_notify = 1; /* enable/disable */
757 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
758 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
759
760 static uint32_t threshold_interval = 2; /* in seconds */
761 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
762 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
763
764 #if (DEVELOPMENT || DEBUG)
765 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
766 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
767 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
768 #endif /* DEVELOPMENT || DEBUG */
769
770 struct net_api_stats net_api_stats;
771 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
772 &net_api_stats, net_api_stats, "");
773
774
775 unsigned int net_rxpoll = 1;
776 unsigned int net_affinity = 1;
777 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
778
779 extern u_int32_t inject_buckets;
780
781 static lck_grp_attr_t *dlil_grp_attributes = NULL;
782 static lck_attr_t *dlil_lck_attributes = NULL;
783
784 /* DLIL data threshold thread call */
785 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
786
787 void
788 ifnet_filter_update_tso(boolean_t filter_enable)
789 {
790 /*
791 * update filter count and route_generation ID to let TCP
792 * know it should reevaluate whether to do TSO
793 */
794 OSAddAtomic(filter_enable ? 1 : -1, &dlil_filter_disable_tso_count);
795 routegenid_update();
796 }
797
798
799 #define DLIL_INPUT_CHECK(m, ifp) { \
800 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
801 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
802 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
803 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
804 /* NOTREACHED */ \
805 } \
806 }
807
808 #define DLIL_EWMA(old, new, decay) do { \
809 u_int32_t _avg; \
810 if ((_avg = (old)) > 0) \
811 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
812 else \
813 _avg = (new); \
814 (old) = _avg; \
815 } while (0)
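/*
 * Worked example: with decay = 2 (weight 1/4), old = 80 and new = 20,
 * the update computes ((80 << 2) - 80 + 20) >> 2 = (320 - 80 + 20) / 4 = 65,
 * i.e. avg' = avg - avg/4 + new/4; a zero history simply adopts `new'.
 */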
816
817 #define MBPS (1ULL * 1000 * 1000)
818 #define GBPS (MBPS * 1000)
819
820 struct rxpoll_time_tbl {
821 u_int64_t speed; /* downlink speed */
822 u_int32_t plowat; /* packets low watermark */
823 u_int32_t phiwat; /* packets high watermark */
824 u_int32_t blowat; /* bytes low watermark */
825 u_int32_t bhiwat; /* bytes high watermark */
826 };
827
828 static struct rxpoll_time_tbl rxpoll_tbl[] = {
829 { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
830 { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
831 { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
832 { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
833 { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
834 { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
835 };
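/*
 * Sketch of the intended use (assumption about the lookup): the poller
 * matches the interface downlink rate against this table and adopts the
 * watermarks of the corresponding row, e.g. a 1 Gbps link would use
 * plowat 10 / phiwat 40 packets and blowat 4 KB / bhiwat 64 KB; the
 * all-zero row terminates the table.
 */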
836
837 decl_lck_mtx_data(static, dlil_thread_sync_lock);
838 static uint32_t dlil_pending_thread_cnt = 0;
839 static void
840 dlil_incr_pending_thread_count(void)
841 {
842 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
843 lck_mtx_lock(&dlil_thread_sync_lock);
844 dlil_pending_thread_cnt++;
845 lck_mtx_unlock(&dlil_thread_sync_lock);
846 }
847
848 static void
849 dlil_decr_pending_thread_count(void)
850 {
851 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
852 lck_mtx_lock(&dlil_thread_sync_lock);
853 VERIFY(dlil_pending_thread_cnt > 0);
854 dlil_pending_thread_cnt--;
855 if (dlil_pending_thread_cnt == 0) {
856 wakeup(&dlil_pending_thread_cnt);
857 }
858 lck_mtx_unlock(&dlil_thread_sync_lock);
859 }
860
861 int
862 proto_hash_value(u_int32_t protocol_family)
863 {
864 /*
865 * dlil_proto_unplumb_all() depends on the mapping between
866 * the hash bucket index and the protocol family defined
867 * here; future changes must be applied there as well.
868 */
869 switch (protocol_family) {
870 case PF_INET:
871 return 0;
872 case PF_INET6:
873 return 1;
874 case PF_VLAN:
875 return 2;
876 case PF_802154:
877 return 3;
878 case PF_UNSPEC:
879 default:
880 return 4;
881 }
882 }
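/*
 * Example (hypothetical caller): look up the hash bucket for IPv6 before
 * walking its chain, as find_attached_proto() does below.
 *
 *	u_int32_t i = proto_hash_value(PF_INET6);	/* i == 1 */
 *	proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
 */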
883
884 /*
885 * Caller must already be holding ifnet lock.
886 */
887 static struct if_proto *
888 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
889 {
890 struct if_proto *proto = NULL;
891 u_int32_t i = proto_hash_value(protocol_family);
892
893 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
894
895 if (ifp->if_proto_hash != NULL) {
896 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
897 }
898
899 while (proto != NULL && proto->protocol_family != protocol_family) {
900 proto = SLIST_NEXT(proto, next_hash);
901 }
902
903 if (proto != NULL) {
904 if_proto_ref(proto);
905 }
906
907 return proto;
908 }
909
910 static void
911 if_proto_ref(struct if_proto *proto)
912 {
913 atomic_add_32(&proto->refcount, 1);
914 }
915
916 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
917
918 static void
919 if_proto_free(struct if_proto *proto)
920 {
921 u_int32_t oldval;
922 struct ifnet *ifp = proto->ifp;
923 u_int32_t proto_family = proto->protocol_family;
924 struct kev_dl_proto_data ev_pr_data;
925
926 oldval = atomic_add_32_ov(&proto->refcount, -1);
927 if (oldval > 1) {
928 return;
929 }
930
931 /* No more references on this; protocol must have been detached */
932 VERIFY(proto->detached);
933
934 if (proto->proto_kpi == kProtoKPI_v1) {
935 if (proto->kpi.v1.detached) {
936 proto->kpi.v1.detached(ifp, proto->protocol_family);
937 }
938 }
939 if (proto->proto_kpi == kProtoKPI_v2) {
940 if (proto->kpi.v2.detached) {
941 proto->kpi.v2.detached(ifp, proto->protocol_family);
942 }
943 }
944
945 /*
946 * Clean up routes that may still be in the routing table for that
947 * interface/protocol pair.
948 */
949 if_rtproto_del(ifp, proto_family);
950
951 /*
952 * The reserved field carries the number of protocols still attached
953 * (subject to change)
954 */
955 ifnet_lock_shared(ifp);
956 ev_pr_data.proto_family = proto_family;
957 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
958 ifnet_lock_done(ifp);
959
960 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
961 (struct net_event_data *)&ev_pr_data,
962 sizeof(struct kev_dl_proto_data));
963
964 if (ev_pr_data.proto_remaining_count == 0) {
965 /*
966 * The protocol count has gone to zero, mark the interface down.
967 * This used to be done by configd.KernelEventMonitor, but that
968 * is inherently prone to races (rdar://problem/30810208).
969 */
970 (void) ifnet_set_flags(ifp, 0, IFF_UP);
971 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
972 dlil_post_sifflags_msg(ifp);
973 }
974
975 zfree(dlif_proto_zone, proto);
976 }
977
978 __private_extern__ void
979 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
980 {
981 #if !MACH_ASSERT
982 #pragma unused(ifp)
983 #endif
984 unsigned int type = 0;
985 int ass = 1;
986
987 switch (what) {
988 case IFNET_LCK_ASSERT_EXCLUSIVE:
989 type = LCK_RW_ASSERT_EXCLUSIVE;
990 break;
991
992 case IFNET_LCK_ASSERT_SHARED:
993 type = LCK_RW_ASSERT_SHARED;
994 break;
995
996 case IFNET_LCK_ASSERT_OWNED:
997 type = LCK_RW_ASSERT_HELD;
998 break;
999
1000 case IFNET_LCK_ASSERT_NOTOWNED:
1001 /* nothing to do here for RW lock; bypass assert */
1002 ass = 0;
1003 break;
1004
1005 default:
1006 panic("bad ifnet assert type: %d", what);
1007 /* NOTREACHED */
1008 }
1009 if (ass) {
1010 LCK_RW_ASSERT(&ifp->if_lock, type);
1011 }
1012 }
1013
1014 __private_extern__ void
1015 ifnet_lock_shared(struct ifnet *ifp)
1016 {
1017 lck_rw_lock_shared(&ifp->if_lock);
1018 }
1019
1020 __private_extern__ void
1021 ifnet_lock_exclusive(struct ifnet *ifp)
1022 {
1023 lck_rw_lock_exclusive(&ifp->if_lock);
1024 }
1025
1026 __private_extern__ void
1027 ifnet_lock_done(struct ifnet *ifp)
1028 {
1029 lck_rw_done(&ifp->if_lock);
1030 }
1031
1032 #if INET
1033 __private_extern__ void
1034 if_inetdata_lock_shared(struct ifnet *ifp)
1035 {
1036 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1037 }
1038
1039 __private_extern__ void
1040 if_inetdata_lock_exclusive(struct ifnet *ifp)
1041 {
1042 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1043 }
1044
1045 __private_extern__ void
1046 if_inetdata_lock_done(struct ifnet *ifp)
1047 {
1048 lck_rw_done(&ifp->if_inetdata_lock);
1049 }
1050 #endif
1051
1052 #if INET6
1053 __private_extern__ void
1054 if_inet6data_lock_shared(struct ifnet *ifp)
1055 {
1056 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1057 }
1058
1059 __private_extern__ void
1060 if_inet6data_lock_exclusive(struct ifnet *ifp)
1061 {
1062 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1063 }
1064
1065 __private_extern__ void
1066 if_inet6data_lock_done(struct ifnet *ifp)
1067 {
1068 lck_rw_done(&ifp->if_inet6data_lock);
1069 }
1070 #endif
1071
1072 __private_extern__ void
1073 ifnet_head_lock_shared(void)
1074 {
1075 lck_rw_lock_shared(&ifnet_head_lock);
1076 }
1077
1078 __private_extern__ void
1079 ifnet_head_lock_exclusive(void)
1080 {
1081 lck_rw_lock_exclusive(&ifnet_head_lock);
1082 }
1083
1084 __private_extern__ void
1085 ifnet_head_done(void)
1086 {
1087 lck_rw_done(&ifnet_head_lock);
1088 }
1089
1090 __private_extern__ void
1091 ifnet_head_assert_exclusive(void)
1092 {
1093 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1094 }
1095
1096 /*
1097 * dlil_ifp_protolist
1098 * - get the list of protocols attached to the interface, or just the number
1099 * of attached protocols
1100 * - if the number returned is greater than 'list_count', truncation occurred
1101 *
1102 * Note:
1103 * - caller must already be holding ifnet lock.
1104 */
1105 static u_int32_t
1106 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1107 u_int32_t list_count)
1108 {
1109 u_int32_t count = 0;
1110 int i;
1111
1112 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1113
1114 if (ifp->if_proto_hash == NULL) {
1115 goto done;
1116 }
1117
1118 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1119 struct if_proto *proto;
1120 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1121 if (list != NULL && count < list_count) {
1122 list[count] = proto->protocol_family;
1123 }
1124 count++;
1125 }
1126 }
1127 done:
1128 return count;
1129 }
1130
1131 __private_extern__ u_int32_t
1132 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1133 {
1134 ifnet_lock_shared(ifp);
1135 count = dlil_ifp_protolist(ifp, protolist, count);
1136 ifnet_lock_done(ifp);
1137 return count;
1138 }
1139
1140 __private_extern__ void
1141 if_free_protolist(u_int32_t *list)
1142 {
1143 _FREE(list, M_TEMP);
1144 }
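/*
 * Usage sketch (hypothetical caller): size the list first, then fill it;
 * the buffer is assumed to be allocated from M_TEMP so that
 * if_free_protolist() can release it.
 *
 *	u_int32_t n = if_get_protolist(ifp, NULL, 0);
 *	u_int32_t *plist = _MALLOC(n * sizeof(*plist), M_TEMP, M_WAITOK);
 *	n = if_get_protolist(ifp, plist, n);
 *	...
 *	if_free_protolist(plist);
 */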
1145
1146 __private_extern__ int
1147 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1148 u_int32_t event_code, struct net_event_data *event_data,
1149 u_int32_t event_data_len)
1150 {
1151 struct net_event_data ev_data;
1152 struct kev_msg ev_msg;
1153
1154 bzero(&ev_msg, sizeof(ev_msg));
1155 bzero(&ev_data, sizeof(ev_data));
1156 /*
1157 * A net event always starts with a net_event_data structure;
1158 * the caller can either post a simple net event or provide a
1159 * longer, event-specific structure.
1160 */
1161 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1162 ev_msg.kev_class = KEV_NETWORK_CLASS;
1163 ev_msg.kev_subclass = event_subclass;
1164 ev_msg.event_code = event_code;
1165
1166 if (event_data == NULL) {
1167 event_data = &ev_data;
1168 event_data_len = sizeof(struct net_event_data);
1169 }
1170
1171 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1172 event_data->if_family = ifp->if_family;
1173 event_data->if_unit = (u_int32_t)ifp->if_unit;
1174
1175 ev_msg.dv[0].data_length = event_data_len;
1176 ev_msg.dv[0].data_ptr = event_data;
1177 ev_msg.dv[1].data_length = 0;
1178
1179 bool update_generation = true;
1180 if (event_subclass == KEV_DL_SUBCLASS) {
1181 /* Don't update interface generation for frequent link quality and state changes */
1182 switch (event_code) {
1183 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1184 case KEV_DL_RRC_STATE_CHANGED:
1185 case KEV_DL_NODE_PRESENCE:
1186 case KEV_DL_NODE_ABSENCE:
1187 case KEV_DL_MASTER_ELECTED:
1188 update_generation = false;
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194
1195 return dlil_event_internal(ifp, &ev_msg, update_generation);
1196 }
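/*
 * Minimal usage sketch: passing a NULL event_data posts a bare
 * net_event_data carrying only the interface name/family/unit, e.g.
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 *
 * as opposed to the extended kev_dl_proto_data posted from
 * if_proto_free() above.
 */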
1197
1198 __private_extern__ int
1199 dlil_alloc_local_stats(struct ifnet *ifp)
1200 {
1201 int ret = EINVAL;
1202 void *buf, *base, **pbuf;
1203
1204 if (ifp == NULL) {
1205 goto end;
1206 }
1207
1208 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1209 /* allocate tcpstat_local structure */
1210 buf = zalloc(dlif_tcpstat_zone);
1211 if (buf == NULL) {
1212 ret = ENOMEM;
1213 goto end;
1214 }
1215 bzero(buf, dlif_tcpstat_bufsize);
1216
1217 /* Get the 64-bit aligned base address for this object */
1218 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1219 sizeof(u_int64_t));
1220 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1221 ((intptr_t)buf + dlif_tcpstat_bufsize));
1222
1223 /*
1224 * Wind back a pointer size from the aligned base and
1225 * save the original address so we can free it later.
1226 */
1227 pbuf = (void **)((intptr_t)base - sizeof(void *));
1228 *pbuf = buf;
1229 ifp->if_tcp_stat = base;
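/*
 * Layout sketch for the allocation above: the zone element is over-sized
 * by a pointer plus 8 bytes (cf. dlif_bufsize in dlil_init()) so `base'
 * can be pushed up to the next 64-bit boundary, with the original
 * zalloc() pointer stashed immediately before it for the matching zfree()
 * in the error path below.
 *
 *	buf ... [pad] [void * -> buf] [base: 64-bit aligned tcpstat_local]
 */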
1230
1231 /* allocate udpstat_local structure */
1232 buf = zalloc(dlif_udpstat_zone);
1233 if (buf == NULL) {
1234 ret = ENOMEM;
1235 goto end;
1236 }
1237 bzero(buf, dlif_udpstat_bufsize);
1238
1239 /* Get the 64-bit aligned base address for this object */
1240 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1241 sizeof(u_int64_t));
1242 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1243 ((intptr_t)buf + dlif_udpstat_bufsize));
1244
1245 /*
1246 * Wind back a pointer size from the aligned base and
1247 * save the original address so we can free it later.
1248 */
1249 pbuf = (void **)((intptr_t)base - sizeof(void *));
1250 *pbuf = buf;
1251 ifp->if_udp_stat = base;
1252
1253 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1254 IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
1255
1256 ret = 0;
1257 }
1258
1259 if (ifp->if_ipv4_stat == NULL) {
1260 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1261 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1262 if (ifp->if_ipv4_stat == NULL) {
1263 ret = ENOMEM;
1264 goto end;
1265 }
1266 }
1267
1268 if (ifp->if_ipv6_stat == NULL) {
1269 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1270 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1271 if (ifp->if_ipv6_stat == NULL) {
1272 ret = ENOMEM;
1273 goto end;
1274 }
1275 }
1276 end:
1277 if (ifp != NULL && ret != 0) {
1278 if (ifp->if_tcp_stat != NULL) {
1279 pbuf = (void **)
1280 ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1281 zfree(dlif_tcpstat_zone, *pbuf);
1282 ifp->if_tcp_stat = NULL;
1283 }
1284 if (ifp->if_udp_stat != NULL) {
1285 pbuf = (void **)
1286 ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1287 zfree(dlif_udpstat_zone, *pbuf);
1288 ifp->if_udp_stat = NULL;
1289 }
1290 if (ifp->if_ipv4_stat != NULL) {
1291 FREE(ifp->if_ipv4_stat, M_TEMP);
1292 ifp->if_ipv4_stat = NULL;
1293 }
1294 if (ifp->if_ipv6_stat != NULL) {
1295 FREE(ifp->if_ipv6_stat, M_TEMP);
1296 ifp->if_ipv6_stat = NULL;
1297 }
1298 }
1299
1300 return ret;
1301 }
1302
1303 static void
1304 dlil_reset_rxpoll_params(ifnet_t ifp)
1305 {
1306 ASSERT(ifp != NULL);
1307 ifnet_set_poll_cycle(ifp, NULL);
1308 ifp->if_poll_update = 0;
1309 ifp->if_poll_flags = 0;
1310 ifp->if_poll_req = 0;
1311 ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1312 bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1313 bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1314 bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1315 net_timerclear(&ifp->if_poll_mode_holdtime);
1316 net_timerclear(&ifp->if_poll_mode_lasttime);
1317 net_timerclear(&ifp->if_poll_sample_holdtime);
1318 net_timerclear(&ifp->if_poll_sample_lasttime);
1319 net_timerclear(&ifp->if_poll_dbg_lasttime);
1320 }
1321
1322 static int
1323 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1324 {
1325 boolean_t dlil_rxpoll_input;
1326 thread_continue_t func;
1327 u_int32_t limit;
1328 int error;
1329
1330 dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
1331 (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
1332
1333 /* NULL ifp indicates the main input thread, called at dlil_init time */
1334 if (ifp == NULL) {
1335 func = dlil_main_input_thread_func;
1336 VERIFY(inp == dlil_main_input_thread);
1337 (void) strlcat(inp->input_name,
1338 "main_input", DLIL_THREADNAME_LEN);
1339 } else if (dlil_rxpoll_input) {
1340 func = dlil_rxpoll_input_thread_func;
1341 VERIFY(inp != dlil_main_input_thread);
1342 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1343 "%s_input_poll", if_name(ifp));
1344 } else {
1345 func = dlil_input_thread_func;
1346 VERIFY(inp != dlil_main_input_thread);
1347 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1348 "%s_input", if_name(ifp));
1349 }
1350 VERIFY(inp->input_thr == THREAD_NULL);
1351
1352 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1353 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1354
1355 inp->ifp = ifp; /* NULL for main input thread */
1356 /*
1357 * For interfaces that support opportunistic polling, set the
1358 * low and high watermarks for outstanding inbound packets/bytes.
1359 * Also define freeze times for transitioning between modes
1360 * and updating the average.
1361 */
1362 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1363 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1364 if (ifp->if_xflags & IFXF_LEGACY) {
1365 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1366 }
1367 } else {
1368 limit = (u_int32_t)-1;
1369 }
1370
1371 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1372 if (inp == dlil_main_input_thread) {
1373 struct dlil_main_threading_info *inpm =
1374 (struct dlil_main_threading_info *)inp;
1375 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1376 }
1377
1378 error = kernel_thread_start(func, inp, &inp->input_thr);
1379 if (error == KERN_SUCCESS) {
1380 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1381 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
1382 /*
1383 * We create an affinity set so that the matching workloop
1384 * thread or the starter thread (for loopback) can be
1385 * scheduled on the same processor set as the input thread.
1386 */
1387 if (net_affinity) {
1388 struct thread *tp = inp->input_thr;
1389 u_int32_t tag;
1390 /*
1391 * Randomize to reduce the probability
1392 * of affinity tag namespace collision.
1393 */
1394 read_frandom(&tag, sizeof(tag));
1395 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1396 thread_reference(tp);
1397 inp->tag = tag;
1398 inp->net_affinity = TRUE;
1399 }
1400 }
1401 } else if (inp == dlil_main_input_thread) {
1402 panic_plain("%s: couldn't create main input thread", __func__);
1403 /* NOTREACHED */
1404 } else {
1405 panic_plain("%s: couldn't create %s input thread", __func__,
1406 if_name(ifp));
1407 /* NOTREACHED */
1408 }
1409 OSAddAtomic(1, &cur_dlil_input_threads);
1410
1411 return error;
1412 }
1413
1414 #if TEST_INPUT_THREAD_TERMINATION
1415 static int
1416 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1417 {
1418 #pragma unused(arg1, arg2)
1419 uint32_t i;
1420 int err;
1421
1422 i = if_input_thread_termination_spin;
1423
1424 err = sysctl_handle_int(oidp, &i, 0, req);
1425 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1426 return err;
1427 }
1428
1429 if (net_rxpoll == 0) {
1430 return ENXIO;
1431 }
1432
1433 if_input_thread_termination_spin = i;
1434 return err;
1435 }
1436 #endif /* TEST_INPUT_THREAD_TERMINATION */
1437
1438 static void
1439 dlil_clean_threading_info(struct dlil_threading_info *inp)
1440 {
1441 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1442 lck_grp_free(inp->lck_grp);
1443
1444 inp->input_waiting = 0;
1445 inp->wtot = 0;
1446 bzero(inp->input_name, sizeof(inp->input_name));
1447 inp->ifp = NULL;
1448 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1449 qlimit(&inp->rcvq_pkts) = 0;
1450 bzero(&inp->stats, sizeof(inp->stats));
1451
1452 VERIFY(!inp->net_affinity);
1453 inp->input_thr = THREAD_NULL;
1454 VERIFY(inp->wloop_thr == THREAD_NULL);
1455 VERIFY(inp->poll_thr == THREAD_NULL);
1456 VERIFY(inp->tag == 0);
1457 #if IFNET_INPUT_SANITY_CHK
1458 inp->input_mbuf_cnt = 0;
1459 #endif /* IFNET_INPUT_SANITY_CHK */
1460 }
1461
1462 static void
1463 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1464 {
1465 struct ifnet *ifp = inp->ifp;
1466 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
1467
1468 VERIFY(current_thread() == inp->input_thr);
1469 VERIFY(inp != dlil_main_input_thread);
1470
1471 OSAddAtomic(-1, &cur_dlil_input_threads);
1472
1473 #if TEST_INPUT_THREAD_TERMINATION
1474 { /* do something useless that won't get optimized away */
1475 uint32_t v = 1;
1476 for (uint32_t i = 0;
1477 i < if_input_thread_termination_spin;
1478 i++) {
1479 v = (i + 1) * v;
1480 }
1481 DLIL_PRINTF("the value is %d\n", v);
1482 }
1483 #endif /* TEST_INPUT_THREAD_TERMINATION */
1484
1485 lck_mtx_lock_spin(&inp->input_lck);
1486 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
1487 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1488 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1489 wakeup_one((caddr_t)&inp->input_waiting);
1490 lck_mtx_unlock(&inp->input_lck);
1491
1492 /* free up pending packets */
1493 if (pkt.cp_mbuf != NULL) {
1494 mbuf_freem_list(pkt.cp_mbuf);
1495 }
1496
1497 /* for the extra refcnt from kernel_thread_start() */
1498 thread_deallocate(current_thread());
1499
1500 if (dlil_verbose) {
1501 DLIL_PRINTF("%s: input thread terminated\n",
1502 if_name(ifp));
1503 }
1504
1505 /* this is the end */
1506 thread_terminate(current_thread());
1507 /* NOTREACHED */
1508 }
1509
1510 static kern_return_t
1511 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1512 {
1513 thread_affinity_policy_data_t policy;
1514
1515 bzero(&policy, sizeof(policy));
1516 policy.affinity_tag = tag;
1517 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1518 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
1519 }
1520
1521 void
1522 dlil_init(void)
1523 {
1524 thread_t thread = THREAD_NULL;
1525
1526 /*
1527 * The following fields must be 64-bit aligned for atomic operations.
1528 */
1529 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1530 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1531 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1532 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1533 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1534 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1535 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1536 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1537 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1538 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1539 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1540 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1541 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1542 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1543 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1544
1545 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1546 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1547 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1548 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1549 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1550 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1551 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1552 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1553 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1554 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1555 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1556 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1557 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1558 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1559 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1560
1561 /*
1562 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1563 */
1564 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1565 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1566 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1567 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1568 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1569 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1570 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1571 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1572 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1573 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1574 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1575 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1576 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1577 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1578
1579 /*
1580 * ... as well as the mbuf checksum flags counterparts.
1581 */
1582 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1583 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1584 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1585 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1586 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1587 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1588 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1589 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1590 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1591 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1592 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1593
1594 /*
1595 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1596 */
1597 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1598 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1599
1600 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1601 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1602 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1603 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1604
1605 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1606 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1607 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1608
1609 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1610 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1611 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1612 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1613 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1614 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1615 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1616 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1617 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1618 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1619 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1620 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1621 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1622 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1623 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1624 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1625 _CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
1626 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
1627 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
1628
1629 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1630 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1631 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1632 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1633 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1634 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1635 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1636 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
1637 _CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);
1638
1639 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1640 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1641
1642 PE_parse_boot_argn("net_affinity", &net_affinity,
1643 sizeof(net_affinity));
1644
1645 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
1646
1647 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
1648
1649 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
1650
1651 VERIFY(dlil_pending_thread_cnt == 0);
1652 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1653 sizeof(struct dlil_ifnet_dbg);
1654 /* Enforce 64-bit alignment for dlil_ifnet structure */
1655 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1656 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
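/*
 * Worked example of the sizing above (illustrative numbers only): with
 * 8-byte pointers, a raw dlif_size of 100 bytes becomes
 * 100 + 8 + 8 = 116, which P2ROUNDUP() rounds to 120. The extra
 * sizeof(void *) + sizeof(u_int64_t) is assumed to leave room to save
 * the original buffer pointer and still hand back a 64-bit aligned
 * dlil_ifnet from within the same allocation.
 */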
1657 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1658 0, DLIF_ZONE_NAME);
1659 if (dlif_zone == NULL) {
1660 panic_plain("%s: failed allocating %s", __func__,
1661 DLIF_ZONE_NAME);
1662 /* NOTREACHED */
1663 }
1664 zone_change(dlif_zone, Z_EXPAND, TRUE);
1665 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1666
1667 dlif_filt_size = sizeof(struct ifnet_filter);
1668 dlif_filt_zone = zinit(dlif_filt_size,
1669 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1670 if (dlif_filt_zone == NULL) {
1671 panic_plain("%s: failed allocating %s", __func__,
1672 DLIF_FILT_ZONE_NAME);
1673 /* NOTREACHED */
1674 }
1675 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1676 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1677
1678 dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
1679 dlif_phash_zone = zinit(dlif_phash_size,
1680 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1681 if (dlif_phash_zone == NULL) {
1682 panic_plain("%s: failed allocating %s", __func__,
1683 DLIF_PHASH_ZONE_NAME);
1684 /* NOTREACHED */
1685 }
1686 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1687 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1688
1689 dlif_proto_size = sizeof(struct if_proto);
1690 dlif_proto_zone = zinit(dlif_proto_size,
1691 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1692 if (dlif_proto_zone == NULL) {
1693 panic_plain("%s: failed allocating %s", __func__,
1694 DLIF_PROTO_ZONE_NAME);
1695 /* NOTREACHED */
1696 }
1697 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1698 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1699
1700 dlif_tcpstat_size = sizeof(struct tcpstat_local);
1701 /* Enforce 64-bit alignment for tcpstat_local structure */
1702 dlif_tcpstat_bufsize =
1703 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
1704 dlif_tcpstat_bufsize =
1705 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
1706 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1707 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1708 DLIF_TCPSTAT_ZONE_NAME);
1709 if (dlif_tcpstat_zone == NULL) {
1710 panic_plain("%s: failed allocating %s", __func__,
1711 DLIF_TCPSTAT_ZONE_NAME);
1712 /* NOTREACHED */
1713 }
1714 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1715 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1716
1717 dlif_udpstat_size = sizeof(struct udpstat_local);
1718 /* Enforce 64-bit alignment for udpstat_local structure */
1719 dlif_udpstat_bufsize =
1720 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
1721 dlif_udpstat_bufsize =
1722 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
1723 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1724 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1725 DLIF_UDPSTAT_ZONE_NAME);
1726 if (dlif_udpstat_zone == NULL) {
1727 panic_plain("%s: failed allocating %s", __func__,
1728 DLIF_UDPSTAT_ZONE_NAME);
1729 /* NOTREACHED */
1730 }
1731 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1732 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1733
1734 ifnet_llreach_init();
1735 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1736
1737 TAILQ_INIT(&dlil_ifnet_head);
1738 TAILQ_INIT(&ifnet_head);
1739 TAILQ_INIT(&ifnet_detaching_head);
1740 TAILQ_INIT(&ifnet_ordered_head);
1741
1742 /* Set up the lock groups we will use */
1743 dlil_grp_attributes = lck_grp_attr_alloc_init();
1744
1745 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1746 dlil_grp_attributes);
1747 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1748 dlil_grp_attributes);
1749 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1750 dlil_grp_attributes);
1751 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1752 dlil_grp_attributes);
1753 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1754 dlil_grp_attributes);
1755
1756 /* Set up the lock attributes we will use */
1757 dlil_lck_attributes = lck_attr_alloc_init();
1758
1759 ifnet_lock_attr = lck_attr_alloc_init();
1760
1761 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1762 dlil_lck_attributes);
1763 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1764 lck_mtx_init(&dlil_thread_sync_lock, dlil_lock_group, dlil_lck_attributes);
1765
1766 /* Set up interface flow control related items */
1767 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1768
1769 ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
1770 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1771 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1772 if (ifnet_fc_zone == NULL) {
1773 panic_plain("%s: failed allocating %s", __func__,
1774 IFNET_FC_ZONE_NAME);
1775 /* NOTREACHED */
1776 }
1777 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1778 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1779
1780 /* Initialize interface address subsystem */
1781 ifa_init();
1782
1783 #if PF
1784 /* Initialize the packet filter */
1785 pfinit();
1786 #endif /* PF */
1787
1788 /* Initialize queue algorithms */
1789 classq_init();
1790
1791 /* Initialize packet schedulers */
1792 pktsched_init();
1793
1794 /* Initialize flow advisory subsystem */
1795 flowadv_init();
1796
1797 /* Initialize the pktap virtual interface */
1798 pktap_init();
1799
1800 /* Initialize the service class to dscp map */
1801 net_qos_map_init();
1802
1803 /* Initialize the interface port list */
1804 if_ports_used_init();
1805
1806 /* Initialize the interface low power mode event handler */
1807 if_low_power_evhdlr_init();
1808
1809 #if DEBUG || DEVELOPMENT
1810 /* Run self-tests */
1811 dlil_verify_sum16();
1812 #endif /* DEBUG || DEVELOPMENT */
1813
1814 /* Initialize link layer table */
1815 lltable_glbl_init();
1816
1817 /*
1818 * Create and start up the main DLIL input thread and the interface
1819 * detacher thread once everything is initialized.
1820 */
1821 dlil_incr_pending_thread_count();
1822 dlil_create_input_thread(NULL, dlil_main_input_thread);
1823
1824 /*
1825 * Create ifnet detacher thread.
1826 * When an interface gets detached, part of the detach processing
1827 * is delayed. The interface is added to the delayed detach list
1828 * and this thread is woken up to call ifnet_detach_final
1829 * on these interfaces.
1830 */
1831 dlil_incr_pending_thread_count();
1832 if (kernel_thread_start(ifnet_detacher_thread_func,
1833 NULL, &thread) != KERN_SUCCESS) {
1834 panic_plain("%s: couldn't create detacher thread", __func__);
1835 /* NOTREACHED */
1836 }
1837 thread_deallocate(thread);
1838
1839 /*
1840 * Wait for the dlil kernel threads created above to get
1841 * scheduled and run at least once before we proceed.
1842 */
1843 lck_mtx_lock(&dlil_thread_sync_lock);
1844 while (dlil_pending_thread_cnt != 0) {
1845 DLIL_PRINTF("%s: Waiting for all the created dlil kernel threads "
1846 "to get scheduled at least once.\n", __func__);
1847 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock, (PZERO - 1),
1848 __func__, NULL);
1849 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
1850 }
1851 lck_mtx_unlock(&dlil_thread_sync_lock);
1852 DLIL_PRINTF("%s: All the created dlil kernel threads have been scheduled "
1853 "at least once. Proceeding.\n", __func__);
1854 }
1855
1856 static void
1857 if_flt_monitor_busy(struct ifnet *ifp)
1858 {
1859 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1860
1861 ++ifp->if_flt_busy;
1862 VERIFY(ifp->if_flt_busy != 0);
1863 }
1864
1865 static void
1866 if_flt_monitor_unbusy(struct ifnet *ifp)
1867 {
1868 if_flt_monitor_leave(ifp);
1869 }
1870
1871 static void
1872 if_flt_monitor_enter(struct ifnet *ifp)
1873 {
1874 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1875
1876 while (ifp->if_flt_busy) {
1877 ++ifp->if_flt_waiters;
1878 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1879 (PZERO - 1), "if_flt_monitor", NULL);
1880 }
1881 if_flt_monitor_busy(ifp);
1882 }
1883
1884 static void
1885 if_flt_monitor_leave(struct ifnet *ifp)
1886 {
1887 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1888
1889 VERIFY(ifp->if_flt_busy != 0);
1890 --ifp->if_flt_busy;
1891
1892 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1893 ifp->if_flt_waiters = 0;
1894 wakeup(&ifp->if_flt_head);
1895 }
1896 }
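
/*
 * The helpers above form a small monitor around if_flt_head. A sketch of
 * the usage pattern (as seen in the attach and detach paths below):
 *
 *	lck_mtx_lock(&ifp->if_flt_lock);
 *	if_flt_monitor_enter(ifp);	-- wait out in-flight walkers, mark busy
 *	... mutate ifp->if_flt_head ...
 *	if_flt_monitor_leave(ifp);	-- clear busy, wake any waiters
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 */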
1897
1898 __private_extern__ int
1899 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1900 interface_filter_t *filter_ref, u_int32_t flags)
1901 {
1902 int retval = 0;
1903 struct ifnet_filter *filter = NULL;
1904
1905 ifnet_head_lock_shared();
1906 /* Check that the interface is in the global list */
1907 if (!ifnet_lookup(ifp)) {
1908 retval = ENXIO;
1909 goto done;
1910 }
1911
1912 filter = zalloc(dlif_filt_zone);
1913 if (filter == NULL) {
1914 retval = ENOMEM;
1915 goto done;
1916 }
1917 bzero(filter, dlif_filt_size);
1918
1919 /* refcnt held above during lookup */
1920 filter->filt_flags = flags;
1921 filter->filt_ifp = ifp;
1922 filter->filt_cookie = if_filter->iff_cookie;
1923 filter->filt_name = if_filter->iff_name;
1924 filter->filt_protocol = if_filter->iff_protocol;
1925 /*
1926 * Do not install filter callbacks for internal coproc interface
1927 */
1928 if (!IFNET_IS_INTCOPROC(ifp)) {
1929 filter->filt_input = if_filter->iff_input;
1930 filter->filt_output = if_filter->iff_output;
1931 filter->filt_event = if_filter->iff_event;
1932 filter->filt_ioctl = if_filter->iff_ioctl;
1933 }
1934 filter->filt_detached = if_filter->iff_detached;
1935
1936 lck_mtx_lock(&ifp->if_flt_lock);
1937 if_flt_monitor_enter(ifp);
1938
1939 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1940 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1941
1942 if_flt_monitor_leave(ifp);
1943 lck_mtx_unlock(&ifp->if_flt_lock);
1944
1945 *filter_ref = filter;
1946
1947 /*
1948 * Bump filter count and route_generation ID to let TCP
1949 * know it shouldn't do TSO on this connection
1950 */
1951 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1952 ifnet_filter_update_tso(TRUE);
1953 }
1954 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1955 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1956 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1957 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1958 }
1959 if (dlil_verbose) {
1960 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
1961 if_filter->iff_name);
1962 }
1963 done:
1964 ifnet_head_done();
1965 if (retval != 0 && ifp != NULL) {
1966 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1967 if_name(ifp), if_filter->iff_name, retval);
1968 }
1969 if (retval != 0 && filter != NULL) {
1970 zfree(dlif_filt_zone, filter);
1971 }
1972
1973 return retval;
1974 }
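
/*
 * Example: a hypothetical caller populating a struct iff_filter before
 * attaching it. This is only a sketch; my_cookie, my_input and
 * my_detached are made-up names, and callers outside dlil typically
 * reach this path through the interface filter KPI (iflt_attach())
 * rather than calling dlil_attach_filter() directly:
 *
 *	struct iff_filter f = {
 *		.iff_cookie   = my_cookie,
 *		.iff_name     = "com.example.filter",
 *		.iff_protocol = 0,		-- all protocols
 *		.iff_input    = my_input,
 *		.iff_detached = my_detached,
 *	};
 *	interface_filter_t ref;
 *	int err = dlil_attach_filter(ifp, &f, &ref, DLIL_IFF_TSO);
 *
 * Passing DLIL_IFF_TSO marks the filter as TSO-compatible, so attaching
 * it does not force TCP to back off from TSO on this interface.
 */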
1975
1976 static int
1977 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1978 {
1979 int retval = 0;
1980
1981 if (detached == 0) {
1982 ifnet_t ifp = NULL;
1983
1984 ifnet_head_lock_shared();
1985 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1986 interface_filter_t entry = NULL;
1987
1988 lck_mtx_lock(&ifp->if_flt_lock);
1989 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1990 if (entry != filter || entry->filt_skip) {
1991 continue;
1992 }
1993 /*
1994 * We've found a match; since it's possible
1995 * that the thread gets blocked in the monitor,
1996 * we do the lock dance. Interface should
1997 * not be detached since we still have a use
1998 * count held during filter attach.
1999 */
2000 entry->filt_skip = 1; /* skip input/output */
2001 lck_mtx_unlock(&ifp->if_flt_lock);
2002 ifnet_head_done();
2003
2004 lck_mtx_lock(&ifp->if_flt_lock);
2005 if_flt_monitor_enter(ifp);
2006 LCK_MTX_ASSERT(&ifp->if_flt_lock,
2007 LCK_MTX_ASSERT_OWNED);
2008
2009 /* Remove the filter from the list */
2010 TAILQ_REMOVE(&ifp->if_flt_head, filter,
2011 filt_next);
2012
2013 if_flt_monitor_leave(ifp);
2014 lck_mtx_unlock(&ifp->if_flt_lock);
2015 if (dlil_verbose) {
2016 DLIL_PRINTF("%s: %s filter detached\n",
2017 if_name(ifp), filter->filt_name);
2018 }
2019 goto destroy;
2020 }
2021 lck_mtx_unlock(&ifp->if_flt_lock);
2022 }
2023 ifnet_head_done();
2024
2025 /* filter parameter is not a valid filter ref */
2026 retval = EINVAL;
2027 goto done;
2028 }
2029
2030 if (dlil_verbose) {
2031 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
2032 }
2033
2034 destroy:
2035
2036 /* Call the detached function if there is one */
2037 if (filter->filt_detached) {
2038 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
2039 }
2040
2041 /*
2042 * Decrease filter count and bump route_generation ID to let TCP
2043 * know it should reevaluate whether to do TSO
2044 */
2045 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2046 ifnet_filter_update_tso(FALSE);
2047 }
2048
2049 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2050
2051 /* Free the filter */
2052 zfree(dlif_filt_zone, filter);
2053 filter = NULL;
2054 done:
2055 if (retval != 0 && filter != NULL) {
2056 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2057 filter->filt_name, retval);
2058 }
2059
2060 return retval;
2061 }
2062
2063 __private_extern__ void
2064 dlil_detach_filter(interface_filter_t filter)
2065 {
2066 if (filter == NULL) {
2067 return;
2068 }
2069 dlil_detach_filter_internal(filter, 0);
2070 }
2071
2072 __attribute__((noreturn))
2073 static void
2074 dlil_main_input_thread_func(void *v, wait_result_t w)
2075 {
2076 #pragma unused(w)
2077 struct dlil_threading_info *inp = v;
2078
2079 VERIFY(inp == dlil_main_input_thread);
2080 VERIFY(inp->ifp == NULL);
2081 VERIFY(current_thread() == inp->input_thr);
2082
2083 dlil_decr_pending_thread_count();
2084 lck_mtx_lock(&inp->input_lck);
2085 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2086 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2087 lck_mtx_unlock(&inp->input_lck);
2088 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2089 /* NOTREACHED */
2090 __builtin_unreachable();
2091 }
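
/*
 * The DLIL kernel threads in this file (the main and per-interface input
 * threads, the rx-poll thread, and the starter/poller threads further
 * below) all park themselves using the same continuation pattern. A
 * generic sketch (lock, event, continuation and arg are placeholders):
 *
 *	lck_mtx_lock(&lock);
 *	(void) assert_wait(&event, THREAD_UNINT);	-- arm the wait
 *	lck_mtx_unlock(&lock);
 *	(void) thread_block_parameter(continuation, arg);
 *	-- does not return; when the event is woken, the scheduler
 *	-- re-enters at continuation(arg, wait_result)
 */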
2092
2093 /*
2094 * Main input thread:
2095 *
2096 * a) handles all inbound packets for lo0
2097 * b) handles all inbound packets for interfaces with no dedicated
2098 * input thread (e.g. anything but Ethernet/PDP or those that support
2099 * opportunistic polling.)
2100 * c) protocol registrations
2101 * d) packet injections
2102 */
2103 __attribute__((noreturn))
2104 static void
2105 dlil_main_input_thread_cont(void *v, wait_result_t wres)
2106 {
2107 struct dlil_main_threading_info *inpm = v;
2108 struct dlil_threading_info *inp = v;
2109
2110 /* main input thread is uninterruptible */
2111 VERIFY(wres != THREAD_INTERRUPTED);
2112 lck_mtx_lock_spin(&inp->input_lck);
2113 VERIFY(!(inp->input_waiting & (DLIL_INPUT_TERMINATE |
2114 DLIL_INPUT_RUNNING)));
2115 inp->input_waiting |= DLIL_INPUT_RUNNING;
2116
2117 while (1) {
2118 struct mbuf *m = NULL, *m_loop = NULL;
2119 u_int32_t m_cnt, m_cnt_loop;
2120 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2121 boolean_t proto_req;
2122
2123 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2124
2125 proto_req = (inp->input_waiting &
2126 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2127
2128 /* Packets for non-dedicated interfaces other than lo0 */
2129 m_cnt = qlen(&inp->rcvq_pkts);
2130 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2131 m = pkt.cp_mbuf;
2132
2133 /* Packets exclusive to lo0 */
2134 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2135 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2136 m_loop = pkt.cp_mbuf;
2137
2138 inp->wtot = 0;
2139
2140 lck_mtx_unlock(&inp->input_lck);
2141
2142 /*
2143 * NOTE: we should consider adding thread starvation
2144 * safeguards here when dealing with long chains of
2145 * packets.
2146 */
2147 if (m_loop != NULL) {
2148 dlil_input_packet_list_extended(lo_ifp, m_loop,
2149 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
2150 }
2151
2152 if (m != NULL) {
2153 dlil_input_packet_list_extended(NULL, m,
2154 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
2155 }
2156
2157 if (proto_req) {
2158 proto_input_run();
2159 }
2160
2161 lck_mtx_lock_spin(&inp->input_lck);
2162 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2163 /* main input thread cannot be terminated */
2164 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2165 if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2166 break;
2167 }
2168 }
2169
2170 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2171 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2172 lck_mtx_unlock(&inp->input_lck);
2173 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2174
2175 VERIFY(0); /* we should never get here */
2176 /* NOTREACHED */
2177 __builtin_unreachable();
2178 }
2179
2180 /*
2181 * Input thread for interfaces with legacy input model.
2182 */
2183 __attribute__((noreturn))
2184 static void
2185 dlil_input_thread_func(void *v, wait_result_t w)
2186 {
2187 #pragma unused(w)
2188 char thread_name[MAXTHREADNAMESIZE];
2189 struct dlil_threading_info *inp = v;
2190 struct ifnet *ifp = inp->ifp;
2191
2192 VERIFY(inp != dlil_main_input_thread);
2193 VERIFY(ifp != NULL);
2194 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2195 !(ifp->if_xflags & IFXF_LEGACY));
2196 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2197 !(ifp->if_xflags & IFXF_LEGACY));
2198 VERIFY(current_thread() == inp->input_thr);
2199
2200 /* construct the name for this thread, and then apply it */
2201 bzero(thread_name, sizeof(thread_name));
2202 (void) snprintf(thread_name, sizeof(thread_name),
2203 "dlil_input_%s", ifp->if_xname);
2204 thread_set_thread_name(inp->input_thr, thread_name);
2205 ifnet_decr_pending_thread_count(ifp);
2206
2207 lck_mtx_lock(&inp->input_lck);
2208 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2209 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2210 lck_mtx_unlock(&inp->input_lck);
2211 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2212 /* NOTREACHED */
2213 __builtin_unreachable();
2214 }
2215
2216 __attribute__((noreturn))
2217 static void
2218 dlil_input_thread_cont(void *v, wait_result_t wres)
2219 {
2220 struct dlil_threading_info *inp = v;
2221 struct ifnet *ifp = inp->ifp;
2222
2223 lck_mtx_lock_spin(&inp->input_lck);
2224 if (__improbable(wres == THREAD_INTERRUPTED ||
2225 (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
2226 goto terminate;
2227 }
2228
2229 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2230 inp->input_waiting |= DLIL_INPUT_RUNNING;
2231
2232 while (1) {
2233 struct mbuf *m = NULL;
2234 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2235 boolean_t notify = FALSE;
2236 u_int32_t m_cnt;
2237
2238 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2239
2240 /*
2241 * Protocol registration and injection must always use
2242 * the main input thread; in theory the latter can utilize
2243 * the corresponding input thread of the interface the packet
2244 * arrived on, but that requires knowing the interface in advance
2245 * (and the benefits might not be worth the trouble.)
2246 */
2247 VERIFY(!(inp->input_waiting &
2248 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2249
2250 /* Packets for this interface */
2251 m_cnt = qlen(&inp->rcvq_pkts);
2252 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2253 m = pkt.cp_mbuf;
2254
2255 inp->wtot = 0;
2256
2257 notify = dlil_input_stats_sync(ifp, inp);
2258
2259 lck_mtx_unlock(&inp->input_lck);
2260
2261 if (notify) {
2262 ifnet_notify_data_threshold(ifp);
2263 }
2264
2265 /*
2266 * NOTE: we should consider adding thread starvation
2267 * safeguards here when dealing with long chains of
2268 * packets.
2269 */
2270 if (m != NULL) {
2271 dlil_input_packet_list_extended(NULL, m,
2272 m_cnt, ifp->if_poll_mode);
2273 }
2274
2275 lck_mtx_lock_spin(&inp->input_lck);
2276 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2277 if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2278 break;
2279 }
2280 }
2281
2282 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2283
2284 if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
2285 terminate:
2286 lck_mtx_unlock(&inp->input_lck);
2287 dlil_terminate_input_thread(inp);
2288 /* NOTREACHED */
2289 } else {
2290 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2291 lck_mtx_unlock(&inp->input_lck);
2292 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2293 /* NOTREACHED */
2294 }
2295
2296 VERIFY(0); /* we should never get here */
2297 /* NOTREACHED */
2298 __builtin_unreachable();
2299 }
2300
2301 /*
2302 * Input thread for interfaces with opportunistic polling input model.
2303 */
2304 __attribute__((noreturn))
2305 static void
2306 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2307 {
2308 #pragma unused(w)
2309 char thread_name[MAXTHREADNAMESIZE];
2310 struct dlil_threading_info *inp = v;
2311 struct ifnet *ifp = inp->ifp;
2312
2313 VERIFY(inp != dlil_main_input_thread);
2314 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
2315 (ifp->if_xflags & IFXF_LEGACY));
2316 VERIFY(current_thread() == inp->input_thr);
2317
2318 /* construct the name for this thread, and then apply it */
2319 bzero(thread_name, sizeof(thread_name));
2320 (void) snprintf(thread_name, sizeof(thread_name),
2321 "dlil_input_poll_%s", ifp->if_xname);
2322 thread_set_thread_name(inp->input_thr, thread_name);
2323 ifnet_decr_pending_thread_count(ifp);
2324
2325 lck_mtx_lock(&inp->input_lck);
2326 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2327 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2328 lck_mtx_unlock(&inp->input_lck);
2329 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
2330 /* NOTREACHED */
2331 __builtin_unreachable();
2332 }
2333
2334 __attribute__((noreturn))
2335 static void
2336 dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
2337 {
2338 struct dlil_threading_info *inp = v;
2339 struct ifnet *ifp = inp->ifp;
2340 struct timespec ts;
2341
2342 lck_mtx_lock_spin(&inp->input_lck);
2343 if (__improbable(wres == THREAD_INTERRUPTED ||
2344 (inp->input_waiting & DLIL_INPUT_TERMINATE))) {
2345 goto terminate;
2346 }
2347
2348 VERIFY(!(inp->input_waiting & DLIL_INPUT_RUNNING));
2349 inp->input_waiting |= DLIL_INPUT_RUNNING;
2350
2351 while (1) {
2352 struct mbuf *m = NULL;
2353 u_int32_t m_cnt, m_size, poll_req = 0;
2354 ifnet_model_t mode;
2355 struct timespec now, delta;
2356 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2357 boolean_t notify;
2358 u_int64_t ival;
2359
2360 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2361
2362 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
2363 ival = IF_RXPOLL_INTERVALTIME_MIN;
2364 }
2365
2366 /* Link parameters changed? */
2367 if (ifp->if_poll_update != 0) {
2368 ifp->if_poll_update = 0;
2369 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2370 }
2371
2372 /* Current operating mode */
2373 mode = ifp->if_poll_mode;
2374
2375 /*
2376 * Protocol registration and injection must always use
2377 * the main input thread; in theory the latter can utilize
2378 * the corresponding input thread of the interface the packet
2379 * arrived on, but that requires knowing the interface in advance
2380 * (and the benefits might not be worth the trouble.)
2381 */
2382 VERIFY(!(inp->input_waiting &
2383 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2384
2385 /* Total count of all packets */
2386 m_cnt = qlen(&inp->rcvq_pkts);
2387
2388 /* Total bytes of all packets */
2389 m_size = qsize(&inp->rcvq_pkts);
2390
2391 /* Packets for this interface */
2392 _getq_all(&inp->rcvq_pkts, &pkt, NULL, NULL, NULL);
2393 m = pkt.cp_mbuf;
2394 VERIFY(m != NULL || m_cnt == 0);
2395
2396 nanouptime(&now);
2397 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
2398 *(&ifp->if_poll_sample_lasttime) = *(&now);
2399 }
2400
2401 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
2402 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
2403 u_int32_t ptot, btot;
2404
2405 /* Accumulate statistics for current sampling */
2406 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
2407
2408 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
2409 goto skip;
2410 }
2411
2412 *(&ifp->if_poll_sample_lasttime) = *(&now);
2413
2414 /* Calculate min/max of inbound bytes */
2415 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
2416 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
2417 ifp->if_rxpoll_bmin = btot;
2418 }
2419 if (btot > ifp->if_rxpoll_bmax) {
2420 ifp->if_rxpoll_bmax = btot;
2421 }
2422
2423 /* Calculate EWMA of inbound bytes */
2424 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
2425
2426 /* Calculate min/max of inbound packets */
2427 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
2428 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
2429 ifp->if_rxpoll_pmin = ptot;
2430 }
2431 if (ptot > ifp->if_rxpoll_pmax) {
2432 ifp->if_rxpoll_pmax = ptot;
2433 }
2434
2435 /* Calculate EWMA of inbound packets */
2436 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
2437
2438 /* Reset sampling statistics */
2439 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
2440
2441 /* Calculate EWMA of wakeup requests */
2442 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2443 inp->wtot = 0;
2444
2445 if (dlil_verbose) {
2446 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
2447 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2448 }
2449 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
2450 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2451 *(&ifp->if_poll_dbg_lasttime) = *(&now);
2452 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
2453 "limits [%d/%d], wreq avg %d "
2454 "limits [%d/%d], bytes avg %d "
2455 "limits [%d/%d]\n", if_name(ifp),
2456 (ifp->if_poll_mode ==
2457 IFNET_MODEL_INPUT_POLL_ON) ?
2458 "ON" : "OFF", ifp->if_rxpoll_pavg,
2459 ifp->if_rxpoll_pmax,
2460 ifp->if_rxpoll_plowat,
2461 ifp->if_rxpoll_phiwat,
2462 ifp->if_rxpoll_wavg,
2463 ifp->if_rxpoll_wlowat,
2464 ifp->if_rxpoll_whiwat,
2465 ifp->if_rxpoll_bavg,
2466 ifp->if_rxpoll_blowat,
2467 ifp->if_rxpoll_bhiwat);
2468 }
2469 }
2470
2471 /* Perform mode transition, if necessary */
2472 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
2473 *(&ifp->if_poll_mode_lasttime) = *(&now);
2474 }
2475
2476 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
2477 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
2478 goto skip;
2479 }
2480
2481 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
2482 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
2483 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
2484 mode = IFNET_MODEL_INPUT_POLL_OFF;
2485 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
2486 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
2487 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
2488 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
2489 mode = IFNET_MODEL_INPUT_POLL_ON;
2490 }
2491
2492 if (mode != ifp->if_poll_mode) {
2493 ifp->if_poll_mode = mode;
2494 *(&ifp->if_poll_mode_lasttime) = *(&now);
2495 poll_req++;
2496 }
2497 }
2498 skip:
2499 notify = dlil_input_stats_sync(ifp, inp);
2500
2501 lck_mtx_unlock(&inp->input_lck);
2502
2503 if (notify) {
2504 ifnet_notify_data_threshold(ifp);
2505 }
2506
2507 /*
2508 * If there's a mode change and the interface is still attached,
2509 * perform a downcall to the driver for the new mode. Also
2510 * hold an IO refcnt on the interface to prevent it from
2511 * being detached (will be released below.)
2512 */
2513 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2514 struct ifnet_model_params p = {
2515 .model = mode, .reserved = { 0 }
2516 };
2517 errno_t err;
2518
2519 if (dlil_verbose) {
2520 DLIL_PRINTF("%s: polling is now %s, "
2521 "pkts avg %d max %d limits [%d/%d], "
2522 "wreq avg %d limits [%d/%d], "
2523 "bytes avg %d limits [%d/%d]\n",
2524 if_name(ifp),
2525 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2526 "ON" : "OFF", ifp->if_rxpoll_pavg,
2527 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
2528 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
2529 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
2530 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
2531 ifp->if_rxpoll_bhiwat);
2532 }
2533
2534 if ((err = ((*ifp->if_input_ctl)(ifp,
2535 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
2536 DLIL_PRINTF("%s: error setting polling mode "
2537 "to %s (%d)\n", if_name(ifp),
2538 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2539 "ON" : "OFF", err);
2540 }
2541
2542 switch (mode) {
2543 case IFNET_MODEL_INPUT_POLL_OFF:
2544 ifnet_set_poll_cycle(ifp, NULL);
2545 ifp->if_rxpoll_offreq++;
2546 if (err != 0) {
2547 ifp->if_rxpoll_offerr++;
2548 }
2549 break;
2550
2551 case IFNET_MODEL_INPUT_POLL_ON:
2552 net_nsectimer(&ival, &ts);
2553 ifnet_set_poll_cycle(ifp, &ts);
2554 ifnet_poll(ifp);
2555 ifp->if_rxpoll_onreq++;
2556 if (err != 0) {
2557 ifp->if_rxpoll_onerr++;
2558 }
2559 break;
2560
2561 default:
2562 VERIFY(0);
2563 /* NOTREACHED */
2564 }
2565
2566 /* Release the IO refcnt */
2567 ifnet_decr_iorefcnt(ifp);
2568 }
2569
2570 /*
2571 * NOTE: we should consider adding thread starvation
2572 * safeguards here when dealing with long chains of
2573 * packets.
2574 */
2575 if (m != NULL) {
2576 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2577 }
2578
2579 lck_mtx_lock_spin(&inp->input_lck);
2580 VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
2581 if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2582 break;
2583 }
2584 }
2585
2586 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2587
2588 if (__improbable(inp->input_waiting & DLIL_INPUT_TERMINATE)) {
2589 terminate:
2590 lck_mtx_unlock(&inp->input_lck);
2591 dlil_terminate_input_thread(inp);
2592 /* NOTREACHED */
2593 } else {
2594 (void) assert_wait(&inp->input_waiting, THREAD_UNINT);
2595 lck_mtx_unlock(&inp->input_lck);
2596 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
2597 inp);
2598 /* NOTREACHED */
2599 }
2600
2601 VERIFY(0); /* we should never get here */
2602 /* NOTREACHED */
2603 __builtin_unreachable();
2604 }
2605
2606 errno_t
2607 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
2608 {
2609 if (p != NULL) {
2610 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2611 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2612 return EINVAL;
2613 }
2614 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2615 p->packets_lowat >= p->packets_hiwat) {
2616 return EINVAL;
2617 }
2618 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2619 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2620 return EINVAL;
2621 }
2622 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2623 p->bytes_lowat >= p->bytes_hiwat) {
2624 return EINVAL;
2625 }
2626 if (p->interval_time != 0 &&
2627 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
2628 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2629 }
2630 }
2631 return 0;
2632 }
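
/*
 * For example (hypothetical values): { .packets_lowat = 8,
 * .packets_hiwat = 32 } passes validation, while { .packets_lowat = 8,
 * .packets_hiwat = 0 } or { .packets_lowat = 32, .packets_hiwat = 8 }
 * are rejected with EINVAL; an interval_time below
 * IF_RXPOLL_INTERVALTIME_MIN is silently clamped rather than rejected.
 */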
2633
2634 void
2635 dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2636 {
2637 u_int64_t sample_holdtime, inbw;
2638
2639 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2640 sample_holdtime = 0; /* polling is disabled */
2641 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
2642 ifp->if_rxpoll_blowat = 0;
2643 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
2644 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
2645 ifp->if_rxpoll_plim = 0;
2646 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2647 } else {
2648 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2649 u_int64_t ival;
2650 unsigned int n, i;
2651
2652 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2653 if (inbw < rxpoll_tbl[i].speed) {
2654 break;
2655 }
2656 n = i;
2657 }
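/*
 * Illustration of the lookup above (hypothetical table contents): if
 * rxpoll_tbl[] held rows for 10 Mbps, 100 Mbps and 1 Gbps and inbw were
 * 300 Mbps, the loop would break on the 1 Gbps row and leave n at the
 * 100 Mbps row, i.e. the highest row whose speed does not exceed the
 * current input link rate.
 */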
2658 /* auto-tune if caller didn't specify a value */
2659 plowat = ((p == NULL || p->packets_lowat == 0) ?
2660 rxpoll_tbl[n].plowat : p->packets_lowat);
2661 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2662 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2663 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2664 rxpoll_tbl[n].blowat : p->bytes_lowat);
2665 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2666 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2667 plim = ((p == NULL || p->packets_limit == 0) ?
2668 if_rxpoll_max : p->packets_limit);
2669 ival = ((p == NULL || p->interval_time == 0) ?
2670 if_rxpoll_interval_time : p->interval_time);
2671
2672 VERIFY(plowat != 0 && phiwat != 0);
2673 VERIFY(blowat != 0 && bhiwat != 0);
2674 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2675
2676 sample_holdtime = if_rxpoll_sample_holdtime;
2677 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
2678 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
2679 ifp->if_rxpoll_plowat = plowat;
2680 ifp->if_rxpoll_phiwat = phiwat;
2681 ifp->if_rxpoll_blowat = blowat;
2682 ifp->if_rxpoll_bhiwat = bhiwat;
2683 ifp->if_rxpoll_plim = plim;
2684 ifp->if_rxpoll_ival = ival;
2685 }
2686
2687 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
2688 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
2689
2690 if (dlil_verbose) {
2691 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
2692 "poll interval %llu nsec, pkts per poll %u, "
2693 "pkt limits [%u/%u], wreq limits [%u/%u], "
2694 "bytes limits [%u/%u]\n", if_name(ifp),
2695 inbw, sample_holdtime, ifp->if_rxpoll_ival,
2696 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
2697 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
2698 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
2699 ifp->if_rxpoll_bhiwat);
2700 }
2701 }
2702
2703 /*
2704 * Must be called on an attached ifnet (caller is expected to check.)
2705 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2706 */
2707 errno_t
2708 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2709 boolean_t locked)
2710 {
2711 errno_t err;
2712 struct dlil_threading_info *inp;
2713
2714 VERIFY(ifp != NULL);
2715 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2716 return ENXIO;
2717 }
2718 err = dlil_rxpoll_validate_params(p);
2719 if (err != 0) {
2720 return err;
2721 }
2722
2723 if (!locked) {
2724 lck_mtx_lock(&inp->input_lck);
2725 }
2726 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2727 /*
2728 * Normally, we'd reset the parameters to the auto-tuned values
2729 * if the input thread detects a change in link rate. If the
2730 * driver provides its own parameters right after a link rate
2731 * changes, but before the input thread gets to run, we want to
2732 * make sure to keep the driver's values. Clearing if_poll_update
2733 * will achieve that.
2734 */
2735 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2736 ifp->if_poll_update = 0;
2737 }
2738 dlil_rxpoll_update_params(ifp, p);
2739 if (!locked) {
2740 lck_mtx_unlock(&inp->input_lck);
2741 }
2742 return 0;
2743 }
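
/*
 * Usage sketch (hypothetical driver values): a driver that wants to pin
 * its own polling thresholds instead of relying on auto-tuning could do:
 *
 *	struct ifnet_poll_params p = {
 *		.packets_lowat = 16,
 *		.packets_hiwat = 64,
 *		.interval_time = IF_RXPOLL_INTERVALTIME_MIN,
 *	};
 *	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);
 *
 * Passing p == NULL instead reverts the thresholds to the auto-tuned
 * values derived from the current input link rate.
 */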
2744
2745 /*
2746 * Must be called on an attached ifnet (caller is expected to check.)
2747 */
2748 errno_t
2749 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2750 {
2751 struct dlil_threading_info *inp;
2752
2753 VERIFY(ifp != NULL && p != NULL);
2754 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2755 return ENXIO;
2756 }
2757
2758 bzero(p, sizeof(*p));
2759
2760 lck_mtx_lock(&inp->input_lck);
2761 p->packets_limit = ifp->if_rxpoll_plim;
2762 p->packets_lowat = ifp->if_rxpoll_plowat;
2763 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2764 p->bytes_lowat = ifp->if_rxpoll_blowat;
2765 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2766 p->interval_time = ifp->if_rxpoll_ival;
2767 lck_mtx_unlock(&inp->input_lck);
2768
2769 return 0;
2770 }
2771
2772 errno_t
2773 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2774 const struct ifnet_stat_increment_param *s)
2775 {
2776 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2777 }
2778
2779 errno_t
2780 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2781 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2782 {
2783 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2784 }
2785
2786 errno_t
2787 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2788 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2789 {
2790 return ifnet_input_common(ifp, m_head, m_tail, s,
2791 (m_head != NULL), TRUE);
2792 }
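
/*
 * Driver-side sketch (hypothetical names rx_count/rx_bytes): a driver
 * handing a chain of received packets to the stack through the extended
 * variant must supply an exact packet count in the stat increment, since
 * it is asserted against the chain below; byte counts may be approximate:
 *
 *	struct ifnet_stat_increment_param s;
 *	bzero(&s, sizeof(s));
 *	s.packets_in = rx_count;	-- must match the chain length
 *	s.bytes_in = rx_bytes;
 *	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
 */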
2793
2794 static errno_t
2795 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2796 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2797 {
2798 dlil_input_func input_func;
2799 struct ifnet_stat_increment_param _s;
2800 u_int32_t m_cnt = 0, m_size = 0;
2801 struct mbuf *last;
2802 errno_t err = 0;
2803
2804 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2805 if (m_head != NULL) {
2806 mbuf_freem_list(m_head);
2807 }
2808 return EINVAL;
2809 }
2810
2811 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2812 VERIFY(m_tail == NULL || ext);
2813 VERIFY(s != NULL || !ext);
2814
2815 /*
2816 * Drop the packet(s) if the parameters are invalid, or if the
2817 * interface is no longer attached; else hold an IO refcnt to
2818 * prevent it from being detached (will be released below.)
2819 */
2820 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
2821 if (m_head != NULL) {
2822 mbuf_freem_list(m_head);
2823 }
2824 return EINVAL;
2825 }
2826
2827 input_func = ifp->if_input_dlil;
2828 VERIFY(input_func != NULL);
2829
2830 if (m_tail == NULL) {
2831 last = m_head;
2832 while (m_head != NULL) {
2833 #if IFNET_INPUT_SANITY_CHK
2834 if (dlil_input_sanity_check != 0) {
2835 DLIL_INPUT_CHECK(last, ifp);
2836 }
2837 #endif /* IFNET_INPUT_SANITY_CHK */
2838 m_cnt++;
2839 m_size += m_length(last);
2840 if (mbuf_nextpkt(last) == NULL) {
2841 break;
2842 }
2843 last = mbuf_nextpkt(last);
2844 }
2845 m_tail = last;
2846 } else {
2847 #if IFNET_INPUT_SANITY_CHK
2848 if (dlil_input_sanity_check != 0) {
2849 last = m_head;
2850 while (1) {
2851 DLIL_INPUT_CHECK(last, ifp);
2852 m_cnt++;
2853 m_size += m_length(last);
2854 if (mbuf_nextpkt(last) == NULL) {
2855 break;
2856 }
2857 last = mbuf_nextpkt(last);
2858 }
2859 } else {
2860 m_cnt = s->packets_in;
2861 m_size = s->bytes_in;
2862 last = m_tail;
2863 }
2864 #else
2865 m_cnt = s->packets_in;
2866 m_size = s->bytes_in;
2867 last = m_tail;
2868 #endif /* IFNET_INPUT_SANITY_CHK */
2869 }
2870
2871 if (last != m_tail) {
2872 panic_plain("%s: invalid input packet chain for %s, "
2873 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2874 m_tail, last);
2875 }
2876
2877 /*
2878 * Assert packet count only for the extended variant, for backwards
2879 * compatibility, since this came directly from the device driver.
2880 * Relax this assertion for input bytes, as the driver may have
2881 * included the link-layer headers in the computation; hence
2882 * m_size is just an approximation.
2883 */
2884 if (ext && s->packets_in != m_cnt) {
2885 panic_plain("%s: input packet count mismatch for %s, "
2886 "%d instead of %d\n", __func__, if_name(ifp),
2887 s->packets_in, m_cnt);
2888 }
2889
2890 if (s == NULL) {
2891 bzero(&_s, sizeof(_s));
2892 s = &_s;
2893 } else {
2894 _s = *s;
2895 }
2896 _s.packets_in = m_cnt;
2897 _s.bytes_in = m_size;
2898
2899 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2900
2901 if (ifp != lo_ifp) {
2902 /* Release the IO refcnt */
2903 ifnet_datamov_end(ifp);
2904 }
2905
2906 return err;
2907 }
2908
2909
2910 errno_t
2911 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2912 {
2913 return ifp->if_output(ifp, m);
2914 }
2915
2916 errno_t
2917 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2918 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2919 boolean_t poll, struct thread *tp)
2920 {
2921 struct dlil_threading_info *inp;
2922 u_int32_t m_cnt = s->packets_in;
2923 u_int32_t m_size = s->bytes_in;
2924 boolean_t notify = FALSE;
2925
2926 if ((inp = ifp->if_inp) == NULL) {
2927 inp = dlil_main_input_thread;
2928 }
2929
2930 /*
2931 * If there is a matching DLIL input thread associated with an
2932 * affinity set, associate this thread with the same set. We
2933 * will only do this once.
2934 */
2935 lck_mtx_lock_spin(&inp->input_lck);
2936 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2937 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2938 (poll && inp->poll_thr == THREAD_NULL))) {
2939 u_int32_t tag = inp->tag;
2940
2941 if (poll) {
2942 VERIFY(inp->poll_thr == THREAD_NULL);
2943 inp->poll_thr = tp;
2944 } else {
2945 VERIFY(inp->wloop_thr == THREAD_NULL);
2946 inp->wloop_thr = tp;
2947 }
2948 lck_mtx_unlock(&inp->input_lck);
2949
2950 /* Associate the current thread with the new affinity tag */
2951 (void) dlil_affinity_set(tp, tag);
2952
2953 /*
2954 * Take a reference on the current thread; during detach,
2955 * we will need to refer to it in order to tear down its
2956 * affinity.
2957 */
2958 thread_reference(tp);
2959 lck_mtx_lock_spin(&inp->input_lck);
2960 }
2961
2962 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2963
2964 /*
2965 * Because of loopbacked multicast we cannot stuff the ifp in
2966 * the rcvif of the packet header: loopback (lo0) packets use a
2967 * dedicated list so that we can later associate them with lo_ifp
2968 * on their way up the stack. Packets for other interfaces without
2969 * dedicated input threads go to the regular list.
2970 */
2971 if (m_head != NULL) {
2972 classq_pkt_t head, tail;
2973 CLASSQ_PKT_INIT_MBUF(&head, m_head);
2974 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
2975 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2976 struct dlil_main_threading_info *inpm =
2977 (struct dlil_main_threading_info *)inp;
2978 _addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
2979 m_cnt, m_size);
2980 } else {
2981 _addq_multi(&inp->rcvq_pkts, &head, &tail,
2982 m_cnt, m_size);
2983 }
2984 }
2985
2986 #if IFNET_INPUT_SANITY_CHK
2987 if (dlil_input_sanity_check != 0) {
2988 u_int32_t count;
2989 struct mbuf *m0;
2990
2991 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
2992 count++;
2993 }
2994
2995 if (count != m_cnt) {
2996 panic_plain("%s: invalid packet count %d "
2997 "(expected %d)\n", if_name(ifp),
2998 count, m_cnt);
2999 /* NOTREACHED */
3000 }
3001
3002 inp->input_mbuf_cnt += m_cnt;
3003 }
3004 #endif /* IFNET_INPUT_SANITY_CHK */
3005
3006 dlil_input_stats_add(s, inp, ifp, poll);
3007 /*
3008 * If we're using the main input thread, synchronize the
3009 * stats now since we have the interface context. All
3010 * other cases involving dedicated input threads will
3011 * have their stats synchronized there.
3012 */
3013 if (inp == dlil_main_input_thread) {
3014 notify = dlil_input_stats_sync(ifp, inp);
3015 }
3016
3017 inp->input_waiting |= DLIL_INPUT_WAITING;
3018 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
3019 inp->wtot++;
3020 wakeup_one((caddr_t)&inp->input_waiting);
3021 }
3022 lck_mtx_unlock(&inp->input_lck);
3023
3024 if (notify) {
3025 ifnet_notify_data_threshold(ifp);
3026 }
3027
3028 return 0;
3029 }
3030
3031
3032 static void
3033 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
3034 {
3035 if (!(ifp->if_eflags & IFEF_TXSTART)) {
3036 return;
3037 }
3038 /*
3039 * If the starter thread is inactive, signal it to do work,
3040 * unless the interface is being flow controlled from below,
3041 * e.g. a virtual interface being flow controlled by a real
3042 * network interface beneath it, or it's been disabled via
3043 * a call to ifnet_disable_output().
3044 */
3045 lck_mtx_lock_spin(&ifp->if_start_lock);
3046 if (resetfc) {
3047 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
3048 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
3049 lck_mtx_unlock(&ifp->if_start_lock);
3050 return;
3051 }
3052 ifp->if_start_req++;
3053 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
3054 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3055 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
3056 ifp->if_start_delayed == 0)) {
3057 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
3058 ifp->if_start_thread);
3059 }
3060 lck_mtx_unlock(&ifp->if_start_lock);
3061 }
3062
3063 void
3064 ifnet_start(struct ifnet *ifp)
3065 {
3066 ifnet_start_common(ifp, FALSE);
3067 }
3068
3069 __attribute__((noreturn))
3070 static void
3071 ifnet_start_thread_func(void *v, wait_result_t w)
3072 {
3073 #pragma unused(w)
3074 struct ifnet *ifp = v;
3075 char thread_name[MAXTHREADNAMESIZE];
3076
3077 /* Construct the name for this thread, and then apply it. */
3078 bzero(thread_name, sizeof(thread_name));
3079 (void) snprintf(thread_name, sizeof(thread_name),
3080 "ifnet_start_%s", ifp->if_xname);
3081 ASSERT(ifp->if_start_thread == current_thread());
3082 thread_set_thread_name(current_thread(), thread_name);
3083
3084 /*
3085 * Treat the dedicated starter thread for lo0 as equivalent to
3086 * the driver workloop thread; if net_affinity is enabled for
3087 * the main input thread, associate this starter thread to it
3088 * by binding them with the same affinity tag. This is done
3089 * only once (as we only have one lo_ifp which never goes away.)
3090 */
3091 if (ifp == lo_ifp) {
3092 struct dlil_threading_info *inp = dlil_main_input_thread;
3093 struct thread *tp = current_thread();
3094
3095 lck_mtx_lock(&inp->input_lck);
3096 if (inp->net_affinity) {
3097 u_int32_t tag = inp->tag;
3098
3099 VERIFY(inp->wloop_thr == THREAD_NULL);
3100 VERIFY(inp->poll_thr == THREAD_NULL);
3101 inp->wloop_thr = tp;
3102 lck_mtx_unlock(&inp->input_lck);
3103
3104 /* Associate this thread with the affinity tag */
3105 (void) dlil_affinity_set(tp, tag);
3106 } else {
3107 lck_mtx_unlock(&inp->input_lck);
3108 }
3109 }
3110 ifnet_decr_pending_thread_count(ifp);
3111
3112 lck_mtx_lock(&ifp->if_start_lock);
3113 VERIFY(!ifp->if_start_active);
3114 (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
3115 lck_mtx_unlock(&ifp->if_start_lock);
3116 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3117 /* NOTREACHED */
3118 __builtin_unreachable();
3119 }
3120
3121 __attribute__((noreturn))
3122 static void
3123 ifnet_start_thread_cont(void *v, wait_result_t wres)
3124 {
3125 struct ifnet *ifp = v;
3126 struct ifclassq *ifq = &ifp->if_snd;
3127
3128 lck_mtx_lock(&ifp->if_start_lock);
3129 if (__improbable(wres == THREAD_INTERRUPTED ||
3130 ifp->if_start_thread == THREAD_NULL)) {
3131 goto terminate;
3132 }
3133
3134 ifp->if_start_active = 1;
3135
3136 /*
3137 * Keep servicing until there are no more requests.
3138 */
3139 for (;;) {
3140 u_int32_t req = ifp->if_start_req;
3141 if (!IFCQ_IS_EMPTY(ifq) &&
3142 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3143 ifp->if_start_delayed == 0 &&
3144 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
3145 (ifp->if_eflags & IFEF_DELAY_START)) {
3146 ifp->if_start_delayed = 1;
3147 ifnet_start_delayed++;
3148 break;
3149 } else {
3150 ifp->if_start_delayed = 0;
3151 }
3152 lck_mtx_unlock(&ifp->if_start_lock);
3153
3154 /*
3155 * If no longer attached, don't call start because ifp
3156 * is being destroyed; else hold an IO refcnt to
3157 * prevent the interface from being detached (will be
3158 * released below.)
3159 */
3160 if (!ifnet_datamov_begin(ifp)) {
3161 lck_mtx_lock_spin(&ifp->if_start_lock);
3162 break;
3163 }
3164
3165 /* invoke the driver's start routine */
3166 ((*ifp->if_start)(ifp));
3167
3168 /*
3169 * Release the io ref count taken above.
3170 */
3171 ifnet_datamov_end(ifp);
3172
3173 lck_mtx_lock_spin(&ifp->if_start_lock);
3174
3175 /*
3176 * If there's no pending request or if the
3177 * interface has been disabled, we're done.
3178 */
3179 if (req == ifp->if_start_req ||
3180 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3181 break;
3182 }
3183 }
3184
3185 ifp->if_start_req = 0;
3186 ifp->if_start_active = 0;
3187
3188
3189 if (__probable(ifp->if_start_thread != THREAD_NULL)) {
3190 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3191 struct timespec delay_start_ts;
3192 struct timespec *ts;
3193
3194 /*
3195 * Wakeup N ns from now if rate-controlled by TBR, and if
3196 * there are still packets in the send queue which haven't
3197 * been dequeued so far; else sleep indefinitely (ts = NULL)
3198 * until ifnet_start() is called again.
3199 */
3200 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
3201 &ifp->if_start_cycle : NULL);
3202
3203 if (ts == NULL && ifp->if_start_delayed == 1) {
3204 delay_start_ts.tv_sec = 0;
3205 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
3206 ts = &delay_start_ts;
3207 }
3208
3209 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
3210 ts = NULL;
3211 }
3212
3213 if (__improbable(ts != NULL)) {
3214 clock_interval_to_deadline((ts->tv_nsec +
3215 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3216 }
3217
3218 (void) assert_wait_deadline(&ifp->if_start_thread,
3219 THREAD_UNINT, deadline);
3220 lck_mtx_unlock(&ifp->if_start_lock);
3221 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
3222 /* NOTREACHED */
3223 } else {
3224 terminate:
3225 /* interface is detached? */
3226 ifnet_set_start_cycle(ifp, NULL);
3227 lck_mtx_unlock(&ifp->if_start_lock);
3228 ifnet_purge(ifp);
3229
3230 if (dlil_verbose) {
3231 DLIL_PRINTF("%s: starter thread terminated\n",
3232 if_name(ifp));
3233 }
3234
3235 /* for the extra refcnt from kernel_thread_start() */
3236 thread_deallocate(current_thread());
3237 /* this is the end */
3238 thread_terminate(current_thread());
3239 /* NOTREACHED */
3240 }
3241
3242 /* must never get here */
3243 VERIFY(0);
3244 /* NOTREACHED */
3245 __builtin_unreachable();
3246 }
3247
3248 void
3249 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3250 {
3251 if (ts == NULL) {
3252 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3253 } else {
3254 *(&ifp->if_start_cycle) = *ts;
3255 }
3256
3257 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3258 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
3259 if_name(ifp), ts->tv_nsec);
3260 }
3261 }
3262
3263 void
3264 ifnet_poll(struct ifnet *ifp)
3265 {
3266 /*
3267 * If the poller thread is inactive, signal it to do work.
3268 */
3269 lck_mtx_lock_spin(&ifp->if_poll_lock);
3270 ifp->if_poll_req++;
3271 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
3272 ifp->if_poll_thread != THREAD_NULL) {
3273 wakeup_one((caddr_t)&ifp->if_poll_thread);
3274 }
3275 lck_mtx_unlock(&ifp->if_poll_lock);
3276 }
3277
3278 __attribute__((noreturn))
3279 static void
3280 ifnet_poll_thread_func(void *v, wait_result_t w)
3281 {
3282 #pragma unused(w)
3283 char thread_name[MAXTHREADNAMESIZE];
3284 struct ifnet *ifp = v;
3285
3286 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3287 VERIFY(current_thread() == ifp->if_poll_thread);
3288
3289 /* construct the name for this thread, and then apply it */
3290 bzero(thread_name, sizeof(thread_name));
3291 (void) snprintf(thread_name, sizeof(thread_name),
3292 "ifnet_poller_%s", ifp->if_xname);
3293 thread_set_thread_name(ifp->if_poll_thread, thread_name);
3294 ifnet_decr_pending_thread_count(ifp);
3295
3296 lck_mtx_lock(&ifp->if_poll_lock);
3297 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
3298 lck_mtx_unlock(&ifp->if_poll_lock);
3299 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3300 /* NOTREACHED */
3301 __builtin_unreachable();
3302 }
3303
3304 __attribute__((noreturn))
3305 static void
3306 ifnet_poll_thread_cont(void *v, wait_result_t wres)
3307 {
3308 struct dlil_threading_info *inp;
3309 struct ifnet *ifp = v;
3310 struct ifnet_stat_increment_param s;
3311 struct timespec start_time;
3312
3313 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
3314
3315 bzero(&s, sizeof(s));
3316 net_timerclear(&start_time);
3317
3318 lck_mtx_lock_spin(&ifp->if_poll_lock);
3319 if (__improbable(wres == THREAD_INTERRUPTED ||
3320 ifp->if_poll_thread == THREAD_NULL)) {
3321 goto terminate;
3322 }
3323
3324 inp = ifp->if_inp;
3325 VERIFY(inp != NULL);
3326
3327 ifp->if_poll_flags |= IF_POLLF_RUNNING;
3328
3329 /*
3330 * Keep servicing until there are no more requests.
3331 */
3332 for (;;) {
3333 struct mbuf *m_head, *m_tail;
3334 u_int32_t m_lim, m_cnt, m_totlen;
3335 u_int16_t req = ifp->if_poll_req;
3336
3337 m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
3338 MAX((qlimit(&inp->rcvq_pkts)), (ifp->if_rxpoll_phiwat << 2));
3339 lck_mtx_unlock(&ifp->if_poll_lock);
3340
3341 /*
3342 * If no longer attached, there's nothing to do;
3343 * else hold an IO refcnt to prevent the interface
3344 * from being detached (will be released below.)
3345 */
3346 if (!ifnet_is_attached(ifp, 1)) {
3347 lck_mtx_lock_spin(&ifp->if_poll_lock);
3348 break;
3349 }
3350
3351 if (dlil_verbose > 1) {
3352 DLIL_PRINTF("%s: polling up to %d pkts, "
3353 "pkts avg %d max %d, wreq avg %d, "
3354 "bytes avg %d\n",
3355 if_name(ifp), m_lim,
3356 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3357 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3358 }
3359
3360 /* invoke the driver's input poll routine */
3361 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3362 &m_cnt, &m_totlen));
3363
3364 if (m_head != NULL) {
3365 VERIFY(m_tail != NULL && m_cnt > 0);
3366
3367 if (dlil_verbose > 1) {
3368 DLIL_PRINTF("%s: polled %d pkts, "
3369 "pkts avg %d max %d, wreq avg %d, "
3370 "bytes avg %d\n",
3371 if_name(ifp), m_cnt,
3372 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
3373 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
3374 }
3375
3376 /* stats are required for extended variant */
3377 s.packets_in = m_cnt;
3378 s.bytes_in = m_totlen;
3379
3380 (void) ifnet_input_common(ifp, m_head, m_tail,
3381 &s, TRUE, TRUE);
3382 } else {
3383 if (dlil_verbose > 1) {
3384 DLIL_PRINTF("%s: no packets, "
3385 "pkts avg %d max %d, wreq avg %d, "
3386 "bytes avg %d\n",
3387 if_name(ifp), ifp->if_rxpoll_pavg,
3388 ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
3389 ifp->if_rxpoll_bavg);
3390 }
3391
3392 (void) ifnet_input_common(ifp, NULL, NULL,
3393 NULL, FALSE, TRUE);
3394 }
3395
3396 /* Release the io ref count */
3397 ifnet_decr_iorefcnt(ifp);
3398
3399 lck_mtx_lock_spin(&ifp->if_poll_lock);
3400
3401 /* if there's no pending request or the poller is going away, we're done */
3402 if (req == ifp->if_poll_req ||
3403 ifp->if_poll_thread == THREAD_NULL) {
3404 break;
3405 }
3406 }
3407
3408 ifp->if_poll_req = 0;
3409 ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
3410
3411 if (ifp->if_poll_thread != THREAD_NULL) {
3412 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
3413 struct timespec *ts;
3414
3415 		/*
3416 		 * Wake up N ns from now; otherwise (ts == NULL) sleep
3417 		 * indefinitely until ifnet_poll() is called again.
3418 		 */
3419 ts = &ifp->if_poll_cycle;
3420 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
3421 ts = NULL;
3422 }
3423
3424 if (ts != NULL) {
3425 clock_interval_to_deadline((ts->tv_nsec +
3426 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
3427 }
3428
3429 (void) assert_wait_deadline(&ifp->if_poll_thread,
3430 THREAD_UNINT, deadline);
3431 lck_mtx_unlock(&ifp->if_poll_lock);
3432 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
3433 /* NOTREACHED */
3434 } else {
3435 terminate:
3436 		/* interface was detached (perhaps while asleep)? */
3437 ifnet_set_poll_cycle(ifp, NULL);
3438 lck_mtx_unlock(&ifp->if_poll_lock);
3439
3440 if (dlil_verbose) {
3441 DLIL_PRINTF("%s: poller thread terminated\n",
3442 if_name(ifp));
3443 }
3444
3445 /* for the extra refcnt from kernel_thread_start() */
3446 thread_deallocate(current_thread());
3447 /* this is the end */
3448 thread_terminate(current_thread());
3449 /* NOTREACHED */
3450 }
3451
3452 /* must never get here */
3453 VERIFY(0);
3454 /* NOTREACHED */
3455 __builtin_unreachable();
3456 }
3457
3458 void
3459 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3460 {
3461 if (ts == NULL) {
3462 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3463 } else {
3464 *(&ifp->if_poll_cycle) = *ts;
3465 }
3466
3467 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3468 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
3469 if_name(ifp), ts->tv_nsec);
3470 }
3471 }
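/*
 * Usage note (illustrative sketch only): a caller wanting the poller above
 * to wake up roughly once a millisecond could do something like
 *
 *	struct timespec ts = { 0, 1000 * 1000 };
 *	ifnet_set_poll_cycle(ifp, &ts);
 *
 * while passing NULL clears the cycle, so the poller thread sleeps
 * indefinitely until ifnet_poll() wakes it again.
 */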
3472
3473 void
3474 ifnet_purge(struct ifnet *ifp)
3475 {
3476 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
3477 if_qflush(ifp, 0);
3478 }
3479 }
3480
3481 void
3482 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3483 {
3484 IFCQ_LOCK_ASSERT_HELD(ifq);
3485
3486 if (!(IFCQ_IS_READY(ifq))) {
3487 return;
3488 }
3489
3490 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3491 struct tb_profile tb = {
3492 .rate = ifq->ifcq_tbr.tbr_rate_raw,
3493 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
3494 };
3495 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3496 }
3497
3498 ifclassq_update(ifq, ev);
3499 }
3500
3501 void
3502 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3503 {
3504 switch (ev) {
3505 case CLASSQ_EV_LINK_BANDWIDTH:
3506 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
3507 ifp->if_poll_update++;
3508 }
3509 break;
3510
3511 default:
3512 break;
3513 }
3514 }
3515
3516 errno_t
3517 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3518 {
3519 struct ifclassq *ifq;
3520 u_int32_t omodel;
3521 errno_t err;
3522
3523 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3524 return EINVAL;
3525 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3526 return ENXIO;
3527 }
3528
3529 ifq = &ifp->if_snd;
3530 IFCQ_LOCK(ifq);
3531 omodel = ifp->if_output_sched_model;
3532 ifp->if_output_sched_model = model;
3533 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
3534 ifp->if_output_sched_model = omodel;
3535 }
3536 IFCQ_UNLOCK(ifq);
3537
3538 return err;
3539 }
3540
3541 errno_t
3542 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3543 {
3544 if (ifp == NULL) {
3545 return EINVAL;
3546 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3547 return ENXIO;
3548 }
3549
3550 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3551
3552 return 0;
3553 }
3554
3555 errno_t
3556 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3557 {
3558 if (ifp == NULL || maxqlen == NULL) {
3559 return EINVAL;
3560 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3561 return ENXIO;
3562 }
3563
3564 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3565
3566 return 0;
3567 }
3568
3569 errno_t
3570 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3571 {
3572 errno_t err;
3573
3574 if (ifp == NULL || pkts == NULL) {
3575 err = EINVAL;
3576 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3577 err = ENXIO;
3578 } else {
3579 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3580 pkts, NULL);
3581 }
3582
3583 return err;
3584 }
3585
3586 errno_t
3587 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3588 u_int32_t *pkts, u_int32_t *bytes)
3589 {
3590 errno_t err;
3591
3592 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3593 (pkts == NULL && bytes == NULL)) {
3594 err = EINVAL;
3595 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3596 err = ENXIO;
3597 } else {
3598 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3599 }
3600
3601 return err;
3602 }
3603
3604 errno_t
3605 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3606 {
3607 struct dlil_threading_info *inp;
3608
3609 if (ifp == NULL) {
3610 return EINVAL;
3611 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3612 return ENXIO;
3613 }
3614
3615 if (maxqlen == 0) {
3616 maxqlen = if_rcvq_maxlen;
3617 } else if (maxqlen < IF_RCVQ_MINLEN) {
3618 maxqlen = IF_RCVQ_MINLEN;
3619 }
3620
3621 inp = ifp->if_inp;
3622 lck_mtx_lock(&inp->input_lck);
3623 qlimit(&inp->rcvq_pkts) = maxqlen;
3624 lck_mtx_unlock(&inp->input_lck);
3625
3626 return 0;
3627 }
3628
3629 errno_t
3630 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3631 {
3632 struct dlil_threading_info *inp;
3633
3634 if (ifp == NULL || maxqlen == NULL) {
3635 return EINVAL;
3636 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3637 return ENXIO;
3638 }
3639
3640 inp = ifp->if_inp;
3641 lck_mtx_lock(&inp->input_lck);
3642 *maxqlen = qlimit(&inp->rcvq_pkts);
3643 lck_mtx_unlock(&inp->input_lck);
3644 return 0;
3645 }
3646
3647 void
3648 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3649 uint16_t delay_timeout)
3650 {
3651 if (delay_qlen > 0 && delay_timeout > 0) {
3652 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3653 ifp->if_start_delay_qlen = min(100, delay_qlen);
3654 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3655 		/* convert timeout from microseconds to nanoseconds */
3656 ifp->if_start_delay_timeout *= 1000;
3657 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3658 ifp->if_xname, (uint32_t)delay_qlen,
3659 (uint32_t)delay_timeout);
3660 } else {
3661 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3662 }
3663 }
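/*
 * Usage note (illustrative sketch only): a driver that wants transmit
 * coalescing could call, e.g.,
 *
 *	ifnet_enqueue_multi_setup(ifp, 16, 500);
 *
 * to cap the delay queue length at 16 packets and the delay timeout at
 * 500 microseconds (converted to nanoseconds above); those values are
 * hypothetical tuning parameters, not recommendations. Passing 0 for
 * either argument clears IFEF_ENQUEUE_MULTI.
 */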
3664
3665 /*
3666  * This function clears the DSCP bits in the IPv4/IPv6 header pointed to by buf.
3667  * While it's OK for buf not to be 32-bit aligned, the caller must ensure that
3668  * buf holds the full header.
3669  */
3670 static __attribute__((noinline)) void
3671 ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
3672 {
3673 struct ip *ip;
3674 struct ip6_hdr *ip6;
3675 uint8_t lbuf[64] __attribute__((aligned(8)));
3676 uint8_t *p = buf;
3677
3678 if (ip_ver == IPVERSION) {
3679 uint8_t old_tos;
3680 uint32_t sum;
3681
3682 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3683 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
3684 bcopy(buf, lbuf, sizeof(struct ip));
3685 p = lbuf;
3686 }
3687 ip = (struct ip *)(void *)p;
3688 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
3689 return;
3690 }
3691
3692 DTRACE_IP1(clear__v4, struct ip *, ip);
3693 old_tos = ip->ip_tos;
3694 ip->ip_tos &= IPTOS_ECN_MASK;
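		/*
		 * Incrementally update the IP header checksum for the TOS
		 * byte change: add back the old value, subtract the new one,
		 * then fold the carry, instead of recomputing the checksum
		 * over the entire header.
		 */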
3695 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
3696 sum = (sum >> 16) + (sum & 0xffff);
3697 ip->ip_sum = (uint16_t)(sum & 0xffff);
3698
3699 if (__improbable(p == lbuf)) {
3700 bcopy(lbuf, buf, sizeof(struct ip));
3701 }
3702 } else {
3703 uint32_t flow;
3704 ASSERT(ip_ver == IPV6_VERSION);
3705
3706 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
3707 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
3708 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
3709 p = lbuf;
3710 }
3711 ip6 = (struct ip6_hdr *)(void *)p;
3712 flow = ntohl(ip6->ip6_flow);
3713 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
3714 return;
3715 }
3716
3717 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
3718 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
3719
3720 if (__improbable(p == lbuf)) {
3721 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
3722 }
3723 }
3724 }
3725
3726 static inline errno_t
3727 ifnet_enqueue_ifclassq(struct ifnet *ifp, classq_pkt_t *p, boolean_t flush,
3728 boolean_t *pdrop)
3729 {
3730 volatile uint64_t *fg_ts = NULL;
3731 volatile uint64_t *rt_ts = NULL;
3732 struct timespec now;
3733 u_int64_t now_nsec = 0;
3734 int error = 0;
3735 uint8_t *mcast_buf = NULL;
3736 uint8_t ip_ver;
3737
3738 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3739
3740 /*
3741 * If packet already carries a timestamp, either from dlil_output()
3742 * or from flowswitch, use it here. Otherwise, record timestamp.
3743 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3744 * the timestamp value is used internally there.
3745 */
3746 switch (p->cp_ptype) {
3747 case QP_MBUF:
3748 ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
3749 ASSERT(p->cp_mbuf->m_nextpkt == NULL);
3750
3751 if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3752 p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
3753 nanouptime(&now);
3754 net_timernsec(&now, &now_nsec);
3755 p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
3756 }
3757 p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3758 /*
3759 * If the packet service class is not background,
3760 * update the timestamp to indicate recent activity
3761 * on a foreground socket.
3762 */
3763 if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3764 p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3765 if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
3766 PKTF_SO_BACKGROUND)) {
3767 ifp->if_fg_sendts = _net_uptime;
3768 if (fg_ts != NULL) {
3769 *fg_ts = _net_uptime;
3770 }
3771 }
3772 if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3773 ifp->if_rt_sendts = _net_uptime;
3774 if (rt_ts != NULL) {
3775 *rt_ts = _net_uptime;
3776 }
3777 }
3778 }
3779
3780 /*
3781 * Some Wi-Fi AP implementations do not correctly handle
3782 * multicast IP packets with DSCP bits set (radr://9331522).
3783 * As a workaround we clear the DSCP bits and set the service
3784 * class to BE.
3785 */
3786 if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
3787 IFNET_IS_WIFI_INFRA(ifp)) {
3788 size_t len = mbuf_len(p->cp_mbuf), hlen;
3789 struct ether_header *eh;
3790 boolean_t pullup = FALSE;
3791 uint16_t etype;
3792
3793 if (__improbable(len < sizeof(struct ether_header))) {
3794 DTRACE_IP1(small__ether, size_t, len);
3795 if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
3796 sizeof(struct ether_header))) == NULL) {
3797 return ENOMEM;
3798 }
3799 }
3800 eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
3801 etype = ntohs(eh->ether_type);
3802 if (etype == ETHERTYPE_IP) {
3803 hlen = sizeof(struct ether_header) +
3804 sizeof(struct ip);
3805 if (len < hlen) {
3806 DTRACE_IP1(small__v4, size_t, len);
3807 pullup = TRUE;
3808 }
3809 ip_ver = IPVERSION;
3810 } else if (etype == ETHERTYPE_IPV6) {
3811 hlen = sizeof(struct ether_header) +
3812 sizeof(struct ip6_hdr);
3813 if (len < hlen) {
3814 DTRACE_IP1(small__v6, size_t, len);
3815 pullup = TRUE;
3816 }
3817 ip_ver = IPV6_VERSION;
3818 } else {
3819 DTRACE_IP1(invalid__etype, uint16_t, etype);
3820 break;
3821 }
3822 if (pullup) {
3823 if ((p->cp_mbuf = m_pullup(p->cp_mbuf, hlen)) ==
3824 NULL) {
3825 return ENOMEM;
3826 }
3827
3828 eh = (struct ether_header *)mbuf_data(
3829 p->cp_mbuf);
3830 }
3831 mbuf_set_service_class(p->cp_mbuf, MBUF_SC_BE);
3832 mcast_buf = (uint8_t *)(eh + 1);
3833 /*
3834 * ifnet_mcast_clear_dscp() will finish the work below.
3835 * Note that the pullups above ensure that mcast_buf
3836 * points to a full IP header.
3837 */
3838 }
3839 break;
3840
3841
3842 default:
3843 VERIFY(0);
3844 /* NOTREACHED */
3845 __builtin_unreachable();
3846 }
3847
3848 if (mcast_buf != NULL) {
3849 ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
3850 }
3851
3852 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3853 if (now_nsec == 0) {
3854 nanouptime(&now);
3855 net_timernsec(&now, &now_nsec);
3856 }
3857 		/*
3858 		 * If the driver chose to delay the start callback in order
3859 		 * to coalesce multiple packets, use the following heuristics
3860 		 * to make sure the start callback is delayed only when a
3861 		 * bulk data transfer is detected:
3862 		 * 1. The number of packets enqueued within twice the delay
3863 		 * window is greater than or equal to the delay qlen.
3864 		 * 2. If delay_start is enabled, it stays enabled for another
3865 		 * 10 idle windows, to account for variable RTT and bursty
3866 		 * traffic.
3867 		 * 3. If more than 200ms has elapsed since the last enqueue,
3868 		 * delaying the start callback is disabled, to take idle
3869 		 * time into account.
3870 		 */
3871 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3872 if (ifp->if_start_delay_swin > 0) {
3873 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3874 ifp->if_start_delay_cnt++;
3875 } else if ((now_nsec - ifp->if_start_delay_swin)
3876 >= (200 * 1000 * 1000)) {
3877 ifp->if_start_delay_swin = now_nsec;
3878 ifp->if_start_delay_cnt = 1;
3879 ifp->if_start_delay_idle = 0;
3880 if (ifp->if_eflags & IFEF_DELAY_START) {
3881 ifp->if_eflags &=
3882 ~(IFEF_DELAY_START);
3883 ifnet_delay_start_disabled++;
3884 }
3885 } else {
3886 if (ifp->if_start_delay_cnt >=
3887 ifp->if_start_delay_qlen) {
3888 ifp->if_eflags |= IFEF_DELAY_START;
3889 ifp->if_start_delay_idle = 0;
3890 } else {
3891 if (ifp->if_start_delay_idle >= 10) {
3892 ifp->if_eflags &=
3893 ~(IFEF_DELAY_START);
3894 ifnet_delay_start_disabled++;
3895 } else {
3896 ifp->if_start_delay_idle++;
3897 }
3898 }
3899 ifp->if_start_delay_swin = now_nsec;
3900 ifp->if_start_delay_cnt = 1;
3901 }
3902 } else {
3903 ifp->if_start_delay_swin = now_nsec;
3904 ifp->if_start_delay_cnt = 1;
3905 ifp->if_start_delay_idle = 0;
3906 ifp->if_eflags &= ~(IFEF_DELAY_START);
3907 }
3908 } else {
3909 ifp->if_eflags &= ~(IFEF_DELAY_START);
3910 }
3911
3912 /* enqueue the packet (caller consumes object) */
3913 error = ifclassq_enqueue(&ifp->if_snd, p, pdrop);
3914
3915 /*
3916 * Tell the driver to start dequeueing; do this even when the queue
3917 * for the packet is suspended (EQSUSPENDED), as the driver could still
3918 * be dequeueing from other unsuspended queues.
3919 */
3920 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3921 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
3922 ifnet_start(ifp);
3923 }
3924
3925 return error;
3926 }
3927
3928 int
3929 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
3930 {
3931 struct ifnet *ifp = handle;
3932 boolean_t pdrop; /* dummy */
3933 uint32_t i;
3934
3935 ASSERT(n_pkts >= 1);
3936 for (i = 0; i < n_pkts - 1; i++) {
3937 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt,
3938 FALSE, &pdrop);
3939 }
3940 /* flush with the last packet */
3941 (void) ifnet_enqueue_ifclassq(ifp, &pkts[i].pktsched_pkt, TRUE, &pdrop);
3942
3943 return 0;
3944 }
3945
3946 static inline errno_t
3947 ifnet_enqueue_common(struct ifnet *ifp, classq_pkt_t *pkt, boolean_t flush,
3948 boolean_t *pdrop)
3949 {
3950 if (ifp->if_output_netem != NULL) {
3951 return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
3952 } else {
3953 return ifnet_enqueue_ifclassq(ifp, pkt, flush, pdrop);
3954 }
3955 }
3956
3957 errno_t
3958 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3959 {
3960 boolean_t pdrop;
3961 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
3962 }
3963
3964 errno_t
3965 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3966 boolean_t *pdrop)
3967 {
3968 classq_pkt_t pkt;
3969
3970 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3971 m->m_nextpkt != NULL) {
3972 if (m != NULL) {
3973 m_freem_list(m);
3974 *pdrop = TRUE;
3975 }
3976 return EINVAL;
3977 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3978 !IF_FULLY_ATTACHED(ifp)) {
3979 /* flag tested without lock for performance */
3980 m_freem(m);
3981 *pdrop = TRUE;
3982 return ENXIO;
3983 } else if (!(ifp->if_flags & IFF_UP)) {
3984 m_freem(m);
3985 *pdrop = TRUE;
3986 return ENETDOWN;
3987 }
3988
3989 CLASSQ_PKT_INIT_MBUF(&pkt, m);
3990 return ifnet_enqueue_common(ifp, &pkt, flush, pdrop);
3991 }
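/*
 * Usage note (illustrative sketch; names prefixed "example_" are
 * hypothetical): a driver using the start/enqueue output model typically
 * hands packets to the interface send queue via ifnet_enqueue() from its
 * output handler and drains them from its start callback, e.g.
 *
 *	static errno_t
 *	example_drv_output(ifnet_t ifp, mbuf_t m)
 *	{
 *		return ifnet_enqueue(ifp, m);
 *	}
 */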
3992
3993
3994 errno_t
3995 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3996 {
3997 errno_t rc;
3998 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3999
4000 if (ifp == NULL || mp == NULL) {
4001 return EINVAL;
4002 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4003 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4004 return ENXIO;
4005 }
4006 if (!ifnet_is_attached(ifp, 1)) {
4007 return ENXIO;
4008 }
4009
4010 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
4011 &pkt, NULL, NULL, NULL);
4012 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4013 ifnet_decr_iorefcnt(ifp);
4014 *mp = pkt.cp_mbuf;
4015 return rc;
4016 }
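/*
 * Usage note (illustrative sketch; example_drv_start/_tx are hypothetical):
 * a driver's start callback might drain the send queue one packet at a
 * time with ifnet_dequeue(), e.g.
 *
 *	static void
 *	example_drv_start(ifnet_t ifp)
 *	{
 *		mbuf_t m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0 && m != NULL) {
 *			example_drv_tx(ifp, m);
 *		}
 *	}
 *
 * Drivers that can take several packets per pass would use
 * ifnet_dequeue_multi() or ifnet_dequeue_multi_bytes() below instead.
 */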
4017
4018 errno_t
4019 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
4020 struct mbuf **mp)
4021 {
4022 errno_t rc;
4023 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
4024
4025 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
4026 return EINVAL;
4027 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4028 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4029 return ENXIO;
4030 }
4031 if (!ifnet_is_attached(ifp, 1)) {
4032 return ENXIO;
4033 }
4034
4035 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
4036 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
4037 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
4038 ifnet_decr_iorefcnt(ifp);
4039 *mp = pkt.cp_mbuf;
4040 return rc;
4041 }
4042
4043 errno_t
4044 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
4045 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4046 {
4047 errno_t rc;
4048 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4049 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4050
4051 if (ifp == NULL || head == NULL || pkt_limit < 1) {
4052 return EINVAL;
4053 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4054 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4055 return ENXIO;
4056 }
4057 if (!ifnet_is_attached(ifp, 1)) {
4058 return ENXIO;
4059 }
4060
4061 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
4062 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
4063 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4064 ifnet_decr_iorefcnt(ifp);
4065 *head = pkt_head.cp_mbuf;
4066 if (tail != NULL) {
4067 *tail = pkt_tail.cp_mbuf;
4068 }
4069 return rc;
4070 }
4071
4072 errno_t
4073 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
4074 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
4075 {
4076 errno_t rc;
4077 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4078 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4079
4080 if (ifp == NULL || head == NULL || byte_limit < 1) {
4081 return EINVAL;
4082 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4083 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4084 return ENXIO;
4085 }
4086 if (!ifnet_is_attached(ifp, 1)) {
4087 return ENXIO;
4088 }
4089
4090 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
4091 byte_limit, &pkt_head, &pkt_tail, cnt, len);
4092 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4093 ifnet_decr_iorefcnt(ifp);
4094 *head = pkt_head.cp_mbuf;
4095 if (tail != NULL) {
4096 *tail = pkt_tail.cp_mbuf;
4097 }
4098 return rc;
4099 }
4100
4101 errno_t
4102 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
4103 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
4104 u_int32_t *len)
4105 {
4106 errno_t rc;
4107 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4108 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
4109
4110 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
4111 !MBUF_VALID_SC(sc)) {
4112 return EINVAL;
4113 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
4114 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
4115 return ENXIO;
4116 }
4117 if (!ifnet_is_attached(ifp, 1)) {
4118 return ENXIO;
4119 }
4120
4121 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
4122 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
4123 cnt, len);
4124 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
4125 ifnet_decr_iorefcnt(ifp);
4126 *head = pkt_head.cp_mbuf;
4127 if (tail != NULL) {
4128 *tail = pkt_tail.cp_mbuf;
4129 }
4130 return rc;
4131 }
4132
4133 #if !CONFIG_EMBEDDED
4134 errno_t
4135 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
4136 const struct sockaddr *dest, const char *dest_linkaddr,
4137 const char *frame_type, u_int32_t *pre, u_int32_t *post)
4138 {
4139 if (pre != NULL) {
4140 *pre = 0;
4141 }
4142 if (post != NULL) {
4143 *post = 0;
4144 }
4145
4146 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
4147 }
4148 #endif /* !CONFIG_EMBEDDED */
4149
4150 static boolean_t
4151 packet_has_vlan_tag(struct mbuf * m)
4152 {
4153 u_int tag = 0;
4154
4155 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
4156 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
4157 if (tag == 0) {
4158 /* the packet is just priority-tagged, clear the bit */
4159 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
4160 }
4161 }
4162 return tag != 0;
4163 }
4164
4165 static int
4166 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
4167 char **frame_header_p, protocol_family_t protocol_family)
4168 {
4169 boolean_t is_vlan_packet = FALSE;
4170 struct ifnet_filter *filter;
4171 struct mbuf *m = *m_p;
4172
4173 is_vlan_packet = packet_has_vlan_tag(m);
4174
4175 /*
4176 * Pass the inbound packet to the interface filters
4177 */
4178 lck_mtx_lock_spin(&ifp->if_flt_lock);
4179 /* prevent filter list from changing in case we drop the lock */
4180 if_flt_monitor_busy(ifp);
4181 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4182 int result;
4183
4184 /* exclude VLAN packets from external filters PR-3586856 */
4185 if (is_vlan_packet &&
4186 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4187 continue;
4188 }
4189
4190 if (!filter->filt_skip && filter->filt_input != NULL &&
4191 (filter->filt_protocol == 0 ||
4192 filter->filt_protocol == protocol_family)) {
4193 lck_mtx_unlock(&ifp->if_flt_lock);
4194
4195 result = (*filter->filt_input)(filter->filt_cookie,
4196 ifp, protocol_family, m_p, frame_header_p);
4197
4198 lck_mtx_lock_spin(&ifp->if_flt_lock);
4199 if (result != 0) {
4200 /* we're done with the filter list */
4201 if_flt_monitor_unbusy(ifp);
4202 lck_mtx_unlock(&ifp->if_flt_lock);
4203 return result;
4204 }
4205 }
4206 }
4207 /* we're done with the filter list */
4208 if_flt_monitor_unbusy(ifp);
4209 lck_mtx_unlock(&ifp->if_flt_lock);
4210
4211 	/*
4212 	 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
4213 	 * as it is meant to be local to a subsystem (if_bridge uses M_PROTO1).
4214 	 */
4215 if (*m_p != NULL) {
4216 (*m_p)->m_flags &= ~M_PROTO1;
4217 }
4218
4219 return 0;
4220 }
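/*
 * Usage note (illustrative sketch; names prefixed "example_" are
 * hypothetical): the filters walked above are attached through the
 * kpi_interfacefilter KPI, roughly along these lines:
 *
 *	static errno_t
 *	example_filt_input(void *cookie, ifnet_t ifp,
 *	    protocol_family_t protocol, mbuf_t *data, char **frame_ptr)
 *	{
 *		return 0;	// 0 passes the packet along unmodified
 *	}
 *
 *	static const struct iff_filter example_filter = {
 *		.iff_cookie = NULL,
 *		.iff_name = "com.example.filter",
 *		.iff_protocol = 0,	// 0 matches every protocol family
 *		.iff_input = example_filt_input,
 *	};
 *	interface_filter_t example_ref;
 *	errno_t err = iflt_attach(ifp, &example_filter, &example_ref);
 *
 * A filter input callback that returns EJUSTRETURN is treated here as
 * having consumed the packet; any other non-zero return causes the caller
 * to free the packet and stop processing it.
 */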
4221
4222 static int
4223 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
4224 protocol_family_t protocol_family)
4225 {
4226 boolean_t is_vlan_packet;
4227 struct ifnet_filter *filter;
4228 struct mbuf *m = *m_p;
4229
4230 is_vlan_packet = packet_has_vlan_tag(m);
4231
4232 /*
4233 * Pass the outbound packet to the interface filters
4234 */
4235 lck_mtx_lock_spin(&ifp->if_flt_lock);
4236 /* prevent filter list from changing in case we drop the lock */
4237 if_flt_monitor_busy(ifp);
4238 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4239 int result;
4240
4241 /* exclude VLAN packets from external filters PR-3586856 */
4242 if (is_vlan_packet &&
4243 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
4244 continue;
4245 }
4246
4247 if (!filter->filt_skip && filter->filt_output != NULL &&
4248 (filter->filt_protocol == 0 ||
4249 filter->filt_protocol == protocol_family)) {
4250 lck_mtx_unlock(&ifp->if_flt_lock);
4251
4252 result = filter->filt_output(filter->filt_cookie, ifp,
4253 protocol_family, m_p);
4254
4255 lck_mtx_lock_spin(&ifp->if_flt_lock);
4256 if (result != 0) {
4257 /* we're done with the filter list */
4258 if_flt_monitor_unbusy(ifp);
4259 lck_mtx_unlock(&ifp->if_flt_lock);
4260 return result;
4261 }
4262 }
4263 }
4264 /* we're done with the filter list */
4265 if_flt_monitor_unbusy(ifp);
4266 lck_mtx_unlock(&ifp->if_flt_lock);
4267
4268 return 0;
4269 }
4270
4271 static void
4272 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
4273 {
4274 int error;
4275
4276 if (ifproto->proto_kpi == kProtoKPI_v1) {
4277 /* Version 1 protocols get one packet at a time */
4278 while (m != NULL) {
4279 char * frame_header;
4280 mbuf_t next_packet;
4281
4282 next_packet = m->m_nextpkt;
4283 m->m_nextpkt = NULL;
4284 frame_header = m->m_pkthdr.pkt_hdr;
4285 m->m_pkthdr.pkt_hdr = NULL;
4286 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
4287 ifproto->protocol_family, m, frame_header);
4288 if (error != 0 && error != EJUSTRETURN) {
4289 m_freem(m);
4290 }
4291 m = next_packet;
4292 }
4293 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
4294 /* Version 2 protocols support packet lists */
4295 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
4296 ifproto->protocol_family, m);
4297 if (error != 0 && error != EJUSTRETURN) {
4298 m_freem_list(m);
4299 }
4300 }
4301 }
4302
4303 static void
4304 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
4305 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
4306 {
4307 struct ifnet_stat_increment_param *d = &inp->stats;
4308
4309 if (s->packets_in != 0) {
4310 d->packets_in += s->packets_in;
4311 }
4312 if (s->bytes_in != 0) {
4313 d->bytes_in += s->bytes_in;
4314 }
4315 if (s->errors_in != 0) {
4316 d->errors_in += s->errors_in;
4317 }
4318
4319 if (s->packets_out != 0) {
4320 d->packets_out += s->packets_out;
4321 }
4322 if (s->bytes_out != 0) {
4323 d->bytes_out += s->bytes_out;
4324 }
4325 if (s->errors_out != 0) {
4326 d->errors_out += s->errors_out;
4327 }
4328
4329 if (s->collisions != 0) {
4330 d->collisions += s->collisions;
4331 }
4332 if (s->dropped != 0) {
4333 d->dropped += s->dropped;
4334 }
4335
4336 if (poll) {
4337 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
4338 }
4339 }
4340
4341 static boolean_t
4342 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
4343 {
4344 struct ifnet_stat_increment_param *s = &inp->stats;
4345
4346 /*
4347 * Use of atomic operations is unavoidable here because
4348 * these stats may also be incremented elsewhere via KPIs.
4349 */
4350 if (s->packets_in != 0) {
4351 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
4352 s->packets_in = 0;
4353 }
4354 if (s->bytes_in != 0) {
4355 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
4356 s->bytes_in = 0;
4357 }
4358 if (s->errors_in != 0) {
4359 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
4360 s->errors_in = 0;
4361 }
4362
4363 if (s->packets_out != 0) {
4364 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
4365 s->packets_out = 0;
4366 }
4367 if (s->bytes_out != 0) {
4368 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
4369 s->bytes_out = 0;
4370 }
4371 if (s->errors_out != 0) {
4372 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
4373 s->errors_out = 0;
4374 }
4375
4376 if (s->collisions != 0) {
4377 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
4378 s->collisions = 0;
4379 }
4380 if (s->dropped != 0) {
4381 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
4382 s->dropped = 0;
4383 }
4384
4385 /*
4386 * No need for atomic operations as they are modified here
4387 * only from within the DLIL input thread context.
4388 */
4389 if (ifp->if_poll_tstats.packets != 0) {
4390 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
4391 ifp->if_poll_tstats.packets = 0;
4392 }
4393 if (ifp->if_poll_tstats.bytes != 0) {
4394 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
4395 ifp->if_poll_tstats.bytes = 0;
4396 }
4397
4398 return ifp->if_data_threshold != 0;
4399 }
4400
4401 __private_extern__ void
4402 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
4403 {
4404 return dlil_input_packet_list_common(ifp, m, 0,
4405 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
4406 }
4407
4408 __private_extern__ void
4409 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
4410 u_int32_t cnt, ifnet_model_t mode)
4411 {
4412 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
4413 }
4414
4415 static void
4416 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
4417 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
4418 {
4419 int error = 0;
4420 protocol_family_t protocol_family;
4421 mbuf_t next_packet;
4422 ifnet_t ifp = ifp_param;
4423 char *frame_header = NULL;
4424 struct if_proto *last_ifproto = NULL;
4425 mbuf_t pkt_first = NULL;
4426 mbuf_t *pkt_next = NULL;
4427 u_int32_t poll_thresh = 0, poll_ival = 0;
4428
4429 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4430
4431 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
4432 (poll_ival = if_rxpoll_interval_pkts) > 0) {
4433 poll_thresh = cnt;
4434 }
4435
4436 while (m != NULL) {
4437 struct if_proto *ifproto = NULL;
4438 int iorefcnt = 0;
4439 uint32_t pktf_mask; /* pkt flags to preserve */
4440
4441 if (ifp_param == NULL) {
4442 ifp = m->m_pkthdr.rcvif;
4443 }
4444
4445 if ((ifp->if_eflags & IFEF_RXPOLL) &&
4446 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
4447 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
4448 ifnet_poll(ifp);
4449 }
4450
4451 /* Check if this mbuf looks valid */
4452 MBUF_INPUT_CHECK(m, ifp);
4453
4454 next_packet = m->m_nextpkt;
4455 m->m_nextpkt = NULL;
4456 frame_header = m->m_pkthdr.pkt_hdr;
4457 m->m_pkthdr.pkt_hdr = NULL;
4458
4459 /*
4460 * Get an IO reference count if the interface is not
4461 * loopback (lo0) and it is attached; lo0 never goes
4462 * away, so optimize for that.
4463 */
4464 if (ifp != lo_ifp) {
4465 if (!ifnet_datamov_begin(ifp)) {
4466 m_freem(m);
4467 goto next;
4468 }
4469 iorefcnt = 1;
4470 /*
4471 * Preserve the time stamp if it was set.
4472 */
4473 pktf_mask = PKTF_TS_VALID;
4474 } else {
4475 /*
4476 * If this arrived on lo0, preserve interface addr
4477 * info to allow for connectivity between loopback
4478 * and local interface addresses.
4479 */
4480 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
4481 }
4482
4483 /* make sure packet comes in clean */
4484 m_classifier_init(m, pktf_mask);
4485
4486 ifp_inc_traffic_class_in(ifp, m);
4487
4488 /* find which protocol family this packet is for */
4489 ifnet_lock_shared(ifp);
4490 error = (*ifp->if_demux)(ifp, m, frame_header,
4491 &protocol_family);
4492 ifnet_lock_done(ifp);
4493 if (error != 0) {
4494 if (error == EJUSTRETURN) {
4495 goto next;
4496 }
4497 protocol_family = 0;
4498 }
4499
4500 pktap_input(ifp, protocol_family, m, frame_header);
4501
4502 /* Drop v4 packets received on CLAT46 enabled interface */
4503 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4504 m_freem(m);
4505 ip6stat.ip6s_clat464_in_v4_drop++;
4506 goto next;
4507 }
4508
4509 /* Translate the packet if it is received on CLAT interface */
4510 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4511 && dlil_is_clat_needed(protocol_family, m)) {
4512 char *data = NULL;
4513 struct ether_header eh;
4514 struct ether_header *ehp = NULL;
4515
4516 if (ifp->if_type == IFT_ETHER) {
4517 ehp = (struct ether_header *)(void *)frame_header;
4518 /* Skip RX Ethernet packets if they are not IPV6 */
4519 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
4520 goto skip_clat;
4521 }
4522
4523 /* Keep a copy of frame_header for Ethernet packets */
4524 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4525 }
4526 error = dlil_clat64(ifp, &protocol_family, &m);
4527 data = (char *) mbuf_data(m);
4528 if (error != 0) {
4529 m_freem(m);
4530 ip6stat.ip6s_clat464_in_drop++;
4531 goto next;
4532 }
4533 /* Native v6 should be No-op */
4534 if (protocol_family != PF_INET) {
4535 goto skip_clat;
4536 }
4537
4538 /* Do this only for translated v4 packets. */
4539 switch (ifp->if_type) {
4540 case IFT_CELLULAR:
4541 frame_header = data;
4542 break;
4543 case IFT_ETHER:
4544 /*
4545 * Drop if the mbuf doesn't have enough
4546 * space for Ethernet header
4547 */
4548 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4549 m_free(m);
4550 ip6stat.ip6s_clat464_in_drop++;
4551 goto next;
4552 }
4553 /*
4554 				 * Set frame_header to point ETHER_HDR_LEN bytes
4555 				 * before the data pointer. Change
4556 * the ether_type too.
4557 */
4558 frame_header = data - ETHER_HDR_LEN;
4559 eh.ether_type = htons(ETHERTYPE_IP);
4560 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4561 break;
4562 }
4563 }
4564 skip_clat:
4565 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
4566 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
4567 dlil_input_cksum_dbg(ifp, m, frame_header,
4568 protocol_family);
4569 }
4570 /*
4571 * For partial checksum offload, we expect the driver to
4572 * set the start offset indicating the start of the span
4573 * that is covered by the hardware-computed checksum;
4574 * adjust this start offset accordingly because the data
4575 * pointer has been advanced beyond the link-layer header.
4576 *
4577 * Virtual lan types (bridge, vlan, bond) can call
4578 * dlil_input_packet_list() with the same packet with the
4579 * checksum flags set. Set a flag indicating that the
4580 * adjustment has already been done.
4581 */
4582 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
4583 /* adjustment has already been done */
4584 } else if ((m->m_pkthdr.csum_flags &
4585 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4586 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4587 int adj;
4588 if (frame_header == NULL ||
4589 frame_header < (char *)mbuf_datastart(m) ||
4590 frame_header > (char *)m->m_data ||
4591 (adj = (m->m_data - frame_header)) >
4592 m->m_pkthdr.csum_rx_start) {
4593 m->m_pkthdr.csum_data = 0;
4594 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4595 hwcksum_in_invalidated++;
4596 } else {
4597 m->m_pkthdr.csum_rx_start -= adj;
4598 }
4599 /* make sure we don't adjust more than once */
4600 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
4601 }
4602 if (clat_debug) {
4603 pktap_input(ifp, protocol_family, m, frame_header);
4604 }
4605
4606 if (m->m_flags & (M_BCAST | M_MCAST)) {
4607 atomic_add_64(&ifp->if_imcasts, 1);
4608 }
4609
4610 /* run interface filters */
4611 error = dlil_interface_filters_input(ifp, &m,
4612 &frame_header, protocol_family);
4613 if (error != 0) {
4614 if (error != EJUSTRETURN) {
4615 m_freem(m);
4616 }
4617 goto next;
4618 }
4619 if ((m->m_flags & M_PROMISC) != 0) {
4620 m_freem(m);
4621 goto next;
4622 }
4623
4624 /* Lookup the protocol attachment to this interface */
4625 if (protocol_family == 0) {
4626 ifproto = NULL;
4627 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4628 (last_ifproto->protocol_family == protocol_family)) {
4629 VERIFY(ifproto == NULL);
4630 ifproto = last_ifproto;
4631 if_proto_ref(last_ifproto);
4632 } else {
4633 VERIFY(ifproto == NULL);
4634 ifnet_lock_shared(ifp);
4635 /* callee holds a proto refcnt upon success */
4636 ifproto = find_attached_proto(ifp, protocol_family);
4637 ifnet_lock_done(ifp);
4638 }
4639 if (ifproto == NULL) {
4640 /* no protocol for this packet, discard */
4641 m_freem(m);
4642 goto next;
4643 }
4644 if (ifproto != last_ifproto) {
4645 if (last_ifproto != NULL) {
4646 /* pass up the list for the previous protocol */
4647 dlil_ifproto_input(last_ifproto, pkt_first);
4648 pkt_first = NULL;
4649 if_proto_free(last_ifproto);
4650 }
4651 last_ifproto = ifproto;
4652 if_proto_ref(ifproto);
4653 }
4654 /* extend the list */
4655 m->m_pkthdr.pkt_hdr = frame_header;
4656 if (pkt_first == NULL) {
4657 pkt_first = m;
4658 } else {
4659 *pkt_next = m;
4660 }
4661 pkt_next = &m->m_nextpkt;
4662
4663 next:
4664 if (next_packet == NULL && last_ifproto != NULL) {
4665 /* pass up the last list of packets */
4666 dlil_ifproto_input(last_ifproto, pkt_first);
4667 if_proto_free(last_ifproto);
4668 last_ifproto = NULL;
4669 }
4670 if (ifproto != NULL) {
4671 if_proto_free(ifproto);
4672 ifproto = NULL;
4673 }
4674
4675 m = next_packet;
4676
4677 /* update the driver's multicast filter, if needed */
4678 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4679 ifp->if_updatemcasts = 0;
4680 }
4681 if (iorefcnt == 1) {
4682 ifnet_datamov_end(ifp);
4683 }
4684 }
4685
4686 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4687 }
4688
4689 errno_t
4690 if_mcasts_update(struct ifnet *ifp)
4691 {
4692 errno_t err;
4693
4694 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4695 if (err == EAFNOSUPPORT) {
4696 err = 0;
4697 }
4698 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
4699 "(err=%d)\n", if_name(ifp),
4700 (err == 0 ? "successfully restored" : "failed to restore"),
4701 ifp->if_updatemcasts, err);
4702
4703 /* just return success */
4704 return 0;
4705 }
4706
4707 /* If ifp is set, we will increment the generation for the interface */
4708 int
4709 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4710 {
4711 if (ifp != NULL) {
4712 ifnet_increment_generation(ifp);
4713 }
4714
4715 #if NECP
4716 necp_update_all_clients();
4717 #endif /* NECP */
4718
4719 return kev_post_msg(event);
4720 }
4721
4722 __private_extern__ void
4723 dlil_post_sifflags_msg(struct ifnet * ifp)
4724 {
4725 struct kev_msg ev_msg;
4726 struct net_event_data ev_data;
4727
4728 bzero(&ev_data, sizeof(ev_data));
4729 bzero(&ev_msg, sizeof(ev_msg));
4730 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4731 ev_msg.kev_class = KEV_NETWORK_CLASS;
4732 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4733 ev_msg.event_code = KEV_DL_SIFFLAGS;
4734 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4735 ev_data.if_family = ifp->if_family;
4736 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4737 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4738 ev_msg.dv[0].data_ptr = &ev_data;
4739 ev_msg.dv[1].data_length = 0;
4740 dlil_post_complete_msg(ifp, &ev_msg);
4741 }
4742
4743 #define TMP_IF_PROTO_ARR_SIZE 10
4744 static int
4745 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4746 {
4747 struct ifnet_filter *filter = NULL;
4748 struct if_proto *proto = NULL;
4749 int if_proto_count = 0;
4750 struct if_proto **tmp_ifproto_arr = NULL;
4751 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4752 int tmp_ifproto_arr_idx = 0;
4753 bool tmp_malloc = false;
4754
4755 /*
4756 * Pass the event to the interface filters
4757 */
4758 lck_mtx_lock_spin(&ifp->if_flt_lock);
4759 /* prevent filter list from changing in case we drop the lock */
4760 if_flt_monitor_busy(ifp);
4761 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4762 if (filter->filt_event != NULL) {
4763 lck_mtx_unlock(&ifp->if_flt_lock);
4764
4765 filter->filt_event(filter->filt_cookie, ifp,
4766 filter->filt_protocol, event);
4767
4768 lck_mtx_lock_spin(&ifp->if_flt_lock);
4769 }
4770 }
4771 /* we're done with the filter list */
4772 if_flt_monitor_unbusy(ifp);
4773 lck_mtx_unlock(&ifp->if_flt_lock);
4774
4775 /* Get an io ref count if the interface is attached */
4776 if (!ifnet_is_attached(ifp, 1)) {
4777 goto done;
4778 }
4779
4780 	/*
4781 	 * An embedded tmp_list_entry in if_proto may still get
4782 	 * overwritten by another thread after giving up the ifnet lock;
4783 	 * therefore we avoid embedded pointers here.
4784 	 */
4785 ifnet_lock_shared(ifp);
4786 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
4787 if (if_proto_count) {
4788 int i;
4789 VERIFY(ifp->if_proto_hash != NULL);
4790 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4791 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4792 } else {
4793 MALLOC(tmp_ifproto_arr, struct if_proto **,
4794 sizeof(*tmp_ifproto_arr) * if_proto_count,
4795 M_TEMP, M_ZERO);
4796 if (tmp_ifproto_arr == NULL) {
4797 ifnet_lock_done(ifp);
4798 goto cleanup;
4799 }
4800 tmp_malloc = true;
4801 }
4802
4803 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4804 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4805 next_hash) {
4806 if_proto_ref(proto);
4807 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4808 tmp_ifproto_arr_idx++;
4809 }
4810 }
4811 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4812 }
4813 ifnet_lock_done(ifp);
4814
4815 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4816 tmp_ifproto_arr_idx++) {
4817 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4818 VERIFY(proto != NULL);
4819 proto_media_event eventp =
4820 (proto->proto_kpi == kProtoKPI_v1 ?
4821 proto->kpi.v1.event :
4822 proto->kpi.v2.event);
4823
4824 if (eventp != NULL) {
4825 eventp(ifp, proto->protocol_family,
4826 event);
4827 }
4828 if_proto_free(proto);
4829 }
4830
4831 cleanup:
4832 if (tmp_malloc) {
4833 FREE(tmp_ifproto_arr, M_TEMP);
4834 }
4835
4836 /* Pass the event to the interface */
4837 if (ifp->if_event != NULL) {
4838 ifp->if_event(ifp, event);
4839 }
4840
4841 /* Release the io ref count */
4842 ifnet_decr_iorefcnt(ifp);
4843 done:
4844 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
4845 }
4846
4847 errno_t
4848 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4849 {
4850 struct kev_msg kev_msg;
4851 int result = 0;
4852
4853 if (ifp == NULL || event == NULL) {
4854 return EINVAL;
4855 }
4856
4857 bzero(&kev_msg, sizeof(kev_msg));
4858 kev_msg.vendor_code = event->vendor_code;
4859 kev_msg.kev_class = event->kev_class;
4860 kev_msg.kev_subclass = event->kev_subclass;
4861 kev_msg.event_code = event->event_code;
4862 kev_msg.dv[0].data_ptr = &event->event_data[0];
4863 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4864 kev_msg.dv[1].data_length = 0;
4865
4866 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4867
4868 return result;
4869 }
4870
4871 #if CONFIG_MACF_NET
4872 #include <netinet/ip6.h>
4873 #include <netinet/ip.h>
4874 static int
4875 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4876 {
4877 struct mbuf *m;
4878 struct ip *ip;
4879 struct ip6_hdr *ip6;
4880 int type = SOCK_RAW;
4881
4882 if (!raw) {
4883 switch (family) {
4884 case PF_INET:
4885 m = m_pullup(*mp, sizeof(struct ip));
4886 if (m == NULL) {
4887 break;
4888 }
4889 *mp = m;
4890 ip = mtod(m, struct ip *);
4891 if (ip->ip_p == IPPROTO_TCP) {
4892 type = SOCK_STREAM;
4893 } else if (ip->ip_p == IPPROTO_UDP) {
4894 type = SOCK_DGRAM;
4895 }
4896 break;
4897 case PF_INET6:
4898 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4899 if (m == NULL) {
4900 break;
4901 }
4902 *mp = m;
4903 ip6 = mtod(m, struct ip6_hdr *);
4904 if (ip6->ip6_nxt == IPPROTO_TCP) {
4905 type = SOCK_STREAM;
4906 } else if (ip6->ip6_nxt == IPPROTO_UDP) {
4907 type = SOCK_DGRAM;
4908 }
4909 break;
4910 }
4911 }
4912
4913 return type;
4914 }
4915 #endif
4916
4917 static void
4918 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4919 {
4920 mbuf_t n = m;
4921 int chainlen = 0;
4922
4923 while (n != NULL) {
4924 chainlen++;
4925 n = n->m_next;
4926 }
4927 switch (chainlen) {
4928 case 0:
4929 break;
4930 case 1:
4931 atomic_add_64(&cls->cls_one, 1);
4932 break;
4933 case 2:
4934 atomic_add_64(&cls->cls_two, 1);
4935 break;
4936 case 3:
4937 atomic_add_64(&cls->cls_three, 1);
4938 break;
4939 case 4:
4940 atomic_add_64(&cls->cls_four, 1);
4941 break;
4942 case 5:
4943 default:
4944 atomic_add_64(&cls->cls_five_or_more, 1);
4945 break;
4946 }
4947 }
4948
4949 /*
4950 * dlil_output
4951 *
4952 * Caller should have a lock on the protocol domain if the protocol
4953 * doesn't support finer grained locking. In most cases, the lock
4954 * will be held from the socket layer and won't be released until
4955 * we return back to the socket layer.
4956 *
4957 * This does mean that we must take a protocol lock before we take
4958 * an interface lock if we're going to take both. This makes sense
4959 * because a protocol is likely to interact with an ifp while it
4960 * is under the protocol lock.
4961 *
4962 * An advisory code will be returned if adv is not null. This
4963 * can be used to provide feedback about interface queues to the
4964 * application.
4965 */
4966 errno_t
4967 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4968 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4969 {
4970 char *frame_type = NULL;
4971 char *dst_linkaddr = NULL;
4972 int retval = 0;
4973 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4974 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4975 struct if_proto *proto = NULL;
4976 mbuf_t m = NULL;
4977 mbuf_t send_head = NULL;
4978 mbuf_t *send_tail = &send_head;
4979 int iorefcnt = 0;
4980 u_int32_t pre = 0, post = 0;
4981 u_int32_t fpkts = 0, fbytes = 0;
4982 int32_t flen = 0;
4983 struct timespec now;
4984 u_int64_t now_nsec;
4985 boolean_t did_clat46 = FALSE;
4986 protocol_family_t old_proto_family = proto_family;
4987 struct sockaddr_in6 dest6;
4988 struct rtentry *rt = NULL;
4989 u_int32_t m_loop_set = 0;
4990
4991 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4992
4993 /*
4994 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4995 * from happening while this operation is in progress
4996 */
4997 if (!ifnet_datamov_begin(ifp)) {
4998 retval = ENXIO;
4999 goto cleanup;
5000 }
5001 iorefcnt = 1;
5002
5003 VERIFY(ifp->if_output_dlil != NULL);
5004
5005 /* update the driver's multicast filter, if needed */
5006 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
5007 ifp->if_updatemcasts = 0;
5008 }
5009
5010 frame_type = frame_type_buffer;
5011 dst_linkaddr = dst_linkaddr_buffer;
5012
5013 if (raw == 0) {
5014 ifnet_lock_shared(ifp);
5015 /* callee holds a proto refcnt upon success */
5016 proto = find_attached_proto(ifp, proto_family);
5017 if (proto == NULL) {
5018 ifnet_lock_done(ifp);
5019 retval = ENXIO;
5020 goto cleanup;
5021 }
5022 ifnet_lock_done(ifp);
5023 }
5024
5025 preout_again:
5026 if (packetlist == NULL) {
5027 goto cleanup;
5028 }
5029
5030 m = packetlist;
5031 packetlist = packetlist->m_nextpkt;
5032 m->m_nextpkt = NULL;
5033
5034 /*
5035 * Perform address family translation for the first
5036 * packet outside the loop in order to perform address
5037 * lookup for the translated proto family.
5038 */
5039 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5040 (ifp->if_type == IFT_CELLULAR ||
5041 dlil_is_clat_needed(proto_family, m))) {
5042 retval = dlil_clat46(ifp, &proto_family, &m);
5043 /*
5044 * Go to the next packet if translation fails
5045 */
5046 if (retval != 0) {
5047 m_freem(m);
5048 m = NULL;
5049 ip6stat.ip6s_clat464_out_drop++;
5050 /* Make sure that the proto family is PF_INET */
5051 ASSERT(proto_family == PF_INET);
5052 goto preout_again;
5053 }
5054 		/*
5055 		 * Free the old proto and make it point to the IPv6 proto structure.
5056 		 *
5057 		 * Change proto the first time we have successfully
5058 		 * performed address family translation.
5059 		 */
5060 if (!did_clat46 && proto_family == PF_INET6) {
5061 did_clat46 = TRUE;
5062
5063 if (proto != NULL) {
5064 if_proto_free(proto);
5065 }
5066 ifnet_lock_shared(ifp);
5067 /* callee holds a proto refcnt upon success */
5068 proto = find_attached_proto(ifp, proto_family);
5069 if (proto == NULL) {
5070 ifnet_lock_done(ifp);
5071 retval = ENXIO;
5072 m_freem(m);
5073 m = NULL;
5074 goto cleanup;
5075 }
5076 ifnet_lock_done(ifp);
5077 if (ifp->if_type == IFT_ETHER) {
5078 /* Update the dest to translated v6 address */
5079 dest6.sin6_len = sizeof(struct sockaddr_in6);
5080 dest6.sin6_family = AF_INET6;
5081 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
5082 dest = (const struct sockaddr *)&dest6;
5083
5084 				/*
5085 				 * Look up the route to the translated destination.
5086 				 * This route ref is freed during cleanup.
5087 				 */
5088 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
5089 0, 0, ifp->if_index);
5090
5091 route = rt;
5092 }
5093 }
5094 }
5095
5096 	/*
5097 	 * This path handles a packet chain going to the same destination.
5098 	 * The pre-output routine is used to either trigger resolution of
5099 	 * the next hop or retrieve the next hop's link-layer addressing;
5100 	 * for example, the ether_inet(6)_pre_output routine.
5101 	 *
5102 	 * If the routine returns EJUSTRETURN, it implies that the packet
5103 	 * has been queued, and therefore we have to go to preout_again for
5104 	 * the following packet in the chain.
5105 	 *
5106 	 * For errors other than EJUSTRETURN, the current packet is freed
5107 	 * and the rest of the chain (pointed to by packetlist) is freed as
5108 	 * part of cleanup.
5109 	 *
5110 	 * Otherwise, if there is no error, the retrieved information is
5111 	 * used for all the packets in the chain.
5112 	 */
5113 if (raw == 0) {
5114 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
5115 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
5116 retval = 0;
5117 if (preoutp != NULL) {
5118 retval = preoutp(ifp, proto_family, &m, dest, route,
5119 frame_type, dst_linkaddr);
5120
5121 if (retval != 0) {
5122 if (retval == EJUSTRETURN) {
5123 goto preout_again;
5124 }
5125 m_freem(m);
5126 m = NULL;
5127 goto cleanup;
5128 }
5129 }
5130 }
5131
5132 #if CONFIG_MACF_NET
5133 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
5134 dlil_get_socket_type(&m, proto_family, raw));
5135 if (retval != 0) {
5136 m_freem(m);
5137 goto cleanup;
5138 }
5139 #endif
5140
5141 do {
5142 /*
5143 * Perform address family translation if needed.
5144 * For now we only support stateless 4 to 6 translation
5145 * on the out path.
5146 *
5147 * The routine below translates IP header, updates protocol
5148 * checksum and also translates ICMP.
5149 *
5150 * We skip the first packet as it is already translated and
5151 * the proto family is set to PF_INET6.
5152 */
5153 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
5154 (ifp->if_type == IFT_CELLULAR ||
5155 dlil_is_clat_needed(proto_family, m))) {
5156 retval = dlil_clat46(ifp, &proto_family, &m);
5157 			/* Go to the next packet if the translation fails */
5158 if (retval != 0) {
5159 m_freem(m);
5160 m = NULL;
5161 ip6stat.ip6s_clat464_out_drop++;
5162 goto next;
5163 }
5164 }
5165
5166 #if CONFIG_DTRACE
5167 if (!raw && proto_family == PF_INET) {
5168 struct ip *ip = mtod(m, struct ip *);
5169 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5170 struct ip *, ip, struct ifnet *, ifp,
5171 struct ip *, ip, struct ip6_hdr *, NULL);
5172 } else if (!raw && proto_family == PF_INET6) {
5173 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
5174 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
5175 struct ip6_hdr *, ip6, struct ifnet *, ifp,
5176 struct ip *, NULL, struct ip6_hdr *, ip6);
5177 }
5178 #endif /* CONFIG_DTRACE */
5179
5180 if (raw == 0 && ifp->if_framer != NULL) {
5181 int rcvif_set = 0;
5182
5183 /*
5184 * If this is a broadcast packet that needs to be
5185 * looped back into the system, set the inbound ifp
5186 * to that of the outbound ifp. This will allow
5187 * us to determine that it is a legitimate packet
5188 * for the system. Only set the ifp if it's not
5189 * already set, just to be safe.
5190 */
5191 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
5192 m->m_pkthdr.rcvif == NULL) {
5193 m->m_pkthdr.rcvif = ifp;
5194 rcvif_set = 1;
5195 }
5196 m_loop_set = m->m_flags & M_LOOP;
5197 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
5198 frame_type, &pre, &post);
5199 if (retval != 0) {
5200 if (retval != EJUSTRETURN) {
5201 m_freem(m);
5202 }
5203 goto next;
5204 }
5205
5206 /*
5207 * For partial checksum offload, adjust the start
5208 * and stuff offsets based on the prepended header.
5209 */
5210 if ((m->m_pkthdr.csum_flags &
5211 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5212 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5213 m->m_pkthdr.csum_tx_stuff += pre;
5214 m->m_pkthdr.csum_tx_start += pre;
5215 }
5216
5217 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
5218 dlil_output_cksum_dbg(ifp, m, pre,
5219 proto_family);
5220 }
5221
5222 /*
5223 * Clear the ifp if it was set above, and to be
5224 * safe, only if it is still the same as the
5225 * outbound ifp we have in context. If it was
5226 * looped back, then a copy of it was sent to the
5227 * loopback interface with the rcvif set, and we
5228 * are clearing the one that will go down to the
5229 * layer below.
5230 */
5231 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
5232 m->m_pkthdr.rcvif = NULL;
5233 }
5234 }
5235
5236 /*
5237 * Let interface filters (if any) do their thing ...
5238 */
5239 retval = dlil_interface_filters_output(ifp, &m, proto_family);
5240 if (retval != 0) {
5241 if (retval != EJUSTRETURN) {
5242 m_freem(m);
5243 }
5244 goto next;
5245 }
5246 		/*
5247 		 * Strip away the M_PROTO1 bit prior to sending the packet
5248 		 * to the driver, as this field may be used by the driver.
5249 		 */
5250 m->m_flags &= ~M_PROTO1;
5251
5252 		/*
5253 		 * If the underlying interface is not capable of handling a
5254 		 * packet whose data portion spans physically disjoint
5255 		 * pages, we need to "normalize" the packet so that we pass
5256 		 * down a chain of mbufs where each mbuf points to a span
5257 		 * that resides within a single system page. If the packet
5258 		 * does not cross page boundaries, the following is a no-op.
5259 		 */
5260 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
5261 if ((m = m_normalize(m)) == NULL) {
5262 goto next;
5263 }
5264 }
5265
5266 /*
5267 * If this is a TSO packet, make sure the interface still
5268 		 * advertises TSO capability.
5269 */
5270 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
5271 retval = EMSGSIZE;
5272 m_freem(m);
5273 goto cleanup;
5274 }
5275
5276 ifp_inc_traffic_class_out(ifp, m);
5277 pktap_output(ifp, proto_family, m, pre, post);
5278
5279 /*
5280 * Count the number of elements in the mbuf chain
5281 */
5282 if (tx_chain_len_count) {
5283 dlil_count_chain_len(m, &tx_chain_len_stats);
5284 }
5285
5286 /*
5287 * Record timestamp; ifnet_enqueue() will use this info
5288 * rather than redoing the work. An optimization could
5289 * involve doing this just once at the top, if there are
5290 * no interface filters attached, but that's probably
5291 * not a big deal.
5292 */
5293 nanouptime(&now);
5294 net_timernsec(&now, &now_nsec);
5295 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
5296
5297 /*
5298 * Discard partial sum information if this packet originated
5299 * from another interface; the packet would already have the
5300 * final checksum and we shouldn't recompute it.
5301 */
5302 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
5303 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
5304 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5305 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5306 m->m_pkthdr.csum_data = 0;
5307 }
5308
5309 /*
5310 * Finally, call the driver.
5311 */
5312 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
5313 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5314 flen += (m_pktlen(m) - (pre + post));
5315 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5316 }
5317 *send_tail = m;
5318 send_tail = &m->m_nextpkt;
5319 } else {
5320 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
5321 flen = (m_pktlen(m) - (pre + post));
5322 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
5323 } else {
5324 flen = 0;
5325 }
5326 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5327 0, 0, 0, 0, 0);
5328 retval = (*ifp->if_output_dlil)(ifp, m);
5329 if (retval == EQFULL || retval == EQSUSPENDED) {
5330 if (adv != NULL && adv->code == FADV_SUCCESS) {
5331 adv->code = (retval == EQFULL ?
5332 FADV_FLOW_CONTROLLED :
5333 FADV_SUSPENDED);
5334 }
5335 retval = 0;
5336 }
5337 if (retval == 0 && flen > 0) {
5338 fbytes += flen;
5339 fpkts++;
5340 }
5341 if (retval != 0 && dlil_verbose) {
5342 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5343 __func__, if_name(ifp),
5344 retval);
5345 }
5346 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
5347 0, 0, 0, 0, 0);
5348 }
5349 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5350
5351 next:
5352 m = packetlist;
5353 if (m != NULL) {
5354 m->m_flags |= m_loop_set;
5355 packetlist = packetlist->m_nextpkt;
5356 m->m_nextpkt = NULL;
5357 }
5358 /* Reset the proto family to old proto family for CLAT */
5359 if (did_clat46) {
5360 proto_family = old_proto_family;
5361 }
5362 } while (m != NULL);
5363
5364 if (send_head != NULL) {
5365 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
5366 0, 0, 0, 0, 0);
5367 if (ifp->if_eflags & IFEF_SENDLIST) {
5368 retval = (*ifp->if_output_dlil)(ifp, send_head);
5369 if (retval == EQFULL || retval == EQSUSPENDED) {
5370 if (adv != NULL) {
5371 adv->code = (retval == EQFULL ?
5372 FADV_FLOW_CONTROLLED :
5373 FADV_SUSPENDED);
5374 }
5375 retval = 0;
5376 }
5377 if (retval == 0 && flen > 0) {
5378 fbytes += flen;
5379 fpkts++;
5380 }
5381 if (retval != 0 && dlil_verbose) {
5382 DLIL_PRINTF("%s: output error on %s retval = %d\n",
5383 __func__, if_name(ifp), retval);
5384 }
5385 } else {
5386 struct mbuf *send_m;
5387 int enq_cnt = 0;
5388 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
5389 while (send_head != NULL) {
5390 send_m = send_head;
5391 send_head = send_m->m_nextpkt;
5392 send_m->m_nextpkt = NULL;
5393 retval = (*ifp->if_output_dlil)(ifp, send_m);
5394 if (retval == EQFULL || retval == EQSUSPENDED) {
5395 if (adv != NULL) {
5396 adv->code = (retval == EQFULL ?
5397 FADV_FLOW_CONTROLLED :
5398 FADV_SUSPENDED);
5399 }
5400 retval = 0;
5401 }
5402 if (retval == 0) {
5403 enq_cnt++;
5404 if (flen > 0) {
5405 fpkts++;
5406 }
5407 }
5408 if (retval != 0 && dlil_verbose) {
5409 DLIL_PRINTF("%s: output error on %s "
5410 "retval = %d\n",
5411 __func__, if_name(ifp), retval);
5412 }
5413 }
5414 if (enq_cnt > 0) {
5415 fbytes += flen;
5416 ifnet_start(ifp);
5417 }
5418 }
5419 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5420 }
5421
5422 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
5423
5424 cleanup:
5425 if (fbytes > 0) {
5426 ifp->if_fbytes += fbytes;
5427 }
5428 if (fpkts > 0) {
5429 ifp->if_fpackets += fpkts;
5430 }
5431 if (proto != NULL) {
5432 if_proto_free(proto);
5433 }
5434 if (packetlist) { /* if any packets are left, clean up */
5435 mbuf_freem_list(packetlist);
5436 }
5437 if (retval == EJUSTRETURN) {
5438 retval = 0;
5439 }
5440 if (iorefcnt == 1) {
5441 ifnet_datamov_end(ifp);
5442 }
5443 if (rt != NULL) {
5444 rtfree(rt);
5445 rt = NULL;
5446 }
5447
5448 return retval;
5449 }
5450
5451 /*
5452 * This routine returns nonzero (i.e. CLAT translation is needed) when the
5453 * destination address is not a loopback, link-local, multicast or broadcast address.
5454 */
5455 static int
5456 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
5457 {
5458 int ret = 0;
5459 switch (proto_family) {
5460 case PF_INET: {
5461 struct ip *iph = mtod(m, struct ip *);
5462 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
5463 ret = 1;
5464 }
5465 break;
5466 }
5467 case PF_INET6: {
5468 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
5469 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
5470 CLAT64_NEEDED(&ip6h->ip6_dst)) {
5471 ret = 1;
5472 }
5473 break;
5474 }
5475 }
5476
5477 return ret;
5478 }
5479 /*
5480 * @brief This routine translates an IPv4 packet to IPv6,
5481 * updates the protocol checksum and also translates the ICMP
5482 * type/code along with the inner header.
5483 *
5484 * @param ifp Pointer to the interface
5485 * @param proto_family Pointer to the protocol family. It is updated if the
5486 * function performs the translation successfully.
5487 * @param m Pointer to the mbuf pointer. Needed because this routine can
5488 * end up replacing the mbuf with a different one.
5489 *
5490 * @return 0 on success or else a negative value.
5491 */
5492 static errno_t
5493 dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5494 {
5495 VERIFY(*proto_family == PF_INET);
5496 VERIFY(IS_INTF_CLAT46(ifp));
5497
5498 pbuf_t pbuf_store, *pbuf = NULL;
5499 struct ip *iph = NULL;
5500 struct in_addr osrc, odst;
5501 uint8_t proto = 0;
5502 struct in6_ifaddr *ia6_clat_src = NULL;
5503 struct in6_addr *src = NULL;
5504 struct in6_addr dst;
5505 int error = 0;
5506 uint32_t off = 0;
5507 uint64_t tot_len = 0;
5508 uint16_t ip_id_val = 0;
5509 uint16_t ip_frag_off = 0;
5510
5511 boolean_t is_frag = FALSE;
5512 boolean_t is_first_frag = TRUE;
5513 boolean_t is_last_frag = TRUE;
5514
5515 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5516 pbuf = &pbuf_store;
5517 iph = pbuf->pb_data;
5518
5519 osrc = iph->ip_src;
5520 odst = iph->ip_dst;
5521 proto = iph->ip_p;
5522 off = iph->ip_hl << 2;
5523 ip_id_val = iph->ip_id;
5524 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5525
5526 tot_len = ntohs(iph->ip_len);
5527
5528 /*
5529 * For packets that are not first fragments
5530 * we only need to adjust the checksum.
5531 * For 4-to-6 translation, the fragmentation header
5532 * gets appended after protocol translation.
5533 */
5534 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5535 is_frag = TRUE;
5536
5537 /* If the offset is not zero, it is not the first frag */
5538 if (ip_frag_off != 0) {
5539 is_first_frag = FALSE;
5540 }
5541
5542 /* If IP_MF is set, then it is not the last frag */
5543 if (ntohs(iph->ip_off) & IP_MF) {
5544 is_last_frag = FALSE;
5545 }
5546 }
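
/*
 * Worked example of the checks above (ip_off in host byte order): IP_MF set
 * with a zero offset marks a first-but-not-last fragment, while a nonzero
 * offset with IP_MF clear marks the last fragment of the chain.
 */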
5547
5548 /*
5549 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5550 * translation.
5551 */
5552 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5553 if (ia6_clat_src == NULL) {
5554 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5555 error = -1;
5556 goto cleanup;
5557 }
5558
5559 src = &ia6_clat_src->ia_addr.sin6_addr;
5560
5561 /*
5562 * Translate IPv4 destination to IPv6 destination by using the
5563 * prefixes learned through prior PLAT discovery.
5564 */
5565 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5566 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5567 goto cleanup;
5568 }
5569
5570 /* Translate the IP header part first */
5571 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5572 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5573
5574 iph = NULL; /* Invalidate iph as pbuf has been modified */
5575
5576 if (error != 0) {
5577 ip6stat.ip6s_clat464_out_46transfail_drop++;
5578 goto cleanup;
5579 }
5580
5581 /*
5582 * Translate protocol header, update checksum, checksum flags
5583 * and related fields.
5584 */
5585 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5586 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5587
5588 if (error != 0) {
5589 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5590 goto cleanup;
5591 }
5592
5593 /* Now insert the IPv6 fragment header */
5594 if (is_frag) {
5595 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5596
5597 if (error != 0) {
5598 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5599 goto cleanup;
5600 }
5601 }
5602
5603 cleanup:
5604 if (ia6_clat_src != NULL) {
5605 IFA_REMREF(&ia6_clat_src->ia_ifa);
5606 }
5607
5608 if (pbuf_is_valid(pbuf)) {
5609 *m = pbuf->pb_mbuf;
5610 pbuf->pb_mbuf = NULL;
5611 pbuf_destroy(pbuf);
5612 } else {
5613 error = -1;
5614 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5615 }
5616
5617 if (error == 0) {
5618 *proto_family = PF_INET6;
5619 ip6stat.ip6s_clat464_out_success++;
5620 }
5621
5622 return error;
5623 }
5624
5625 /*
5626 * @brief This routine translates an incoming IPv6 packet to IPv4,
5627 * updates the protocol checksum and also translates the ICMPv6 outer
5628 * and inner headers.
5629 *
5630 * @return 0 on success or else a negative value.
5631 */
5632 static errno_t
5633 dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5634 {
5635 VERIFY(*proto_family == PF_INET6);
5636 VERIFY(IS_INTF_CLAT46(ifp));
5637
5638 struct ip6_hdr *ip6h = NULL;
5639 struct in6_addr osrc, odst;
5640 uint8_t proto = 0;
5641 struct in6_ifaddr *ia6_clat_dst = NULL;
5642 struct in_ifaddr *ia4_clat_dst = NULL;
5643 struct in_addr *dst = NULL;
5644 struct in_addr src;
5645 int error = 0;
5646 uint32_t off = 0;
5647 u_int64_t tot_len = 0;
5648 uint8_t tos = 0;
5649 boolean_t is_first_frag = TRUE;
5650
5651 /* Drop the packet if the incoming mbuf does not contain a valid IPv6 header */
5652 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5653 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5654 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5655 ip6stat.ip6s_clat464_in_tooshort_drop++;
5656 return -1;
5657 }
5658
5659 ip6h = mtod(*m, struct ip6_hdr *);
5660 /* Validate that mbuf contains IP payload equal to ip6_plen */
5661 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5662 ip6stat.ip6s_clat464_in_tooshort_drop++;
5663 return -1;
5664 }
5665
5666 osrc = ip6h->ip6_src;
5667 odst = ip6h->ip6_dst;
5668
5669 /*
5670 * Retrieve the local CLAT46 reserved IPv6 address.
5671 * Let the packet pass if we don't find one, as the flag
5672 * may get set before IPv6 configuration has taken place.
5673 */
5674 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5675 if (ia6_clat_dst == NULL) {
5676 goto done;
5677 }
5678
5679 /*
5680 * Check if the original destination in the packet is the same as the
5681 * reserved CLAT46 IPv6 address.
5682 */
5683 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5684 pbuf_t pbuf_store, *pbuf = NULL;
5685 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5686 pbuf = &pbuf_store;
5687
5688 /*
5689 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5690 * translation.
5691 */
5692 ia4_clat_dst = inifa_ifpclatv4(ifp);
5693 if (ia4_clat_dst == NULL) {
5694 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5695 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5696 error = -1;
5697 goto cleanup;
5698 }
5699 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5700
5701 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5702 dst = &ia4_clat_dst->ia_addr.sin_addr;
5703 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5704 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5705 error = -1;
5706 goto cleanup;
5707 }
5708
5709 ip6h = pbuf->pb_data;
5710 off = sizeof(struct ip6_hdr);
5711 proto = ip6h->ip6_nxt;
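/*
 * ip6_flow carries version (4 bits), Traffic Class (8 bits) and the flow
 * label (20 bits); the shift below extracts the Traffic Class, which the
 * translation uses as the IPv4 TOS.
 */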
5712 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5713 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5714
5715 /*
5716 * Translate the IP header and update the fragmentation
5717 * header if needed
5718 */
5719 error = (nat464_translate_64(pbuf, off, tos, &proto,
5720 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5721 0 : -1;
5722
5723 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5724
5725 if (error != 0) {
5726 ip6stat.ip6s_clat464_in_64transfail_drop++;
5727 goto cleanup;
5728 }
5729
5730 /*
5731 * Translate protocol header, update checksum, checksum flags
5732 * and related fields.
5733 */
5734 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5735 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5736 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5737
5738 if (error != 0) {
5739 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5740 goto cleanup;
5741 }
5742
5743 cleanup:
5744 if (ia4_clat_dst != NULL) {
5745 IFA_REMREF(&ia4_clat_dst->ia_ifa);
5746 }
5747
5748 if (pbuf_is_valid(pbuf)) {
5749 *m = pbuf->pb_mbuf;
5750 pbuf->pb_mbuf = NULL;
5751 pbuf_destroy(pbuf);
5752 } else {
5753 error = -1;
5754 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5755 }
5756
5757 if (error == 0) {
5758 *proto_family = PF_INET;
5759 ip6stat.ip6s_clat464_in_success++;
5760 }
5761 } /* CLAT traffic */
5762
5763 done:
5764 return error;
5765 }
5766
5767 errno_t
5768 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5769 void *ioctl_arg)
5770 {
5771 struct ifnet_filter *filter;
5772 int retval = EOPNOTSUPP;
5773 int result = 0;
5774
5775 if (ifp == NULL || ioctl_code == 0) {
5776 return EINVAL;
5777 }
5778
5779 /* Get an io ref count if the interface is attached */
5780 if (!ifnet_is_attached(ifp, 1)) {
5781 return EOPNOTSUPP;
5782 }
5783
5784 /*
5785 * Run the interface filters first.
5786 * We want to run all filters before calling the protocol,
5787 * interface family, or interface.
5788 */
5789 lck_mtx_lock_spin(&ifp->if_flt_lock);
5790 /* prevent filter list from changing in case we drop the lock */
5791 if_flt_monitor_busy(ifp);
5792 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5793 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5794 filter->filt_protocol == proto_fam)) {
5795 lck_mtx_unlock(&ifp->if_flt_lock);
5796
5797 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5798 proto_fam, ioctl_code, ioctl_arg);
5799
5800 lck_mtx_lock_spin(&ifp->if_flt_lock);
5801
5802 /* Only update retval if no one has handled the ioctl */
5803 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5804 if (result == ENOTSUP) {
5805 result = EOPNOTSUPP;
5806 }
5807 retval = result;
5808 if (retval != 0 && retval != EOPNOTSUPP) {
5809 /* we're done with the filter list */
5810 if_flt_monitor_unbusy(ifp);
5811 lck_mtx_unlock(&ifp->if_flt_lock);
5812 goto cleanup;
5813 }
5814 }
5815 }
5816 }
5817 /* we're done with the filter list */
5818 if_flt_monitor_unbusy(ifp);
5819 lck_mtx_unlock(&ifp->if_flt_lock);
5820
5821 /* Allow the protocol to handle the ioctl */
5822 if (proto_fam != 0) {
5823 struct if_proto *proto;
5824
5825 /* callee holds a proto refcnt upon success */
5826 ifnet_lock_shared(ifp);
5827 proto = find_attached_proto(ifp, proto_fam);
5828 ifnet_lock_done(ifp);
5829 if (proto != NULL) {
5830 proto_media_ioctl ioctlp =
5831 (proto->proto_kpi == kProtoKPI_v1 ?
5832 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
5833 result = EOPNOTSUPP;
5834 if (ioctlp != NULL) {
5835 result = ioctlp(ifp, proto_fam, ioctl_code,
5836 ioctl_arg);
5837 }
5838 if_proto_free(proto);
5839
5840 /* Only update retval if no one has handled the ioctl */
5841 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5842 if (result == ENOTSUP) {
5843 result = EOPNOTSUPP;
5844 }
5845 retval = result;
5846 if (retval && retval != EOPNOTSUPP) {
5847 goto cleanup;
5848 }
5849 }
5850 }
5851 }
5852
5853 /* retval is either 0 or EOPNOTSUPP */
5854
5855 /*
5856 * Let the interface handle this ioctl.
5857 * If it returns EOPNOTSUPP, ignore that, we may have
5858 * already handled this in the protocol or family.
5859 */
5860 if (ifp->if_ioctl) {
5861 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
5862 }
5863
5864 /* Only update retval if no one has handled the ioctl */
5865 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5866 if (result == ENOTSUP) {
5867 result = EOPNOTSUPP;
5868 }
5869 retval = result;
5870 if (retval && retval != EOPNOTSUPP) {
5871 goto cleanup;
5872 }
5873 }
5874
5875 cleanup:
5876 if (retval == EJUSTRETURN) {
5877 retval = 0;
5878 }
5879
5880 ifnet_decr_iorefcnt(ifp);
5881
5882 return retval;
5883 }
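
/*
 * Dispatch order above: interface filters get the first crack at the ioctl,
 * then the attached protocol, then the driver's if_ioctl; a stage that
 * answers EOPNOTSUPP simply lets the next stage try. For an in-file example
 * of a caller, see ifnet_attach_protocol() below, which issues
 * ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL) after marking the interface up.
 */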
5884
5885 __private_extern__ errno_t
5886 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
5887 {
5888 errno_t error = 0;
5889
5890
5891 if (ifp->if_set_bpf_tap) {
5892 /* Get an io reference on the interface if it is attached */
5893 if (!ifnet_is_attached(ifp, 1)) {
5894 return ENXIO;
5895 }
5896 error = ifp->if_set_bpf_tap(ifp, mode, callback);
5897 ifnet_decr_iorefcnt(ifp);
5898 }
5899 return error;
5900 }
5901
5902 errno_t
5903 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
5904 struct sockaddr *ll_addr, size_t ll_len)
5905 {
5906 errno_t result = EOPNOTSUPP;
5907 struct if_proto *proto;
5908 const struct sockaddr *verify;
5909 proto_media_resolve_multi resolvep;
5910
5911 if (!ifnet_is_attached(ifp, 1)) {
5912 return result;
5913 }
5914
5915 bzero(ll_addr, ll_len);
5916
5917 /* Call the protocol first; callee holds a proto refcnt upon success */
5918 ifnet_lock_shared(ifp);
5919 proto = find_attached_proto(ifp, proto_addr->sa_family);
5920 ifnet_lock_done(ifp);
5921 if (proto != NULL) {
5922 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
5923 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
5924 if (resolvep != NULL) {
5925 result = resolvep(ifp, proto_addr,
5926 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
5927 }
5928 if_proto_free(proto);
5929 }
5930
5931 /* Let the interface verify the multicast address */
5932 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
5933 if (result == 0) {
5934 verify = ll_addr;
5935 } else {
5936 verify = proto_addr;
5937 }
5938 result = ifp->if_check_multi(ifp, verify);
5939 }
5940
5941 ifnet_decr_iorefcnt(ifp);
5942 return result;
5943 }
5944
5945 __private_extern__ errno_t
5946 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
5947 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5948 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5949 {
5950 struct if_proto *proto;
5951 errno_t result = 0;
5952
5953 /* callee holds a proto refcnt upon success */
5954 ifnet_lock_shared(ifp);
5955 proto = find_attached_proto(ifp, target_proto->sa_family);
5956 ifnet_lock_done(ifp);
5957 if (proto == NULL) {
5958 result = ENOTSUP;
5959 } else {
5960 proto_media_send_arp arpp;
5961 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
5962 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
5963 if (arpp == NULL) {
5964 result = ENOTSUP;
5965 } else {
5966 switch (arpop) {
5967 case ARPOP_REQUEST:
5968 arpstat.txrequests++;
5969 if (target_hw != NULL) {
5970 arpstat.txurequests++;
5971 }
5972 break;
5973 case ARPOP_REPLY:
5974 arpstat.txreplies++;
5975 break;
5976 }
5977 result = arpp(ifp, arpop, sender_hw, sender_proto,
5978 target_hw, target_proto);
5979 }
5980 if_proto_free(proto);
5981 }
5982
5983 return result;
5984 }
5985
5986 struct net_thread_marks { };
5987 static const struct net_thread_marks net_thread_marks_base = { };
5988
5989 __private_extern__ const net_thread_marks_t net_thread_marks_none =
5990 &net_thread_marks_base;
5991
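/*
 * The marks below are encoded as pointer offsets from net_thread_marks_base:
 * net_thread_marks_push() returns &base[bits newly set], so the cookie
 * records exactly which bits this caller was responsible for setting, and
 * net_thread_marks_pop() recovers those bits by pointer subtraction before
 * clearing them from uu_network_marks. The unmark variants do the inverse,
 * temporarily clearing bits and later restoring them.
 */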
5992 __private_extern__ net_thread_marks_t
5993 net_thread_marks_push(u_int32_t push)
5994 {
5995 static const char *const base = (const void*)&net_thread_marks_base;
5996 u_int32_t pop = 0;
5997
5998 if (push != 0) {
5999 struct uthread *uth = get_bsdthread_info(current_thread());
6000
6001 pop = push & ~uth->uu_network_marks;
6002 if (pop != 0) {
6003 uth->uu_network_marks |= pop;
6004 }
6005 }
6006
6007 return (net_thread_marks_t)&base[pop];
6008 }
6009
6010 __private_extern__ net_thread_marks_t
6011 net_thread_unmarks_push(u_int32_t unpush)
6012 {
6013 static const char *const base = (const void*)&net_thread_marks_base;
6014 u_int32_t unpop = 0;
6015
6016 if (unpush != 0) {
6017 struct uthread *uth = get_bsdthread_info(current_thread());
6018
6019 unpop = unpush & uth->uu_network_marks;
6020 if (unpop != 0) {
6021 uth->uu_network_marks &= ~unpop;
6022 }
6023 }
6024
6025 return (net_thread_marks_t)&base[unpop];
6026 }
6027
6028 __private_extern__ void
6029 net_thread_marks_pop(net_thread_marks_t popx)
6030 {
6031 static const char *const base = (const void*)&net_thread_marks_base;
6032 const ptrdiff_t pop = (const char *)popx - (const char *)base;
6033
6034 if (pop != 0) {
6035 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6036 struct uthread *uth = get_bsdthread_info(current_thread());
6037
6038 VERIFY((pop & ones) == pop);
6039 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
6040 uth->uu_network_marks &= ~pop;
6041 }
6042 }
6043
6044 __private_extern__ void
6045 net_thread_unmarks_pop(net_thread_marks_t unpopx)
6046 {
6047 static const char *const base = (const void*)&net_thread_marks_base;
6048 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
6049
6050 if (unpop != 0) {
6051 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
6052 struct uthread *uth = get_bsdthread_info(current_thread());
6053
6054 VERIFY((unpop & ones) == unpop);
6055 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
6056 uth->uu_network_marks |= unpop;
6057 }
6058 }
6059
6060 __private_extern__ u_int32_t
6061 net_thread_is_marked(u_int32_t check)
6062 {
6063 if (check != 0) {
6064 struct uthread *uth = get_bsdthread_info(current_thread());
6065 return uth->uu_network_marks & check;
6066 } else {
6067 return 0;
6068 }
6069 }
6070
6071 __private_extern__ u_int32_t
6072 net_thread_is_unmarked(u_int32_t check)
6073 {
6074 if (check != 0) {
6075 struct uthread *uth = get_bsdthread_info(current_thread());
6076 return ~uth->uu_network_marks & check;
6077 } else {
6078 return 0;
6079 }
6080 }
6081
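/*
 * An ARP announcement (gratuitous ARP) carries identical sender and target
 * protocol addresses; such a request must stay on the requested interface
 * rather than being fanned out to every IPv4LL-capable interface.
 */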
6082 static __inline__ int
6083 _is_announcement(const struct sockaddr_in * sender_sin,
6084 const struct sockaddr_in * target_sin)
6085 {
6086 if (target_sin == NULL || sender_sin == NULL) {
6087 return FALSE;
6088 }
6089
6090 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
6091 }
6092
6093 __private_extern__ errno_t
6094 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
6095 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
6096 const struct sockaddr *target_proto0, u_int32_t rtflags)
6097 {
6098 errno_t result = 0;
6099 const struct sockaddr_in * sender_sin;
6100 const struct sockaddr_in * target_sin;
6101 struct sockaddr_inarp target_proto_sinarp;
6102 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
6103
6104 if (target_proto == NULL || sender_proto == NULL) {
6105 return EINVAL;
6106 }
6107
6108 if (sender_proto->sa_family != target_proto->sa_family) {
6109 return EINVAL;
6110 }
6111
6112 /*
6113 * If the target is a (default) router, provide that
6114 * information to the send_arp callback routine.
6115 */
6116 if (rtflags & RTF_ROUTER) {
6117 bcopy(target_proto, &target_proto_sinarp,
6118 sizeof(struct sockaddr_in));
6119 target_proto_sinarp.sin_other |= SIN_ROUTER;
6120 target_proto = (struct sockaddr *)&target_proto_sinarp;
6121 }
6122
6123 /*
6124 * If this is an ARP request and the target IP is IPv4LL,
6125 * send the request on all interfaces. The exception is
6126 * an announcement, which must only appear on the specific
6127 * interface.
6128 */
6129 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
6130 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
6131 if (target_proto->sa_family == AF_INET &&
6132 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
6133 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
6134 !_is_announcement(sender_sin, target_sin)) {
6135 ifnet_t *ifp_list;
6136 u_int32_t count;
6137 u_int32_t ifp_on;
6138
6139 result = ENOTSUP;
6140
6141 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
6142 for (ifp_on = 0; ifp_on < count; ifp_on++) {
6143 errno_t new_result;
6144 ifaddr_t source_hw = NULL;
6145 ifaddr_t source_ip = NULL;
6146 struct sockaddr_in source_ip_copy;
6147 struct ifnet *cur_ifp = ifp_list[ifp_on];
6148
6149 /*
6150 * Only ARP on interfaces marked for IPv4LL
6151 * ARPing. This may mean that we don't ARP on
6152 * the interface the subnet route points to.
6153 */
6154 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
6155 continue;
6156 }
6157
6158 /* Find the source IP address */
6159 ifnet_lock_shared(cur_ifp);
6160 source_hw = cur_ifp->if_lladdr;
6161 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
6162 ifa_link) {
6163 IFA_LOCK(source_ip);
6164 if (source_ip->ifa_addr != NULL &&
6165 source_ip->ifa_addr->sa_family ==
6166 AF_INET) {
6167 /* Copy the source IP address */
6168 source_ip_copy =
6169 *(struct sockaddr_in *)
6170 (void *)source_ip->ifa_addr;
6171 IFA_UNLOCK(source_ip);
6172 break;
6173 }
6174 IFA_UNLOCK(source_ip);
6175 }
6176
6177 /* No IP source address, don't ARP */
6178 if (source_ip == NULL) {
6179 ifnet_lock_done(cur_ifp);
6180 continue;
6181 }
6182
6183 IFA_ADDREF(source_hw);
6184 ifnet_lock_done(cur_ifp);
6185
6186 /* Send the ARP */
6187 new_result = dlil_send_arp_internal(cur_ifp,
6188 arpop, (struct sockaddr_dl *)(void *)
6189 source_hw->ifa_addr,
6190 (struct sockaddr *)&source_ip_copy, NULL,
6191 target_proto);
6192
6193 IFA_REMREF(source_hw);
6194 if (result == ENOTSUP) {
6195 result = new_result;
6196 }
6197 }
6198 ifnet_list_free(ifp_list);
6199 }
6200 } else {
6201 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
6202 sender_proto, target_hw, target_proto);
6203 }
6204
6205 return result;
6206 }
6207
6208 /*
6209 * Caller must hold ifnet head lock.
6210 */
6211 static int
6212 ifnet_lookup(struct ifnet *ifp)
6213 {
6214 struct ifnet *_ifp;
6215
6216 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
6217 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
6218 if (_ifp == ifp) {
6219 break;
6220 }
6221 }
6222 return _ifp != NULL;
6223 }
6224
6225 /*
6226 * Caller has to pass a non-zero refio argument to get a
6227 * IO reference count. This will prevent ifnet_detach from
6228 * being called when there are outstanding io reference counts.
6229 */
6230 int
6231 ifnet_is_attached(struct ifnet *ifp, int refio)
6232 {
6233 int ret;
6234
6235 lck_mtx_lock_spin(&ifp->if_ref_lock);
6236 if ((ret = IF_FULLY_ATTACHED(ifp))) {
6237 if (refio > 0) {
6238 ifp->if_refio++;
6239 }
6240 }
6241 lck_mtx_unlock(&ifp->if_ref_lock);
6242
6243 return ret;
6244 }
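
/*
 * Callers that pass a non-zero refio must balance a successful return with
 * a later ifnet_decr_iorefcnt(); see ifnet_ioctl() and dlil_resolve_multi()
 * above for the typical pattern.
 */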
6245
6246 void
6247 ifnet_incr_pending_thread_count(struct ifnet *ifp)
6248 {
6249 lck_mtx_lock_spin(&ifp->if_ref_lock);
6250 ifp->if_threads_pending++;
6251 lck_mtx_unlock(&ifp->if_ref_lock);
6252 }
6253
6254 void
6255 ifnet_decr_pending_thread_count(struct ifnet *ifp)
6256 {
6257 lck_mtx_lock_spin(&ifp->if_ref_lock);
6258 VERIFY(ifp->if_threads_pending > 0);
6259 ifp->if_threads_pending--;
6260 if (ifp->if_threads_pending == 0) {
6261 wakeup(&ifp->if_threads_pending);
6262 }
6263 lck_mtx_unlock(&ifp->if_ref_lock);
6264 }
6265
6266 /*
6267 * Caller must ensure the interface is attached; the assumption is that
6268 * there is at least an outstanding IO reference count held already.
6269 * Most callers would call ifnet_is_{attached,data_ready}() instead.
6270 */
6271 void
6272 ifnet_incr_iorefcnt(struct ifnet *ifp)
6273 {
6274 lck_mtx_lock_spin(&ifp->if_ref_lock);
6275 VERIFY(IF_FULLY_ATTACHED(ifp));
6276 VERIFY(ifp->if_refio > 0);
6277 ifp->if_refio++;
6278 lck_mtx_unlock(&ifp->if_ref_lock);
6279 }
6280
6281 __attribute__((always_inline))
6282 static void
6283 ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
6284 {
6285 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
6286
6287 VERIFY(ifp->if_refio > 0);
6288 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6289
6290 ifp->if_refio--;
6291 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
6292
6293 /*
6294 * If there are no more outstanding IO references, wake up the
6295 * ifnet_detach thread if the detaching flag is set.
6296 */
6297 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
6298 wakeup(&(ifp->if_refio));
6299 }
6300 }
6301
6302 void
6303 ifnet_decr_iorefcnt(struct ifnet *ifp)
6304 {
6305 lck_mtx_lock_spin(&ifp->if_ref_lock);
6306 ifnet_decr_iorefcnt_locked(ifp);
6307 lck_mtx_unlock(&ifp->if_ref_lock);
6308 }
6309
6310 boolean_t
6311 ifnet_datamov_begin(struct ifnet *ifp)
6312 {
6313 boolean_t ret;
6314
6315 lck_mtx_lock_spin(&ifp->if_ref_lock);
6316 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
6317 ifp->if_refio++;
6318 ifp->if_datamov++;
6319 }
6320 lck_mtx_unlock(&ifp->if_ref_lock);
6321
6322 return ret;
6323 }
6324
6325 void
6326 ifnet_datamov_end(struct ifnet *ifp)
6327 {
6328 lck_mtx_lock_spin(&ifp->if_ref_lock);
6329 VERIFY(ifp->if_datamov > 0);
6330 /*
6331 * If there is no more thread moving data, wake up any
6332 * drainers that are blocked waiting for this.
6333 */
6334 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
6335 wakeup(&(ifp->if_datamov));
6336 }
6337 ifnet_decr_iorefcnt_locked(ifp);
6338 lck_mtx_unlock(&ifp->if_ref_lock);
6339 }
6340
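/*
 * ifnet_datamov_begin()/end() above bracket normal data movement; the three
 * routines below implement the quiesce protocol. A sketch of the expected
 * call sequence (the actual callers live outside this file):
 *
 *      ifnet_datamov_suspend(ifp);   // clears IFRF_READY on the first suspend
 *      ifnet_datamov_drain(ifp);     // waits for in-flight movers, flushes queues
 *      ...                           // reconfigure while quiesced
 *      ifnet_datamov_resume(ifp);    // restores IFRF_READY on the last resume
 */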
6341 void
6342 ifnet_datamov_suspend(struct ifnet *ifp)
6343 {
6344 lck_mtx_lock_spin(&ifp->if_ref_lock);
6345 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6346 ifp->if_refio++;
6347 if (ifp->if_suspend++ == 0) {
6348 VERIFY(ifp->if_refflags & IFRF_READY);
6349 ifp->if_refflags &= ~IFRF_READY;
6350 }
6351 lck_mtx_unlock(&ifp->if_ref_lock);
6352 }
6353
6354 void
6355 ifnet_datamov_drain(struct ifnet *ifp)
6356 {
6357 lck_mtx_lock(&ifp->if_ref_lock);
6358 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
6359 /* data movement must already be suspended */
6360 VERIFY(ifp->if_suspend > 0);
6361 VERIFY(!(ifp->if_refflags & IFRF_READY));
6362 ifp->if_drainers++;
6363 while (ifp->if_datamov != 0) {
6364 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
6365 (PZERO - 1), __func__, NULL);
6366 }
6367 VERIFY(!(ifp->if_refflags & IFRF_READY));
6368 VERIFY(ifp->if_drainers > 0);
6369 ifp->if_drainers--;
6370 lck_mtx_unlock(&ifp->if_ref_lock);
6371
6372 /* purge the interface queues */
6373 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
6374 if_qflush(ifp, 0);
6375 }
6376 }
6377
6378 void
6379 ifnet_datamov_resume(struct ifnet *ifp)
6380 {
6381 lck_mtx_lock(&ifp->if_ref_lock);
6382 /* data movement must already be suspended */
6383 VERIFY(ifp->if_suspend > 0);
6384 if (--ifp->if_suspend == 0) {
6385 VERIFY(!(ifp->if_refflags & IFRF_READY));
6386 ifp->if_refflags |= IFRF_READY;
6387 }
6388 ifnet_decr_iorefcnt_locked(ifp);
6389 lck_mtx_unlock(&ifp->if_ref_lock);
6390 }
6391
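/*
 * Record the call stack of each ifnet refhold/refrele in a small
 * per-interface ring buffer (IF_REF_TRACE_HIST_SIZE entries); only used
 * when the dlil_ifnet was allocated with DLIF_DEBUG set.
 */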
6392 static void
6393 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
6394 {
6395 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
6396 ctrace_t *tr;
6397 u_int32_t idx;
6398 u_int16_t *cnt;
6399
6400 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
6401 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
6402 /* NOTREACHED */
6403 }
6404
6405 if (refhold) {
6406 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
6407 tr = dl_if_dbg->dldbg_if_refhold;
6408 } else {
6409 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
6410 tr = dl_if_dbg->dldbg_if_refrele;
6411 }
6412
6413 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
6414 ctrace_record(&tr[idx]);
6415 }
6416
6417 errno_t
6418 dlil_if_ref(struct ifnet *ifp)
6419 {
6420 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6421
6422 if (dl_if == NULL) {
6423 return EINVAL;
6424 }
6425
6426 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6427 ++dl_if->dl_if_refcnt;
6428 if (dl_if->dl_if_refcnt == 0) {
6429 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
6430 /* NOTREACHED */
6431 }
6432 if (dl_if->dl_if_trace != NULL) {
6433 (*dl_if->dl_if_trace)(dl_if, TRUE);
6434 }
6435 lck_mtx_unlock(&dl_if->dl_if_lock);
6436
6437 return 0;
6438 }
6439
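/*
 * Drop a reference on the underlying dlil_ifnet. If the last reference is
 * dropped while the ifnet is still embryonic (ifnet_attach() never
 * completed), the ifnet is released via dlil_if_release().
 */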
6440 errno_t
6441 dlil_if_free(struct ifnet *ifp)
6442 {
6443 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6444 bool need_release = FALSE;
6445
6446 if (dl_if == NULL) {
6447 return EINVAL;
6448 }
6449
6450 lck_mtx_lock_spin(&dl_if->dl_if_lock);
6451 switch (dl_if->dl_if_refcnt) {
6452 case 0:
6453 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
6454 /* NOTREACHED */
6455 break;
6456 case 1:
6457 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
6458 need_release = TRUE;
6459 }
6460 break;
6461 default:
6462 break;
6463 }
6464 --dl_if->dl_if_refcnt;
6465 if (dl_if->dl_if_trace != NULL) {
6466 (*dl_if->dl_if_trace)(dl_if, FALSE);
6467 }
6468 lck_mtx_unlock(&dl_if->dl_if_lock);
6469 if (need_release) {
6470 dlil_if_release(ifp);
6471 }
6472 return 0;
6473 }
6474
6475 static errno_t
6476 dlil_attach_protocol_internal(struct if_proto *proto,
6477 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
6478 uint32_t * proto_count)
6479 {
6480 struct kev_dl_proto_data ev_pr_data;
6481 struct ifnet *ifp = proto->ifp;
6482 int retval = 0;
6483 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
6484 struct if_proto *prev_proto;
6485 struct if_proto *_proto;
6486
6487 /* callee holds a proto refcnt upon success */
6488 ifnet_lock_exclusive(ifp);
6489 _proto = find_attached_proto(ifp, proto->protocol_family);
6490 if (_proto != NULL) {
6491 ifnet_lock_done(ifp);
6492 if_proto_free(_proto);
6493 return EEXIST;
6494 }
6495
6496 /*
6497 * Call family module add_proto routine so it can refine the
6498 * demux descriptors as it wishes.
6499 */
6500 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
6501 demux_count);
6502 if (retval) {
6503 ifnet_lock_done(ifp);
6504 return retval;
6505 }
6506
6507 /*
6508 * Insert the protocol in the hash
6509 */
6510 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
6511 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
6512 prev_proto = SLIST_NEXT(prev_proto, next_hash);
6513 }
6514 if (prev_proto) {
6515 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
6516 } else {
6517 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
6518 proto, next_hash);
6519 }
6520
6521 /* hold a proto refcnt for attach */
6522 if_proto_ref(proto);
6523
6524 /*
6525 * The reserved field carries the number of protocols still attached
6526 * (subject to change)
6527 */
6528 ev_pr_data.proto_family = proto->protocol_family;
6529 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
6530
6531 ifnet_lock_done(ifp);
6532
6533 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
6534 (struct net_event_data *)&ev_pr_data,
6535 sizeof(struct kev_dl_proto_data));
6536 if (proto_count != NULL) {
6537 *proto_count = ev_pr_data.proto_remaining_count;
6538 }
6539 return retval;
6540 }
6541
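/*
 * A minimal caller sketch for the v1 attach KPI; my_proto_input,
 * my_proto_pre_output, my_demux_descs and my_demux_count are hypothetical
 * names for pieces the protocol module would supply, and only the fields
 * consumed by this file are shown:
 *
 *      struct ifnet_attach_proto_param p;
 *
 *      bzero(&p, sizeof(p));
 *      p.input = my_proto_input;
 *      p.pre_output = my_proto_pre_output;
 *      p.demux_list = my_demux_descs;
 *      p.demux_count = my_demux_count;
 *      error = ifnet_attach_protocol(ifp, PF_INET, &p);
 */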
6542 errno_t
6543 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
6544 const struct ifnet_attach_proto_param *proto_details)
6545 {
6546 int retval = 0;
6547 struct if_proto *ifproto = NULL;
6548 uint32_t proto_count = 0;
6549
6550 ifnet_head_lock_shared();
6551 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6552 retval = EINVAL;
6553 goto end;
6554 }
6555 /* Check that the interface is in the global list */
6556 if (!ifnet_lookup(ifp)) {
6557 retval = ENXIO;
6558 goto end;
6559 }
6560
6561 ifproto = zalloc(dlif_proto_zone);
6562 if (ifproto == NULL) {
6563 retval = ENOMEM;
6564 goto end;
6565 }
6566 bzero(ifproto, dlif_proto_size);
6567
6568 /* refcnt held above during lookup */
6569 ifproto->ifp = ifp;
6570 ifproto->protocol_family = protocol;
6571 ifproto->proto_kpi = kProtoKPI_v1;
6572 ifproto->kpi.v1.input = proto_details->input;
6573 ifproto->kpi.v1.pre_output = proto_details->pre_output;
6574 ifproto->kpi.v1.event = proto_details->event;
6575 ifproto->kpi.v1.ioctl = proto_details->ioctl;
6576 ifproto->kpi.v1.detached = proto_details->detached;
6577 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
6578 ifproto->kpi.v1.send_arp = proto_details->send_arp;
6579
6580 retval = dlil_attach_protocol_internal(ifproto,
6581 proto_details->demux_list, proto_details->demux_count,
6582 &proto_count);
6583
6584 end:
6585 if (retval != 0 && retval != EEXIST) {
6586 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
6587 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6588 } else {
6589 if (dlil_verbose) {
6590 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
6591 ifp != NULL ? if_name(ifp) : "N/A",
6592 protocol, proto_count);
6593 }
6594 }
6595 ifnet_head_done();
6596 if (retval == 0) {
6597 /*
6598 * A protocol has been attached, mark the interface up.
6599 * This used to be done by configd.KernelEventMonitor, but that
6600 * is inherently prone to races (rdar://problem/30810208).
6601 */
6602 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6603 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6604 dlil_post_sifflags_msg(ifp);
6605 } else if (ifproto != NULL) {
6606 zfree(dlif_proto_zone, ifproto);
6607 }
6608 return retval;
6609 }
6610
6611 errno_t
6612 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6613 const struct ifnet_attach_proto_param_v2 *proto_details)
6614 {
6615 int retval = 0;
6616 struct if_proto *ifproto = NULL;
6617 uint32_t proto_count = 0;
6618
6619 ifnet_head_lock_shared();
6620 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6621 retval = EINVAL;
6622 goto end;
6623 }
6624 /* Check that the interface is in the global list */
6625 if (!ifnet_lookup(ifp)) {
6626 retval = ENXIO;
6627 goto end;
6628 }
6629
6630 ifproto = zalloc(dlif_proto_zone);
6631 if (ifproto == NULL) {
6632 retval = ENOMEM;
6633 goto end;
6634 }
6635 bzero(ifproto, sizeof(*ifproto));
6636
6637 /* refcnt held above during lookup */
6638 ifproto->ifp = ifp;
6639 ifproto->protocol_family = protocol;
6640 ifproto->proto_kpi = kProtoKPI_v2;
6641 ifproto->kpi.v2.input = proto_details->input;
6642 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6643 ifproto->kpi.v2.event = proto_details->event;
6644 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6645 ifproto->kpi.v2.detached = proto_details->detached;
6646 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6647 ifproto->kpi.v2.send_arp = proto_details->send_arp;
6648
6649 retval = dlil_attach_protocol_internal(ifproto,
6650 proto_details->demux_list, proto_details->demux_count,
6651 &proto_count);
6652
6653 end:
6654 if (retval != 0 && retval != EEXIST) {
6655 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6656 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
6657 } else {
6658 if (dlil_verbose) {
6659 DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
6660 ifp != NULL ? if_name(ifp) : "N/A",
6661 protocol, proto_count);
6662 }
6663 }
6664 ifnet_head_done();
6665 if (retval == 0) {
6666 /*
6667 * A protocol has been attached, mark the interface up.
6668 * This used to be done by configd.KernelEventMonitor, but that
6669 * is inherently prone to races (rdar://problem/30810208).
6670 */
6671 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6672 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6673 dlil_post_sifflags_msg(ifp);
6674 } else if (ifproto != NULL) {
6675 zfree(dlif_proto_zone, ifproto);
6676 }
6677 return retval;
6678 }
6679
6680 errno_t
6681 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
6682 {
6683 struct if_proto *proto = NULL;
6684 int retval = 0;
6685
6686 if (ifp == NULL || proto_family == 0) {
6687 retval = EINVAL;
6688 goto end;
6689 }
6690
6691 ifnet_lock_exclusive(ifp);
6692 /* callee holds a proto refcnt upon success */
6693 proto = find_attached_proto(ifp, proto_family);
6694 if (proto == NULL) {
6695 retval = ENXIO;
6696 ifnet_lock_done(ifp);
6697 goto end;
6698 }
6699
6700 /* call family module del_proto */
6701 if (ifp->if_del_proto) {
6702 ifp->if_del_proto(ifp, proto->protocol_family);
6703 }
6704
6705 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6706 proto, if_proto, next_hash);
6707
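/*
 * Point the KPI callbacks at the ifproto_media_* stubs defined below, so
 * that anything still holding a reference to this proto after detach gets
 * ENXIO instead of calling into a protocol that is going away.
 */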
6708 if (proto->proto_kpi == kProtoKPI_v1) {
6709 proto->kpi.v1.input = ifproto_media_input_v1;
6710 proto->kpi.v1.pre_output = ifproto_media_preout;
6711 proto->kpi.v1.event = ifproto_media_event;
6712 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6713 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6714 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6715 } else {
6716 proto->kpi.v2.input = ifproto_media_input_v2;
6717 proto->kpi.v2.pre_output = ifproto_media_preout;
6718 proto->kpi.v2.event = ifproto_media_event;
6719 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6720 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6721 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6722 }
6723 proto->detached = 1;
6724 ifnet_lock_done(ifp);
6725
6726 if (dlil_verbose) {
6727 DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
6728 (proto->proto_kpi == kProtoKPI_v1) ?
6729 "v1" : "v2", proto_family);
6730 }
6731
6732 /* release proto refcnt held during protocol attach */
6733 if_proto_free(proto);
6734
6735 /*
6736 * Release proto refcnt held during lookup; the rest of
6737 * protocol detach steps will happen when the last proto
6738 * reference is released.
6739 */
6740 if_proto_free(proto);
6741
6742 end:
6743 return retval;
6744 }
6745
6746
6747 static errno_t
6748 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6749 struct mbuf *packet, char *header)
6750 {
6751 #pragma unused(ifp, protocol, packet, header)
6752 return ENXIO;
6753 }
6754
6755 static errno_t
6756 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6757 struct mbuf *packet)
6758 {
6759 #pragma unused(ifp, protocol, packet)
6760 return ENXIO;
6761 }
6762
6763 static errno_t
6764 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6765 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6766 char *link_layer_dest)
6767 {
6768 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
6769 return ENXIO;
6770 }
6771
6772 static void
6773 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6774 const struct kev_msg *event)
6775 {
6776 #pragma unused(ifp, protocol, event)
6777 }
6778
6779 static errno_t
6780 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6781 unsigned long command, void *argument)
6782 {
6783 #pragma unused(ifp, protocol, command, argument)
6784 return ENXIO;
6785 }
6786
6787 static errno_t
6788 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6789 struct sockaddr_dl *out_ll, size_t ll_len)
6790 {
6791 #pragma unused(ifp, proto_addr, out_ll, ll_len)
6792 return ENXIO;
6793 }
6794
6795 static errno_t
6796 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6797 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6798 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6799 {
6800 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
6801 return ENXIO;
6802 }
6803
6804 extern int if_next_index(void);
6805 extern int tcp_ecn_outbound;
6806
6807 errno_t
6808 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
6809 {
6810 struct ifnet *tmp_if;
6811 struct ifaddr *ifa;
6812 struct if_data_internal if_data_saved;
6813 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6814 struct dlil_threading_info *dl_inp;
6815 u_int32_t sflags = 0;
6816 int err;
6817
6818 if (ifp == NULL) {
6819 return EINVAL;
6820 }
6821
6822 /*
6823 * Serialize ifnet attach using dlil_ifnet_lock, in order to
6824 * prevent the interface from being configured while it is
6825 * embryonic, as ifnet_head_lock is dropped and reacquired
6826 * below prior to marking the ifnet with IFRF_ATTACHED.
6827 */
6828 dlil_if_lock();
6829 ifnet_head_lock_exclusive();
6830 /* Verify we aren't already on the list */
6831 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
6832 if (tmp_if == ifp) {
6833 ifnet_head_done();
6834 dlil_if_unlock();
6835 return EEXIST;
6836 }
6837 }
6838
6839 lck_mtx_lock_spin(&ifp->if_ref_lock);
6840 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
6841 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6842 __func__, ifp);
6843 /* NOTREACHED */
6844 }
6845 lck_mtx_unlock(&ifp->if_ref_lock);
6846
6847 ifnet_lock_exclusive(ifp);
6848
6849 /* Sanity check */
6850 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6851 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6852 VERIFY(ifp->if_threads_pending == 0);
6853
6854 if (ll_addr != NULL) {
6855 if (ifp->if_addrlen == 0) {
6856 ifp->if_addrlen = ll_addr->sdl_alen;
6857 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
6858 ifnet_lock_done(ifp);
6859 ifnet_head_done();
6860 dlil_if_unlock();
6861 return EINVAL;
6862 }
6863 }
6864
6865 /*
6866 * Allow interfaces without protocol families to attach
6867 * only if they have the necessary fields filled out.
6868 */
6869 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
6870 DLIL_PRINTF("%s: Attempt to attach interface without "
6871 "family module - %d\n", __func__, ifp->if_family);
6872 ifnet_lock_done(ifp);
6873 ifnet_head_done();
6874 dlil_if_unlock();
6875 return ENODEV;
6876 }
6877
6878 /* Allocate protocol hash table */
6879 VERIFY(ifp->if_proto_hash == NULL);
6880 ifp->if_proto_hash = zalloc(dlif_phash_zone);
6881 if (ifp->if_proto_hash == NULL) {
6882 ifnet_lock_done(ifp);
6883 ifnet_head_done();
6884 dlil_if_unlock();
6885 return ENOBUFS;
6886 }
6887 bzero(ifp->if_proto_hash, dlif_phash_size);
6888
6889 lck_mtx_lock_spin(&ifp->if_flt_lock);
6890 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6891 TAILQ_INIT(&ifp->if_flt_head);
6892 VERIFY(ifp->if_flt_busy == 0);
6893 VERIFY(ifp->if_flt_waiters == 0);
6894 lck_mtx_unlock(&ifp->if_flt_lock);
6895
6896 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
6897 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
6898 LIST_INIT(&ifp->if_multiaddrs);
6899 }
6900
6901 VERIFY(ifp->if_allhostsinm == NULL);
6902 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6903 TAILQ_INIT(&ifp->if_addrhead);
6904
6905 if (ifp->if_index == 0) {
6906 int idx = if_next_index();
6907
6908 if (idx == -1) {
6909 ifp->if_index = 0;
6910 ifnet_lock_done(ifp);
6911 ifnet_head_done();
6912 dlil_if_unlock();
6913 return ENOBUFS;
6914 }
6915 ifp->if_index = idx;
6916 }
6917 /* There should not be anything occupying this slot */
6918 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6919
6920 /* allocate (if needed) and initialize a link address */
6921 ifa = dlil_alloc_lladdr(ifp, ll_addr);
6922 if (ifa == NULL) {
6923 ifnet_lock_done(ifp);
6924 ifnet_head_done();
6925 dlil_if_unlock();
6926 return ENOBUFS;
6927 }
6928
6929 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
6930 ifnet_addrs[ifp->if_index - 1] = ifa;
6931
6932 /* make this address the first on the list */
6933 IFA_LOCK(ifa);
6934 /* hold a reference for ifnet_addrs[] */
6935 IFA_ADDREF_LOCKED(ifa);
6936 /* if_attach_link_ifa() holds a reference for ifa_link */
6937 if_attach_link_ifa(ifp, ifa);
6938 IFA_UNLOCK(ifa);
6939
6940 #if CONFIG_MACF_NET
6941 mac_ifnet_label_associate(ifp);
6942 #endif
6943
6944 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
6945 ifindex2ifnet[ifp->if_index] = ifp;
6946
6947 /* Hold a reference to the underlying dlil_ifnet */
6948 ifnet_reference(ifp);
6949
6950 /* Clear stats (save and restore other fields that we care about) */
6951 if_data_saved = ifp->if_data;
6952 bzero(&ifp->if_data, sizeof(ifp->if_data));
6953 ifp->if_data.ifi_type = if_data_saved.ifi_type;
6954 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
6955 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
6956 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
6957 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
6958 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
6959 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
6960 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
6961 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
6962 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
6963 ifnet_touch_lastchange(ifp);
6964
6965 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
6966 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
6967 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
6968
6969 /* By default, use SFB and enable flow advisory */
6970 sflags = PKTSCHEDF_QALG_SFB;
6971 if (if_flowadv) {
6972 sflags |= PKTSCHEDF_QALG_FLOWCTL;
6973 }
6974
6975 if (if_delaybased_queue) {
6976 sflags |= PKTSCHEDF_QALG_DELAYBASED;
6977 }
6978
6979 if (ifp->if_output_sched_model ==
6980 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
6981 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
6982 }
6983
6984 /* Initialize transmit queue(s) */
6985 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
6986 if (err != 0) {
6987 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
6988 "err=%d", __func__, ifp, err);
6989 /* NOTREACHED */
6990 }
6991
6992 /* Sanity checks on the input thread storage */
6993 dl_inp = &dl_if->dl_if_inpstorage;
6994 bzero(&dl_inp->stats, sizeof(dl_inp->stats));
6995 VERIFY(dl_inp->input_waiting == 0);
6996 VERIFY(dl_inp->wtot == 0);
6997 VERIFY(dl_inp->ifp == NULL);
6998 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
6999 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
7000 VERIFY(!dl_inp->net_affinity);
7001 VERIFY(ifp->if_inp == NULL);
7002 VERIFY(dl_inp->input_thr == THREAD_NULL);
7003 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
7004 VERIFY(dl_inp->poll_thr == THREAD_NULL);
7005 VERIFY(dl_inp->tag == 0);
7006
7007 #if IFNET_INPUT_SANITY_CHK
7008 VERIFY(dl_inp->input_mbuf_cnt == 0);
7009 #endif /* IFNET_INPUT_SANITY_CHK */
7010
7011 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7012 dlil_reset_rxpoll_params(ifp);
7013 /*
7014 * A specific DLIL input thread is created per non-loopback interface.
7015 */
7016 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
7017 ifp->if_inp = dl_inp;
7018 ifnet_incr_pending_thread_count(ifp);
7019 err = dlil_create_input_thread(ifp, ifp->if_inp);
7020 if (err != 0) {
7021 panic_plain("%s: ifp=%p couldn't get an input thread; "
7022 "err=%d", __func__, ifp, err);
7023 /* NOTREACHED */
7024 }
7025 }
7026 /*
7027 * If the driver supports the new transmit model, calculate flow hash
7028 * and create a workloop starter thread to invoke the if_start callback
7029 * where the packets may be dequeued and transmitted.
7030 */
7031 if (ifp->if_eflags & IFEF_TXSTART) {
7032 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
7033 VERIFY(ifp->if_flowhash != 0);
7034 VERIFY(ifp->if_start_thread == THREAD_NULL);
7035
7036 ifnet_set_start_cycle(ifp, NULL);
7037 ifp->if_start_active = 0;
7038 ifp->if_start_req = 0;
7039 ifp->if_start_flags = 0;
7040 VERIFY(ifp->if_start != NULL);
7041 ifnet_incr_pending_thread_count(ifp);
7042 if ((err = kernel_thread_start(ifnet_start_thread_func,
7043 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
7044 panic_plain("%s: "
7045 "ifp=%p couldn't get a start thread; "
7046 "err=%d", __func__, ifp, err);
7047 /* NOTREACHED */
7048 }
7049 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
7050 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
7051 } else {
7052 ifp->if_flowhash = 0;
7053 }
7054
7055 /* Reset polling parameters */
7056 ifnet_set_poll_cycle(ifp, NULL);
7057 ifp->if_poll_update = 0;
7058 ifp->if_poll_flags = 0;
7059 ifp->if_poll_req = 0;
7060 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7061
7062 /*
7063 * If the driver supports the new receive model, create a poller
7064 * thread to invoke the if_input_poll callback, where packets may
7065 * be dequeued from the driver and processed for reception.
7066 * If the interface is netif-compat, the poller thread is managed by netif.
7067 */
7068 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL) &&
7069 (ifp->if_xflags & IFXF_LEGACY)) {
7070 VERIFY(ifp->if_input_poll != NULL);
7071 VERIFY(ifp->if_input_ctl != NULL);
7072 ifnet_incr_pending_thread_count(ifp);
7073 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
7074 &ifp->if_poll_thread)) != KERN_SUCCESS) {
7075 panic_plain("%s: ifp=%p couldn't get a poll thread; "
7076 "err=%d", __func__, ifp, err);
7077 /* NOTREACHED */
7078 }
7079 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
7080 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
7081 }
7082
7083 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7084 VERIFY(ifp->if_desc.ifd_len == 0);
7085 VERIFY(ifp->if_desc.ifd_desc != NULL);
7086
7087 /* Record attach PC stacktrace */
7088 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
7089
7090 ifp->if_updatemcasts = 0;
7091 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
7092 struct ifmultiaddr *ifma;
7093 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
7094 IFMA_LOCK(ifma);
7095 if (ifma->ifma_addr->sa_family == AF_LINK ||
7096 ifma->ifma_addr->sa_family == AF_UNSPEC) {
7097 ifp->if_updatemcasts++;
7098 }
7099 IFMA_UNLOCK(ifma);
7100 }
7101
7102 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
7103 "membership(s)\n", if_name(ifp),
7104 ifp->if_updatemcasts);
7105 }
7106
7107 /* Clear logging parameters */
7108 bzero(&ifp->if_log, sizeof(ifp->if_log));
7109
7110 /* Clear foreground/realtime activity timestamps */
7111 ifp->if_fg_sendts = 0;
7112 ifp->if_rt_sendts = 0;
7113
7114 VERIFY(ifp->if_delegated.ifp == NULL);
7115 VERIFY(ifp->if_delegated.type == 0);
7116 VERIFY(ifp->if_delegated.family == 0);
7117 VERIFY(ifp->if_delegated.subfamily == 0);
7118 VERIFY(ifp->if_delegated.expensive == 0);
7119 VERIFY(ifp->if_delegated.constrained == 0);
7120
7121 VERIFY(ifp->if_agentids == NULL);
7122 VERIFY(ifp->if_agentcount == 0);
7123
7124 /* Reset interface state */
7125 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7126 ifp->if_interface_state.valid_bitmask |=
7127 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7128 ifp->if_interface_state.interface_availability =
7129 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
7130
7131 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
7132 if (ifp == lo_ifp) {
7133 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
7134 ifp->if_interface_state.valid_bitmask |=
7135 IF_INTERFACE_STATE_LQM_STATE_VALID;
7136 } else {
7137 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
7138 }
7139
7140 /*
7141 * Enable ECN capability on this interface depending on the
7142 * value of ECN global setting
7143 */
7144 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
7145 ifp->if_eflags |= IFEF_ECN_ENABLE;
7146 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
7147 }
7148
7149 /*
7150 * Built-in Cyclops always on policy for WiFi infra
7151 */
7152 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
7153 errno_t error;
7154
7155 error = if_set_qosmarking_mode(ifp,
7156 IFRTYPE_QOSMARKING_FASTLANE);
7157 if (error != 0) {
7158 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
7159 __func__, ifp->if_xname, error);
7160 } else {
7161 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
7162 #if (DEVELOPMENT || DEBUG)
7163 DLIL_PRINTF("%s fastlane enabled on %s\n",
7164 __func__, ifp->if_xname);
7165 #endif /* (DEVELOPMENT || DEBUG) */
7166 }
7167 }
7168
7169 ifnet_lock_done(ifp);
7170 ifnet_head_done();
7171
7172
7173 lck_mtx_lock(&ifp->if_cached_route_lock);
7174 /* Enable forwarding cached route */
7175 ifp->if_fwd_cacheok = 1;
7176 /* Clean up any existing cached routes */
7177 ROUTE_RELEASE(&ifp->if_fwd_route);
7178 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7179 ROUTE_RELEASE(&ifp->if_src_route);
7180 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7181 ROUTE_RELEASE(&ifp->if_src_route6);
7182 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7183 lck_mtx_unlock(&ifp->if_cached_route_lock);
7184
7185 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
7186
7187 /*
7188 * Allocate and attach IGMPv3/MLDv2 interface specific variables
7189 * and trees; do this before the ifnet is marked as attached.
7190 * The ifnet keeps the reference to the info structures even after
7191 * the ifnet is detached, since the network-layer records still
7192 * refer to the info structures even after that. This also
7193 * makes it possible for them to still function after the ifnet
7194 * is recycled or reattached.
7195 */
7196 #if INET
7197 if (IGMP_IFINFO(ifp) == NULL) {
7198 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
7199 VERIFY(IGMP_IFINFO(ifp) != NULL);
7200 } else {
7201 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
7202 igmp_domifreattach(IGMP_IFINFO(ifp));
7203 }
7204 #endif /* INET */
7205 #if INET6
7206 if (MLD_IFINFO(ifp) == NULL) {
7207 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
7208 VERIFY(MLD_IFINFO(ifp) != NULL);
7209 } else {
7210 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
7211 mld_domifreattach(MLD_IFINFO(ifp));
7212 }
7213 #endif /* INET6 */
7214
7215 VERIFY(ifp->if_data_threshold == 0);
7216 VERIFY(ifp->if_dt_tcall != NULL);
7217
7218 /*
7219 * Wait for the created kernel threads for I/O to get
7220 * scheduled and run at least once before we proceed
7221 * to mark the interface as attached.
7222 */
7223 lck_mtx_lock(&ifp->if_ref_lock);
7224 while (ifp->if_threads_pending != 0) {
7225 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
7226 "interface %s to get scheduled at least once.\n",
7227 __func__, ifp->if_xname);
7228 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
7229 __func__, NULL);
7230 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
7231 }
7232 lck_mtx_unlock(&ifp->if_ref_lock);
7233 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
7234 "at least once. Proceeding.\n", __func__, ifp->if_xname);
7235
7236 /* Finally, mark this ifnet as attached. */
7237 lck_mtx_lock(rnh_lock);
7238 ifnet_lock_exclusive(ifp);
7239 lck_mtx_lock_spin(&ifp->if_ref_lock);
7240 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
7241 lck_mtx_unlock(&ifp->if_ref_lock);
7242 if (net_rtref) {
7243 /* boot-args override; enable idle notification */
7244 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
7245 IFRF_IDLE_NOTIFY);
7246 } else {
7247 /* apply previous request(s) to set the idle flags, if any */
7248 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
7249 ifp->if_idle_new_flags_mask);
7250 }
7251 ifnet_lock_done(ifp);
7252 lck_mtx_unlock(rnh_lock);
7253 dlil_if_unlock();
7254
7255 #if PF
7256 /*
7257 * Attach packet filter to this interface, if enabled.
7258 */
7259 pf_ifnet_hook(ifp, 1);
7260 #endif /* PF */
7261
7262 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
7263
7264 if (dlil_verbose) {
7265 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
7266 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
7267 }
7268
7269 return 0;
7270 }
7271
7272 /*
7273 * Prepare the storage for the first/permanent link address, which must
7274 * have the same lifetime as the ifnet itself. Although the link
7275 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
7276 * its location in memory must never change as it may still be referred
7277 * to by some parts of the system afterwards (unfortunate implementation
7278 * artifacts inherited from BSD.)
7279 *
7280 * Caller must hold ifnet lock as writer.
7281 */
7282 static struct ifaddr *
7283 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
7284 {
7285 struct ifaddr *ifa, *oifa;
7286 struct sockaddr_dl *asdl, *msdl;
7287 char workbuf[IFNAMSIZ * 2];
7288 int namelen, masklen, socksize;
7289 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7290
7291 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
7292 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
7293
7294 namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
7295 if_name(ifp));
7296 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
7297 + ((namelen > 0) ? namelen : 0);
7298 socksize = masklen + ifp->if_addrlen;
7299 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
7300 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
7301 socksize = sizeof(struct sockaddr_dl);
7302 }
7303 socksize = ROUNDUP(socksize);
7304 #undef ROUNDUP
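/*
 * Worked example: ROUNDUP() above rounds its argument up to the next
 * multiple of sizeof (u_int32_t), i.e. 4 bytes:
 *
 *	ROUNDUP(17) == 1 + (16 | 3) == 20
 *	ROUNDUP(20) == 1 + (19 | 3) == 20
 *	ROUNDUP(21) == 1 + (20 | 3) == 24
 *
 * so socksize always ends up 4-byte aligned and never smaller than
 * sizeof (struct sockaddr_dl).
 */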
7305
7306 ifa = ifp->if_lladdr;
7307 if (socksize > DLIL_SDLMAXLEN ||
7308 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
7309 /*
7310 * Rare, but in the event that the link address requires
7311 * more storage space than DLIL_SDLMAXLEN, allocate the
7312 * largest possible storage for the address and mask, so
7313 * that we can reuse the same space when if_addrlen grows
7314 * or shrinks.
7315 */
7316 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
7317 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
7318 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
7319 if (ifa == NULL) {
7320 return NULL;
7321 }
7322 ifa_lock_init(ifa);
7323 /* Don't set IFD_ALLOC, as this is permanent */
7324 ifa->ifa_debug = IFD_LINK;
7325 }
7326 IFA_LOCK(ifa);
7327 /* address and mask sockaddr_dl locations */
7328 asdl = (struct sockaddr_dl *)(ifa + 1);
7329 bzero(asdl, SOCK_MAXADDRLEN);
7330 msdl = (struct sockaddr_dl *)(void *)
7331 ((char *)asdl + SOCK_MAXADDRLEN);
7332 bzero(msdl, SOCK_MAXADDRLEN);
7333 } else {
7334 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
7335 /*
7336 * Use the storage areas for address and mask within the
7337 * dlil_ifnet structure. This is the most common case.
7338 */
7339 if (ifa == NULL) {
7340 ifa = &dl_if->dl_if_lladdr.ifa;
7341 ifa_lock_init(ifa);
7342 /* Don't set IFD_ALLOC, as this is permanent */
7343 ifa->ifa_debug = IFD_LINK;
7344 }
7345 IFA_LOCK(ifa);
7346 /* address and mask sockaddr_dl locations */
7347 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
7348 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
7349 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
7350 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
7351 }
7352
7353 /* hold a permanent reference for the ifnet itself */
7354 IFA_ADDREF_LOCKED(ifa);
7355 oifa = ifp->if_lladdr;
7356 ifp->if_lladdr = ifa;
7357
7358 VERIFY(ifa->ifa_debug == IFD_LINK);
7359 ifa->ifa_ifp = ifp;
7360 ifa->ifa_rtrequest = link_rtrequest;
7361 ifa->ifa_addr = (struct sockaddr *)asdl;
7362 asdl->sdl_len = socksize;
7363 asdl->sdl_family = AF_LINK;
7364 if (namelen > 0) {
7365 bcopy(workbuf, asdl->sdl_data, min(namelen,
7366 sizeof(asdl->sdl_data)));
7367 asdl->sdl_nlen = namelen;
7368 } else {
7369 asdl->sdl_nlen = 0;
7370 }
7371 asdl->sdl_index = ifp->if_index;
7372 asdl->sdl_type = ifp->if_type;
7373 if (ll_addr != NULL) {
7374 asdl->sdl_alen = ll_addr->sdl_alen;
7375 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
7376 } else {
7377 asdl->sdl_alen = 0;
7378 }
7379 ifa->ifa_netmask = (struct sockaddr *)msdl;
7380 msdl->sdl_len = masklen;
7381 while (namelen > 0) {
7382 msdl->sdl_data[--namelen] = 0xff;
7383 }
7384 IFA_UNLOCK(ifa);
7385
7386 if (oifa != NULL) {
7387 IFA_REMREF(oifa);
7388 }
7389
7390 return ifa;
7391 }
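/*
 * Illustrative sketch: for a hypothetical Ethernet interface "en0" with a
 * 6-byte link-layer address, dlil_alloc_lladdr() above would compute
 * roughly:
 *
 *	namelen  = 3					("en0")
 *	masklen  = offsetof(struct sockaddr_dl, sdl_data[0]) + 3
 *	socksize = masklen + 6, raised to at least
 *		   sizeof (struct sockaddr_dl) and then rounded up to a
 *		   4-byte multiple
 *
 * which presumably fits within DLIL_SDLMAXLEN, so the common branch that
 * reuses the in-line dl_if_lladdr storage is taken.  The exact numbers
 * depend on the sockaddr_dl layout and are only meant to show the flow.
 */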
7392
7393 static void
7394 if_purgeaddrs(struct ifnet *ifp)
7395 {
7396 #if INET
7397 in_purgeaddrs(ifp);
7398 #endif /* INET */
7399 #if INET6
7400 in6_purgeaddrs(ifp);
7401 #endif /* INET6 */
7402 }
7403
7404 errno_t
7405 ifnet_detach(ifnet_t ifp)
7406 {
7407 struct ifnet *delegated_ifp;
7408 struct nd_ifinfo *ndi = NULL;
7409
7410 if (ifp == NULL) {
7411 return EINVAL;
7412 }
7413
7414 ndi = ND_IFINFO(ifp);
7415 if (NULL != ndi) {
7416 ndi->cga_initialized = FALSE;
7417 }
7418
7419 lck_mtx_lock(rnh_lock);
7420 ifnet_head_lock_exclusive();
7421 ifnet_lock_exclusive(ifp);
7422
7423 if (ifp->if_output_netem != NULL) {
7424 netem_destroy(ifp->if_output_netem);
7425 ifp->if_output_netem = NULL;
7426 }
7427
7428 /*
7429 * Check to see if this interface has previously triggered
7430 * aggressive protocol draining; if so, decrement the global
7431 * refcnt and clear PR_AGGDRAIN on the route domain if
7432 * there are no more of such an interface around.
7433 */
7434 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
7435
7436 lck_mtx_lock_spin(&ifp->if_ref_lock);
7437 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
7438 lck_mtx_unlock(&ifp->if_ref_lock);
7439 ifnet_lock_done(ifp);
7440 ifnet_head_done();
7441 lck_mtx_unlock(rnh_lock);
7442 return EINVAL;
7443 } else if (ifp->if_refflags & IFRF_DETACHING) {
7444 /* Interface has already been detached */
7445 lck_mtx_unlock(&ifp->if_ref_lock);
7446 ifnet_lock_done(ifp);
7447 ifnet_head_done();
7448 lck_mtx_unlock(rnh_lock);
7449 return ENXIO;
7450 }
7451 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
7452 /* Indicate this interface is being detached */
7453 ifp->if_refflags &= ~IFRF_ATTACHED;
7454 ifp->if_refflags |= IFRF_DETACHING;
7455 lck_mtx_unlock(&ifp->if_ref_lock);
7456
7457 if (dlil_verbose) {
7458 DLIL_PRINTF("%s: detaching\n", if_name(ifp));
7459 }
7460
7461 /* clean up flow control entry object if there's any */
7462 if (ifp->if_eflags & IFEF_TXSTART) {
7463 ifnet_flowadv(ifp->if_flowhash);
7464 }
7465
7466 /* Reset ECN enable/disable flags */
7467 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
7468 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
7469
7470 /* Reset CLAT46 flag */
7471 ifp->if_eflags &= ~IFEF_CLAT46;
7472
7473 /*
7474 * We do not reset the TCP keep alive counters in case
7475 * a TCP connection stays connected after the interface
7476 * went down.
7477 */
7478 if (ifp->if_tcp_kao_cnt > 0) {
7479 os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
7480 __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
7481 }
7482 ifp->if_tcp_kao_max = 0;
7483
7484 /*
7485 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
7486 * no longer be visible during lookups from this point.
7487 */
7488 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
7489 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
7490 ifp->if_link.tqe_next = NULL;
7491 ifp->if_link.tqe_prev = NULL;
7492 if (ifp->if_ordered_link.tqe_next != NULL ||
7493 ifp->if_ordered_link.tqe_prev != NULL) {
7494 ifnet_remove_from_ordered_list(ifp);
7495 }
7496 ifindex2ifnet[ifp->if_index] = NULL;
7497
7498 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
7499 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
7500
7501 /* Record detach PC stacktrace */
7502 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
7503
7504 /* Clear logging parameters */
7505 bzero(&ifp->if_log, sizeof(ifp->if_log));
7506
7507 /* Clear delegated interface info (reference released below) */
7508 delegated_ifp = ifp->if_delegated.ifp;
7509 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
7510
7511 /* Reset interface state */
7512 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
7513
7514 ifnet_lock_done(ifp);
7515 ifnet_head_done();
7516 lck_mtx_unlock(rnh_lock);
7517
7518
7519 /* Release reference held on the delegated interface */
7520 if (delegated_ifp != NULL) {
7521 ifnet_release(delegated_ifp);
7522 }
7523
7524 /* Reset Link Quality Metric (unless loopback [lo0]) */
7525 if (ifp != lo_ifp) {
7526 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
7527 }
7528
7529 /* Reset TCP local statistics */
7530 if (ifp->if_tcp_stat != NULL) {
7531 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
7532 }
7533
7534 /* Reset UDP local statistics */
7535 if (ifp->if_udp_stat != NULL) {
7536 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
7537 }
7538
7539 /* Reset ifnet IPv4 stats */
7540 if (ifp->if_ipv4_stat != NULL) {
7541 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
7542 }
7543
7544 /* Reset ifnet IPv6 stats */
7545 if (ifp->if_ipv6_stat != NULL) {
7546 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
7547 }
7548
7549 /* Release memory held for interface link status report */
7550 if (ifp->if_link_status != NULL) {
7551 FREE(ifp->if_link_status, M_TEMP);
7552 ifp->if_link_status = NULL;
7553 }
7554
7555 /* Clear agent IDs */
7556 if (ifp->if_agentids != NULL) {
7557 FREE(ifp->if_agentids, M_NETAGENT);
7558 ifp->if_agentids = NULL;
7559 }
7560 ifp->if_agentcount = 0;
7561
7562
7563 /* Let BPF know we're detaching */
7564 bpfdetach(ifp);
7565
7566 /* Mark the interface as DOWN */
7567 if_down(ifp);
7568
7569 /* Disable forwarding cached route */
7570 lck_mtx_lock(&ifp->if_cached_route_lock);
7571 ifp->if_fwd_cacheok = 0;
7572 lck_mtx_unlock(&ifp->if_cached_route_lock);
7573
7574 /* Disable data threshold and wait for any pending event posting */
7575 ifp->if_data_threshold = 0;
7576 VERIFY(ifp->if_dt_tcall != NULL);
7577 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
7578
7579 /*
7580 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
7581 * references to the info structures and leave them attached to
7582 * this ifnet.
7583 */
7584 #if INET
7585 igmp_domifdetach(ifp);
7586 #endif /* INET */
7587 #if INET6
7588 mld_domifdetach(ifp);
7589 #endif /* INET6 */
7590
7591 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
7592
7593 /* Let worker thread take care of the rest, to avoid reentrancy */
7594 dlil_if_lock();
7595 ifnet_detaching_enqueue(ifp);
7596 dlil_if_unlock();
7597
7598 return 0;
7599 }
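/*
 * Illustrative sketch: a driver typically calls ifnet_detach() from its
 * stop/unload path and must not free its private state until the detached
 * callback supplied at allocation time has fired, since the actual
 * teardown runs asynchronously on the detacher thread via
 * ifnet_detach_final() below.  Names marked "my_" are hypothetical.
 *
 *	static void
 *	my_driver_unload(struct my_softc *sc)
 *	{
 *		// kicks off the asynchronous detach machinery
 *		(void) ifnet_detach(sc->my_ifp);
 *		// sc itself is reclaimed later, from the detached
 *		// callback, once ifnet_detach_final() has run
 *	}
 */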
7600
7601 static void
7602 ifnet_detaching_enqueue(struct ifnet *ifp)
7603 {
7604 dlil_if_lock_assert();
7605
7606 ++ifnet_detaching_cnt;
7607 VERIFY(ifnet_detaching_cnt != 0);
7608 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
7609 wakeup((caddr_t)&ifnet_delayed_run);
7610 }
7611
7612 static struct ifnet *
7613 ifnet_detaching_dequeue(void)
7614 {
7615 struct ifnet *ifp;
7616
7617 dlil_if_lock_assert();
7618
7619 ifp = TAILQ_FIRST(&ifnet_detaching_head);
7620 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
7621 if (ifp != NULL) {
7622 VERIFY(ifnet_detaching_cnt != 0);
7623 --ifnet_detaching_cnt;
7624 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
7625 ifp->if_detaching_link.tqe_next = NULL;
7626 ifp->if_detaching_link.tqe_prev = NULL;
7627 }
7628 return ifp;
7629 }
7630
7631 static int
7632 ifnet_detacher_thread_cont(int err)
7633 {
7634 #pragma unused(err)
7635 struct ifnet *ifp;
7636
7637 for (;;) {
7638 dlil_if_lock_assert();
7639 while (ifnet_detaching_cnt == 0) {
7640 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7641 (PZERO - 1), "ifnet_detacher_cont", 0,
7642 ifnet_detacher_thread_cont);
7643 /* NOTREACHED */
7644 }
7645
7646 net_update_uptime();
7647
7648 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7649
7650 /* Take care of detaching ifnet */
7651 ifp = ifnet_detaching_dequeue();
7652 if (ifp != NULL) {
7653 dlil_if_unlock();
7654 ifnet_detach_final(ifp);
7655 dlil_if_lock();
7656 }
7657 }
7658 }
7659
7660 __dead2
7661 static void
7662 ifnet_detacher_thread_func(void *v, wait_result_t w)
7663 {
7664 #pragma unused(v, w)
7665 dlil_decr_pending_thread_count();
7666 dlil_if_lock();
7667 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7668 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
7669 /*
7670 * msleep0() shouldn't have returned as PCATCH was not set;
7671 * therefore assert in this case.
7672 */
7673 dlil_if_unlock();
7674 VERIFY(0);
7675 }
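/*
 * Illustrative sketch: the detacher thread above sleeps continuation-style.
 * msleep0() is handed a function to restart in rather than returning to the
 * blocked frame, which lets the kernel reclaim the thread's stack while it
 * is idle.  A minimal worker built on the same pattern, with hypothetical
 * names and my_lock held as in the detacher above, would look roughly like:
 *
 *	static int
 *	my_worker_cont(int err)
 *	{
 *		for (;;) {
 *			while (my_work_count == 0) {
 *				(void) msleep0(&my_work_event, &my_lock,
 *				    (PZERO - 1), "my_worker", 0,
 *				    my_worker_cont);
 *				// NOTREACHED -- restarted at my_worker_cont
 *			}
 *			my_do_one_item();
 *		}
 *	}
 */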
7676
7677 static void
7678 ifnet_detach_final(struct ifnet *ifp)
7679 {
7680 struct ifnet_filter *filter, *filter_next;
7681 struct ifnet_filter_head fhead;
7682 struct dlil_threading_info *inp;
7683 struct ifaddr *ifa;
7684 ifnet_detached_func if_free;
7685 int i;
7686
7687 lck_mtx_lock(&ifp->if_ref_lock);
7688 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7689 panic("%s: flags mismatch (detaching not set) ifp=%p",
7690 __func__, ifp);
7691 /* NOTREACHED */
7692 }
7693
7694 /*
7695 * Wait until the existing IO references get released
7696 * before we proceed with ifnet_detach. This is not a
7697 * common case, so block without using a continuation.
7698 */
7699 while (ifp->if_refio > 0) {
7700 DLIL_PRINTF("%s: Waiting for IO references on %s interface "
7701 "to be released\n", __func__, if_name(ifp));
7702 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
7703 (PZERO - 1), "ifnet_ioref_wait", NULL);
7704 }
7705
7706 VERIFY(ifp->if_datamov == 0);
7707 VERIFY(ifp->if_drainers == 0);
7708 VERIFY(ifp->if_suspend == 0);
7709 ifp->if_refflags &= ~IFRF_READY;
7710 lck_mtx_unlock(&ifp->if_ref_lock);
7711
7712 /* Drain and destroy send queue */
7713 ifclassq_teardown(ifp);
7714
7715 /* Detach interface filters */
7716 lck_mtx_lock(&ifp->if_flt_lock);
7717 if_flt_monitor_enter(ifp);
7718
7719 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
7720 fhead = ifp->if_flt_head;
7721 TAILQ_INIT(&ifp->if_flt_head);
7722
7723 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7724 filter_next = TAILQ_NEXT(filter, filt_next);
7725 lck_mtx_unlock(&ifp->if_flt_lock);
7726
7727 dlil_detach_filter_internal(filter, 1);
7728 lck_mtx_lock(&ifp->if_flt_lock);
7729 }
7730 if_flt_monitor_leave(ifp);
7731 lck_mtx_unlock(&ifp->if_flt_lock);
7732
7733 /* Tell upper layers to drop their network addresses */
7734 if_purgeaddrs(ifp);
7735
7736 ifnet_lock_exclusive(ifp);
7737
7738 /* Unplumb all protocols */
7739 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7740 struct if_proto *proto;
7741
7742 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7743 while (proto != NULL) {
7744 protocol_family_t family = proto->protocol_family;
7745 ifnet_lock_done(ifp);
7746 proto_unplumb(family, ifp);
7747 ifnet_lock_exclusive(ifp);
7748 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7749 }
7750 /* There should not be any protocols left */
7751 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7752 }
7753 zfree(dlif_phash_zone, ifp->if_proto_hash);
7754 ifp->if_proto_hash = NULL;
7755
7756 /* Detach (permanent) link address from if_addrhead */
7757 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7758 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7759 IFA_LOCK(ifa);
7760 if_detach_link_ifa(ifp, ifa);
7761 IFA_UNLOCK(ifa);
7762
7763 /* Remove (permanent) link address from ifnet_addrs[] */
7764 IFA_REMREF(ifa);
7765 ifnet_addrs[ifp->if_index - 1] = NULL;
7766
7767 /* This interface should not be on {ifnet_head,detaching} */
7768 VERIFY(ifp->if_link.tqe_next == NULL);
7769 VERIFY(ifp->if_link.tqe_prev == NULL);
7770 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7771 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7772 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
7773 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
7774
7775 /* The slot should have been emptied */
7776 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7777
7778 /* There should not be any addresses left */
7779 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7780
7781 /*
7782 * Signal the starter thread to terminate itself.
7783 */
7784 if (ifp->if_start_thread != THREAD_NULL) {
7785 lck_mtx_lock_spin(&ifp->if_start_lock);
7786 ifp->if_start_flags = 0;
7787 ifp->if_start_thread = THREAD_NULL;
7788 wakeup_one((caddr_t)&ifp->if_start_thread);
7789 lck_mtx_unlock(&ifp->if_start_lock);
7790 }
7791
7792 /*
7793 * Signal the poller thread to terminate itself.
7794 */
7795 if (ifp->if_poll_thread != THREAD_NULL) {
7796 lck_mtx_lock_spin(&ifp->if_poll_lock);
7797 ifp->if_poll_thread = THREAD_NULL;
7798 wakeup_one((caddr_t)&ifp->if_poll_thread);
7799 lck_mtx_unlock(&ifp->if_poll_lock);
7800 }
7801
7802 /*
7803 * If thread affinity was set for the workloop thread, we will need
7804 * to tear down the affinity and release the extra reference count
7805 * taken at attach time. Does not apply to lo0 or other interfaces
7806 * without dedicated input threads.
7807 */
7808 if ((inp = ifp->if_inp) != NULL) {
7809 VERIFY(inp != dlil_main_input_thread);
7810
7811 if (inp->net_affinity) {
7812 struct thread *tp, *wtp, *ptp;
7813
7814 lck_mtx_lock_spin(&inp->input_lck);
7815 wtp = inp->wloop_thr;
7816 inp->wloop_thr = THREAD_NULL;
7817 ptp = inp->poll_thr;
7818 inp->poll_thr = THREAD_NULL;
7819 tp = inp->input_thr; /* don't nullify now */
7820 inp->tag = 0;
7821 inp->net_affinity = FALSE;
7822 lck_mtx_unlock(&inp->input_lck);
7823
7824 /* Tear down poll thread affinity */
7825 if (ptp != NULL) {
7826 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
7827 VERIFY(ifp->if_xflags & IFXF_LEGACY);
7828 (void) dlil_affinity_set(ptp,
7829 THREAD_AFFINITY_TAG_NULL);
7830 thread_deallocate(ptp);
7831 }
7832
7833 /* Tear down workloop thread affinity */
7834 if (wtp != NULL) {
7835 (void) dlil_affinity_set(wtp,
7836 THREAD_AFFINITY_TAG_NULL);
7837 thread_deallocate(wtp);
7838 }
7839
7840 /* Tear down DLIL input thread affinity */
7841 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
7842 thread_deallocate(tp);
7843 }
7844
7845 /* disassociate ifp DLIL input thread */
7846 ifp->if_inp = NULL;
7847
7848 /* tell the input thread to terminate */
7849 lck_mtx_lock_spin(&inp->input_lck);
7850 inp->input_waiting |= DLIL_INPUT_TERMINATE;
7851 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
7852 wakeup_one((caddr_t)&inp->input_waiting);
7853 }
7854 lck_mtx_unlock(&inp->input_lck);
7855 ifnet_lock_done(ifp);
7856
7857 /* wait for the input thread to terminate */
7858 lck_mtx_lock_spin(&inp->input_lck);
7859 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
7860 == 0) {
7861 (void) msleep(&inp->input_waiting, &inp->input_lck,
7862 (PZERO - 1) | PSPIN, inp->input_name, NULL);
7863 }
7864 lck_mtx_unlock(&inp->input_lck);
7865 ifnet_lock_exclusive(ifp);
7866
7867 /* clean-up input thread state */
7868 dlil_clean_threading_info(inp);
7869 /* clean-up poll parameters */
7870 VERIFY(ifp->if_poll_thread == THREAD_NULL);
7871 dlil_reset_rxpoll_params(ifp);
7872 }
7873
7874 /* The driver might unload, so point these to ourselves */
7875 if_free = ifp->if_free;
7876 ifp->if_output_dlil = ifp_if_output;
7877 ifp->if_output = ifp_if_output;
7878 ifp->if_pre_enqueue = ifp_if_output;
7879 ifp->if_start = ifp_if_start;
7880 ifp->if_output_ctl = ifp_if_ctl;
7881 ifp->if_input_dlil = ifp_if_input;
7882 ifp->if_input_poll = ifp_if_input_poll;
7883 ifp->if_input_ctl = ifp_if_ctl;
7884 ifp->if_ioctl = ifp_if_ioctl;
7885 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
7886 ifp->if_free = ifp_if_free;
7887 ifp->if_demux = ifp_if_demux;
7888 ifp->if_event = ifp_if_event;
7889 ifp->if_framer_legacy = ifp_if_framer;
7890 ifp->if_framer = ifp_if_framer_extended;
7891 ifp->if_add_proto = ifp_if_add_proto;
7892 ifp->if_del_proto = ifp_if_del_proto;
7893 ifp->if_check_multi = ifp_if_check_multi;
7894
7895 /* wipe out interface description */
7896 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7897 ifp->if_desc.ifd_len = 0;
7898 VERIFY(ifp->if_desc.ifd_desc != NULL);
7899 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
7900
7901 /* there shouldn't be any delegation by now */
7902 VERIFY(ifp->if_delegated.ifp == NULL);
7903 VERIFY(ifp->if_delegated.type == 0);
7904 VERIFY(ifp->if_delegated.family == 0);
7905 VERIFY(ifp->if_delegated.subfamily == 0);
7906 VERIFY(ifp->if_delegated.expensive == 0);
7907 VERIFY(ifp->if_delegated.constrained == 0);
7908
7909 /* QoS marking gets cleared */
7910 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
7911 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
7912
7913
7914 ifnet_lock_done(ifp);
7915
7916 #if PF
7917 /*
7918 * Detach this interface from packet filter, if enabled.
7919 */
7920 pf_ifnet_hook(ifp, 0);
7921 #endif /* PF */
7922
7923 /* Filter list should be empty */
7924 lck_mtx_lock_spin(&ifp->if_flt_lock);
7925 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7926 VERIFY(ifp->if_flt_busy == 0);
7927 VERIFY(ifp->if_flt_waiters == 0);
7928 lck_mtx_unlock(&ifp->if_flt_lock);
7929
7930 /* Last chance to drain send queue */
7931 if_qflush(ifp, 0);
7932
7933 /* Last chance to cleanup any cached route */
7934 lck_mtx_lock(&ifp->if_cached_route_lock);
7935 VERIFY(!ifp->if_fwd_cacheok);
7936 ROUTE_RELEASE(&ifp->if_fwd_route);
7937 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7938 ROUTE_RELEASE(&ifp->if_src_route);
7939 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7940 ROUTE_RELEASE(&ifp->if_src_route6);
7941 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7942 lck_mtx_unlock(&ifp->if_cached_route_lock);
7943
7944 VERIFY(ifp->if_data_threshold == 0);
7945 VERIFY(ifp->if_dt_tcall != NULL);
7946 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
7947
7948 ifnet_llreach_ifdetach(ifp);
7949
7950 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
7951
7952 /*
7953 * Finally, mark this ifnet as detached.
7954 */
7955 lck_mtx_lock_spin(&ifp->if_ref_lock);
7956 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7957 panic("%s: flags mismatch (detaching not set) ifp=%p",
7958 __func__, ifp);
7959 /* NOTREACHED */
7960 }
7961 ifp->if_refflags &= ~IFRF_DETACHING;
7962 lck_mtx_unlock(&ifp->if_ref_lock);
7963 if (if_free != NULL) {
7964 if_free(ifp);
7965 }
7966
7967 if (dlil_verbose) {
7968 DLIL_PRINTF("%s: detached\n", if_name(ifp));
7969 }
7970
7971 /* Release reference held during ifnet attach */
7972 ifnet_release(ifp);
7973 }
7974
7975 errno_t
7976 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
7977 {
7978 #pragma unused(ifp)
7979 m_freem_list(m);
7980 return 0;
7981 }
7982
7983 void
7984 ifp_if_start(struct ifnet *ifp)
7985 {
7986 ifnet_purge(ifp);
7987 }
7988
7989 static errno_t
7990 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
7991 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
7992 boolean_t poll, struct thread *tp)
7993 {
7994 #pragma unused(ifp, m_tail, s, poll, tp)
7995 m_freem_list(m_head);
7996 return ENXIO;
7997 }
7998
7999 static void
8000 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
8001 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
8002 {
8003 #pragma unused(ifp, flags, max_cnt)
8004 if (m_head != NULL) {
8005 *m_head = NULL;
8006 }
8007 if (m_tail != NULL) {
8008 *m_tail = NULL;
8009 }
8010 if (cnt != NULL) {
8011 *cnt = 0;
8012 }
8013 if (len != NULL) {
8014 *len = 0;
8015 }
8016 }
8017
8018 static errno_t
8019 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
8020 {
8021 #pragma unused(ifp, cmd, arglen, arg)
8022 return EOPNOTSUPP;
8023 }
8024
8025 static errno_t
8026 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
8027 {
8028 #pragma unused(ifp, fh, pf)
8029 m_freem(m);
8030 return EJUSTRETURN;
8031 }
8032
8033 static errno_t
8034 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
8035 const struct ifnet_demux_desc *da, u_int32_t dc)
8036 {
8037 #pragma unused(ifp, pf, da, dc)
8038 return EINVAL;
8039 }
8040
8041 static errno_t
8042 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
8043 {
8044 #pragma unused(ifp, pf)
8045 return EINVAL;
8046 }
8047
8048 static errno_t
8049 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
8050 {
8051 #pragma unused(ifp, sa)
8052 return EOPNOTSUPP;
8053 }
8054
8055 #if CONFIG_EMBEDDED
8056 static errno_t
8057 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8058 const struct sockaddr *sa, const char *ll, const char *t,
8059 u_int32_t *pre, u_int32_t *post)
8060 #else
8061 static errno_t
8062 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
8063 const struct sockaddr *sa, const char *ll, const char *t)
8064 #endif /* !CONFIG_EMBEDDED */
8065 {
8066 #pragma unused(ifp, m, sa, ll, t)
8067 #if CONFIG_EMBEDDED
8068 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
8069 #else
8070 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
8071 #endif /* !CONFIG_EMBEDDED */
8072 }
8073
8074 static errno_t
8075 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
8076 const struct sockaddr *sa, const char *ll, const char *t,
8077 u_int32_t *pre, u_int32_t *post)
8078 {
8079 #pragma unused(ifp, sa, ll, t)
8080 m_freem(*m);
8081 *m = NULL;
8082
8083 if (pre != NULL) {
8084 *pre = 0;
8085 }
8086 if (post != NULL) {
8087 *post = 0;
8088 }
8089
8090 return EJUSTRETURN;
8091 }
8092
8093 errno_t
8094 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
8095 {
8096 #pragma unused(ifp, cmd, arg)
8097 return EOPNOTSUPP;
8098 }
8099
8100 static errno_t
8101 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
8102 {
8103 #pragma unused(ifp, tm, f)
8104 /* XXX not sure what to do here */
8105 return 0;
8106 }
8107
8108 static void
8109 ifp_if_free(struct ifnet *ifp)
8110 {
8111 #pragma unused(ifp)
8112 }
8113
8114 static void
8115 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
8116 {
8117 #pragma unused(ifp, e)
8118 }
8119
8120 int
8121 dlil_if_acquire(u_int32_t family, const void *uniqueid,
8122 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
8123 {
8124 struct ifnet *ifp1 = NULL;
8125 struct dlil_ifnet *dlifp1 = NULL;
8126 struct dlil_ifnet *dlifp1_saved = NULL;
8127 void *buf, *base, **pbuf;
8128 int ret = 0;
8129
8130 VERIFY(*ifp == NULL);
8131 dlil_if_lock();
8132 /*
8133 * We absolutely can't have an interface with the same name
8134 * in an in-use state.  To make sure of that, the list has to
8135 * be traversed completely.
8136 */
8137 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
8138 ifp1 = (struct ifnet *)dlifp1;
8139
8140 if (ifp1->if_family != family) {
8141 continue;
8142 }
8143
8144 /*
8145 * If the interface is in use, return EBUSY if either the unique
8146 * id or the interface extended name is the same.
8147 */
8148 lck_mtx_lock(&dlifp1->dl_if_lock);
8149 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
8150 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8151 lck_mtx_unlock(&dlifp1->dl_if_lock);
8152 ret = EBUSY;
8153 goto end;
8154 }
8155 }
8156
8157 if (uniqueid_len) {
8158 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
8159 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
8160 if (dlifp1->dl_if_flags & DLIF_INUSE) {
8161 lck_mtx_unlock(&dlifp1->dl_if_lock);
8162 ret = EBUSY;
8163 goto end;
8164 } else {
8165 /* Cache the first interface that can be recycled */
8166 if (*ifp == NULL) {
8167 *ifp = ifp1;
8168 dlifp1_saved = dlifp1;
8169 }
8170 /*
8171 * XXX Do not break or jump to end as we have to traverse
8172 * the whole list to ensure there are no name collisions
8173 */
8174 }
8175 }
8176 }
8177 lck_mtx_unlock(&dlifp1->dl_if_lock);
8178 }
8179
8180 /* If there's an interface that can be recycled, use that */
8181 if (*ifp != NULL) {
8182 if (dlifp1_saved != NULL) {
8183 lck_mtx_lock(&dlifp1_saved->dl_if_lock);
8184 dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
8185 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
8186 dlifp1_saved = NULL;
8187 }
8188 goto end;
8189 }
8190
8191 /* no interface found, allocate a new one */
8192 buf = zalloc(dlif_zone);
8193 if (buf == NULL) {
8194 ret = ENOMEM;
8195 goto end;
8196 }
8197 bzero(buf, dlif_bufsize);
8198
8199 /* Get the 64-bit aligned base address for this object */
8200 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
8201 sizeof(u_int64_t));
8202 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
8203
8204 /*
8205 * Wind back a pointer size from the aligned base and
8206 * save the original address so we can free it later.
8207 */
8208 pbuf = (void **)((intptr_t)base - sizeof(void *));
8209 *pbuf = buf;
8210 dlifp1 = base;
8211
8212 if (uniqueid_len) {
8213 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
8214 M_NKE, M_WAITOK);
8215 if (dlifp1->dl_if_uniqueid == NULL) {
8216 zfree(dlif_zone, buf);
8217 ret = ENOMEM;
8218 goto end;
8219 }
8220 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
8221 dlifp1->dl_if_uniqueid_len = uniqueid_len;
8222 }
8223
8224 ifp1 = (struct ifnet *)dlifp1;
8225 dlifp1->dl_if_flags = DLIF_INUSE;
8226 if (ifnet_debug) {
8227 dlifp1->dl_if_flags |= DLIF_DEBUG;
8228 dlifp1->dl_if_trace = dlil_if_trace;
8229 }
8230 ifp1->if_name = dlifp1->dl_if_namestorage;
8231 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
8232
8233 /* initialize interface description */
8234 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
8235 ifp1->if_desc.ifd_len = 0;
8236 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
8237
8238
8239 #if CONFIG_MACF_NET
8240 mac_ifnet_label_init(ifp1);
8241 #endif
8242
8243 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
8244 DLIL_PRINTF("%s: failed to allocate if local stats, "
8245 "error: %d\n", __func__, ret);
8246 /* This probably shouldn't be fatal */
8247 ret = 0;
8248 }
8249
8250 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
8251 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
8252 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
8253 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
8254 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
8255 ifnet_lock_attr);
8256 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
8257 #if INET
8258 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
8259 ifnet_lock_attr);
8260 ifp1->if_inetdata = NULL;
8261 #endif
8262 #if INET6
8263 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
8264 ifnet_lock_attr);
8265 ifp1->if_inet6data = NULL;
8266 #endif
8267 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
8268 ifnet_lock_attr);
8269 ifp1->if_link_status = NULL;
8270
8271 /* for send data paths */
8272 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
8273 ifnet_lock_attr);
8274 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
8275 ifnet_lock_attr);
8276 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
8277 ifnet_lock_attr);
8278
8279 /* for receive data paths */
8280 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
8281 ifnet_lock_attr);
8282
8283 /* thread call allocation is done with sleeping zalloc */
8284 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
8285 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
8286 if (ifp1->if_dt_tcall == NULL) {
8287 panic_plain("%s: couldn't create if_dt_tcall", __func__);
8288 /* NOTREACHED */
8289 }
8290
8291 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
8292
8293 *ifp = ifp1;
8294
8295 end:
8296 dlil_if_unlock();
8297
8298 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
8299 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
8300
8301 return ret;
8302 }
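/*
 * Illustrative sketch: the allocation above uses a common
 * over-allocate-and-align trick.  The zone buffer is larger than the
 * object; the object base is rounded up to an 8-byte boundary, and the
 * original zalloc() pointer is stashed in the pointer-sized slot just
 * below the base so it can be recovered later:
 *
 *	buf                            base                 base + dlif_size
 *	 |<-- padding -->|<- saved buf ->|<---- struct dlil_ifnet ---->|
 *
 * A matching free, if one were needed, would presumably read that slot
 * back, e.g.:
 *
 *	void **pbuf = (void **)((intptr_t)dlifp - sizeof (void *));
 *	zfree(dlif_zone, *pbuf);
 *
 * This is a sketch only; in practice dlil_ifnet objects appear to be
 * recycled through DLIF_REUSE rather than returned to the zone.
 */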
8303
8304 __private_extern__ void
8305 dlil_if_release(ifnet_t ifp)
8306 {
8307 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
8308
8309 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
8310 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
8311 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
8312 }
8313
8314 ifnet_lock_exclusive(ifp);
8315 lck_mtx_lock(&dlifp->dl_if_lock);
8316 dlifp->dl_if_flags &= ~DLIF_INUSE;
8317 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
8318 ifp->if_name = dlifp->dl_if_namestorage;
8319 /* Reset external name (name + unit) */
8320 ifp->if_xname = dlifp->dl_if_xnamestorage;
8321 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
8322 "%s?", ifp->if_name);
8323 lck_mtx_unlock(&dlifp->dl_if_lock);
8324 #if CONFIG_MACF_NET
8325 /*
8326 * We can either recycle the MAC label here or in dlil_if_acquire().
8327 * It seems logical to do it here but this means that anything that
8328 * still has a handle on ifp will now see it as unlabeled.
8329 * Since the interface is "dead" that may be OK. Revisit later.
8330 */
8331 mac_ifnet_label_recycle(ifp);
8332 #endif
8333 ifnet_lock_done(ifp);
8334 }
8335
8336 __private_extern__ void
8337 dlil_if_lock(void)
8338 {
8339 lck_mtx_lock(&dlil_ifnet_lock);
8340 }
8341
8342 __private_extern__ void
8343 dlil_if_unlock(void)
8344 {
8345 lck_mtx_unlock(&dlil_ifnet_lock);
8346 }
8347
8348 __private_extern__ void
8349 dlil_if_lock_assert(void)
8350 {
8351 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
8352 }
8353
8354 __private_extern__ void
8355 dlil_proto_unplumb_all(struct ifnet *ifp)
8356 {
8357 /*
8358 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
8359 * each bucket contains exactly one entry; PF_VLAN does not need an
8360 * explicit unplumb.
8361 *
8362 * if_proto_hash[3] is for other protocols; we expect anything
8363 * in this bucket to respond to the DETACHING event (which would
8364 * have happened by now) and do the unplumb then.
8365 */
8366 (void) proto_unplumb(PF_INET, ifp);
8367 #if INET6
8368 (void) proto_unplumb(PF_INET6, ifp);
8369 #endif /* INET6 */
8370 }
8371
8372 static void
8373 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
8374 {
8375 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8376 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8377
8378 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
8379
8380 lck_mtx_unlock(&ifp->if_cached_route_lock);
8381 }
8382
8383 static void
8384 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
8385 {
8386 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8387 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8388
8389 if (ifp->if_fwd_cacheok) {
8390 route_copyin(src, &ifp->if_src_route, sizeof(*src));
8391 } else {
8392 ROUTE_RELEASE(src);
8393 }
8394 lck_mtx_unlock(&ifp->if_cached_route_lock);
8395 }
8396
8397 #if INET6
8398 static void
8399 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
8400 {
8401 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8402 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8403
8404 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
8405 sizeof(*dst));
8406
8407 lck_mtx_unlock(&ifp->if_cached_route_lock);
8408 }
8409
8410 static void
8411 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
8412 {
8413 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
8414 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
8415
8416 if (ifp->if_fwd_cacheok) {
8417 route_copyin((struct route *)src,
8418 (struct route *)&ifp->if_src_route6, sizeof(*src));
8419 } else {
8420 ROUTE_RELEASE(src);
8421 }
8422 lck_mtx_unlock(&ifp->if_cached_route_lock);
8423 }
8424 #endif /* INET6 */
8425
8426 struct rtentry *
8427 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
8428 {
8429 struct route src_rt;
8430 struct sockaddr_in *dst;
8431
8432 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
8433
8434 ifp_src_route_copyout(ifp, &src_rt);
8435
8436 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
8437 ROUTE_RELEASE(&src_rt);
8438 if (dst->sin_family != AF_INET) {
8439 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8440 dst->sin_len = sizeof(src_rt.ro_dst);
8441 dst->sin_family = AF_INET;
8442 }
8443 dst->sin_addr = src_ip;
8444
8445 VERIFY(src_rt.ro_rt == NULL);
8446 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
8447 0, 0, ifp->if_index);
8448
8449 if (src_rt.ro_rt != NULL) {
8450 /* retain a ref, copyin consumes one */
8451 struct rtentry *rte = src_rt.ro_rt;
8452 RT_ADDREF(rte);
8453 ifp_src_route_copyin(ifp, &src_rt);
8454 src_rt.ro_rt = rte;
8455 }
8456 }
8457
8458 return src_rt.ro_rt;
8459 }
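/*
 * Illustrative sketch: the extra RT_ADDREF() above exists because
 * ifp_src_route_copyin() consumes one reference when it caches the route,
 * while the caller of ifnet_cached_rtlookup_inet() still receives a
 * referenced rtentry and is expected to drop it when done, roughly:
 *
 *	struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp, src_ip);
 *	if (rt != NULL) {
 *		// ... use rt->rt_ifp, rt_flags, etc. ...
 *		rtfree(rt);	// release the caller's reference
 *	}
 *
 * Callers in this file may use RT_REMREF/rtfree variants depending on
 * locking context; this is only meant to show the reference flow.
 */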
8460
8461 #if INET6
8462 struct rtentry *
8463 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
8464 {
8465 struct route_in6 src_rt;
8466
8467 ifp_src_route6_copyout(ifp, &src_rt);
8468
8469 if (ROUTE_UNUSABLE(&src_rt) ||
8470 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
8471 ROUTE_RELEASE(&src_rt);
8472 if (src_rt.ro_dst.sin6_family != AF_INET6) {
8473 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
8474 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
8475 src_rt.ro_dst.sin6_family = AF_INET6;
8476 }
8477 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
8478 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
8479 sizeof(src_rt.ro_dst.sin6_addr));
8480
8481 if (src_rt.ro_rt == NULL) {
8482 src_rt.ro_rt = rtalloc1_scoped(
8483 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
8484 ifp->if_index);
8485
8486 if (src_rt.ro_rt != NULL) {
8487 /* retain a ref, copyin consumes one */
8488 struct rtentry *rte = src_rt.ro_rt;
8489 RT_ADDREF(rte);
8490 ifp_src_route6_copyin(ifp, &src_rt);
8491 src_rt.ro_rt = rte;
8492 }
8493 }
8494 }
8495
8496 return src_rt.ro_rt;
8497 }
8498 #endif /* INET6 */
8499
8500 void
8501 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
8502 {
8503 struct kev_dl_link_quality_metric_data ev_lqm_data;
8504
8505 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
8506
8507 /* Normalize to edge */
8508 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
8509 lqm = IFNET_LQM_THRESH_ABORT;
8510 atomic_bitset_32(&tcbinfo.ipi_flags,
8511 INPCBINFO_HANDLE_LQM_ABORT);
8512 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
8513 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
8514 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
8515 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
8516 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
8517 lqm <= IFNET_LQM_THRESH_POOR) {
8518 lqm = IFNET_LQM_THRESH_POOR;
8519 } else if (lqm > IFNET_LQM_THRESH_POOR &&
8520 lqm <= IFNET_LQM_THRESH_GOOD) {
8521 lqm = IFNET_LQM_THRESH_GOOD;
8522 }
8523
8524 /*
8525 * Take the lock if needed
8526 */
8527 if (!locked) {
8528 ifnet_lock_exclusive(ifp);
8529 }
8530
8531 if (lqm == ifp->if_interface_state.lqm_state &&
8532 (ifp->if_interface_state.valid_bitmask &
8533 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
8534 /*
8535 * Release the lock if it was not held by the caller
8536 */
8537 if (!locked) {
8538 ifnet_lock_done(ifp);
8539 }
8540 return; /* nothing to update */
8541 }
8542 ifp->if_interface_state.valid_bitmask |=
8543 IF_INTERFACE_STATE_LQM_STATE_VALID;
8544 ifp->if_interface_state.lqm_state = lqm;
8545
8546 /*
8547 * Don't want to hold the lock when issuing kernel events
8548 */
8549 ifnet_lock_done(ifp);
8550
8551 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
8552 ev_lqm_data.link_quality_metric = lqm;
8553
8554 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
8555 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
8556
8557 /*
8558 * Reacquire the lock for the caller
8559 */
8560 if (locked) {
8561 ifnet_lock_exclusive(ifp);
8562 }
8563 }
8564
8565 static void
8566 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
8567 {
8568 struct kev_dl_rrc_state kev;
8569
8570 if (rrc_state == ifp->if_interface_state.rrc_state &&
8571 (ifp->if_interface_state.valid_bitmask &
8572 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8573 return;
8574 }
8575
8576 ifp->if_interface_state.valid_bitmask |=
8577 IF_INTERFACE_STATE_RRC_STATE_VALID;
8578
8579 ifp->if_interface_state.rrc_state = rrc_state;
8580
8581 /*
8582 * Don't want to hold the lock when issuing kernel events
8583 */
8584 ifnet_lock_done(ifp);
8585
8586 bzero(&kev, sizeof(struct kev_dl_rrc_state));
8587 kev.rrc_state = rrc_state;
8588
8589 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
8590 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
8591
8592 ifnet_lock_exclusive(ifp);
8593 }
8594
8595 errno_t
8596 if_state_update(struct ifnet *ifp,
8597 struct if_interface_state *if_interface_state)
8598 {
8599 u_short if_index_available = 0;
8600
8601 ifnet_lock_exclusive(ifp);
8602
8603 if ((ifp->if_type != IFT_CELLULAR) &&
8604 (if_interface_state->valid_bitmask &
8605 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
8606 ifnet_lock_done(ifp);
8607 return ENOTSUP;
8608 }
8609 if ((if_interface_state->valid_bitmask &
8610 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
8611 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
8612 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
8613 ifnet_lock_done(ifp);
8614 return EINVAL;
8615 }
8616 if ((if_interface_state->valid_bitmask &
8617 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
8618 if_interface_state->rrc_state !=
8619 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
8620 if_interface_state->rrc_state !=
8621 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
8622 ifnet_lock_done(ifp);
8623 return EINVAL;
8624 }
8625
8626 if (if_interface_state->valid_bitmask &
8627 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8628 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
8629 }
8630 if (if_interface_state->valid_bitmask &
8631 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8632 if_rrc_state_update(ifp, if_interface_state->rrc_state);
8633 }
8634 if (if_interface_state->valid_bitmask &
8635 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8636 ifp->if_interface_state.valid_bitmask |=
8637 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8638 ifp->if_interface_state.interface_availability =
8639 if_interface_state->interface_availability;
8640
8641 if (ifp->if_interface_state.interface_availability ==
8642 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
8643 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
8644 __func__, if_name(ifp), ifp->if_index);
8645 if_index_available = ifp->if_index;
8646 } else {
8647 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
8648 __func__, if_name(ifp), ifp->if_index);
8649 }
8650 }
8651 ifnet_lock_done(ifp);
8652
8653 /*
8654 * Check if the TCP connections going on this interface should be
8655 * forced to send probe packets instead of waiting for TCP timers
8656 * to fire. This is done on an explicit notification such as
8657 * SIOCSIFINTERFACESTATE which marks the interface as available.
8658 */
8659 if (if_index_available > 0) {
8660 tcp_interface_send_probe(if_index_available);
8661 }
8662
8663 return 0;
8664 }
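/*
 * Illustrative sketch: callers of if_state_update() (e.g. via the
 * SIOCSIFINTERFACESTATE path) only fill in the pieces of state they know
 * about and advertise them through valid_bitmask.  A cellular-style caller
 * reporting "RRC connected" and "available" might build its argument
 * roughly like this (sketch only):
 *
 *	struct if_interface_state s;
 *
 *	bzero(&s, sizeof (s));
 *	s.valid_bitmask = IF_INTERFACE_STATE_RRC_STATE_VALID |
 *	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
 *	s.rrc_state = IF_INTERFACE_STATE_RRC_STATE_CONNECTED;
 *	s.interface_availability = IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
 *	(void) if_state_update(ifp, &s);
 */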
8665
8666 void
8667 if_get_state(struct ifnet *ifp,
8668 struct if_interface_state *if_interface_state)
8669 {
8670 ifnet_lock_shared(ifp);
8671
8672 if_interface_state->valid_bitmask = 0;
8673
8674 if (ifp->if_interface_state.valid_bitmask &
8675 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8676 if_interface_state->valid_bitmask |=
8677 IF_INTERFACE_STATE_RRC_STATE_VALID;
8678 if_interface_state->rrc_state =
8679 ifp->if_interface_state.rrc_state;
8680 }
8681 if (ifp->if_interface_state.valid_bitmask &
8682 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8683 if_interface_state->valid_bitmask |=
8684 IF_INTERFACE_STATE_LQM_STATE_VALID;
8685 if_interface_state->lqm_state =
8686 ifp->if_interface_state.lqm_state;
8687 }
8688 if (ifp->if_interface_state.valid_bitmask &
8689 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8690 if_interface_state->valid_bitmask |=
8691 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8692 if_interface_state->interface_availability =
8693 ifp->if_interface_state.interface_availability;
8694 }
8695
8696 ifnet_lock_done(ifp);
8697 }
8698
8699 errno_t
8700 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8701 {
8702 ifnet_lock_exclusive(ifp);
8703 if (conn_probe > 1) {
8704 ifnet_lock_done(ifp);
8705 return EINVAL;
8706 }
8707 if (conn_probe == 0) {
8708 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
8709 } else {
8710 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
8711 }
8712 ifnet_lock_done(ifp);
8713
8714 #if NECP
8715 necp_update_all_clients();
8716 #endif /* NECP */
8717
8718 tcp_probe_connectivity(ifp, conn_probe);
8719 return 0;
8720 }
8721
8722 /* for uuid.c */
8723 static int
8724 get_ether_index(int * ret_other_index)
8725 {
8726 struct ifnet *ifp;
8727 int en0_index = 0;
8728 int other_en_index = 0;
8729 int any_ether_index = 0;
8730 short best_unit = 0;
8731
8732 *ret_other_index = 0;
8733 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8734 /*
8735 * find en0, or if not en0, the lowest unit en*, and if not
8736 * that, any ethernet
8737 */
8738 ifnet_lock_shared(ifp);
8739 if (strcmp(ifp->if_name, "en") == 0) {
8740 if (ifp->if_unit == 0) {
8741 /* found en0, we're done */
8742 en0_index = ifp->if_index;
8743 ifnet_lock_done(ifp);
8744 break;
8745 }
8746 if (other_en_index == 0 || ifp->if_unit < best_unit) {
8747 other_en_index = ifp->if_index;
8748 best_unit = ifp->if_unit;
8749 }
8750 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
8751 any_ether_index = ifp->if_index;
8752 }
8753 ifnet_lock_done(ifp);
8754 }
8755 if (en0_index == 0) {
8756 if (other_en_index != 0) {
8757 *ret_other_index = other_en_index;
8758 } else if (any_ether_index != 0) {
8759 *ret_other_index = any_ether_index;
8760 }
8761 }
8762 return en0_index;
8763 }
8764
8765 int
8766 uuid_get_ethernet(u_int8_t *node)
8767 {
8768 static int en0_index;
8769 struct ifnet *ifp;
8770 int other_index = 0;
8771 int the_index = 0;
8772 int ret;
8773
8774 ifnet_head_lock_shared();
8775 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
8776 en0_index = get_ether_index(&other_index);
8777 }
8778 if (en0_index != 0) {
8779 the_index = en0_index;
8780 } else if (other_index != 0) {
8781 the_index = other_index;
8782 }
8783 if (the_index != 0) {
8784 ifp = ifindex2ifnet[the_index];
8785 VERIFY(ifp != NULL);
8786 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
8787 ret = 0;
8788 } else {
8789 ret = -1;
8790 }
8791 ifnet_head_done();
8792 return ret;
8793 }
8794
8795 static int
8796 sysctl_rxpoll SYSCTL_HANDLER_ARGS
8797 {
8798 #pragma unused(arg1, arg2)
8799 uint32_t i;
8800 int err;
8801
8802 i = if_rxpoll;
8803
8804 err = sysctl_handle_int(oidp, &i, 0, req);
8805 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8806 return err;
8807 }
8808
8809 if (net_rxpoll == 0) {
8810 return ENXIO;
8811 }
8812
8813 if_rxpoll = i;
8814 return err;
8815 }
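/*
 * Illustrative sketch: the rxpoll sysctl handlers in this area all follow
 * the same read-modify-clamp-store shape; a new integer tunable with a
 * hypothetical floor would be wired up the same way (names are made up):
 *
 *	static int
 *	sysctl_my_tunable SYSCTL_HANDLER_ARGS
 *	{
 *	#pragma unused(arg1, arg2)
 *		uint32_t i = my_tunable;
 *		int err;
 *
 *		err = sysctl_handle_int(oidp, &i, 0, req);
 *		if (err != 0 || req->newptr == USER_ADDR_NULL)
 *			return err;	// read-only access or error
 *		if (i < MY_TUNABLE_MIN)
 *			i = MY_TUNABLE_MIN;
 *		my_tunable = i;
 *		return err;
 *	}
 */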
8816
8817 static int
8818 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
8819 {
8820 #pragma unused(arg1, arg2)
8821 uint64_t q;
8822 int err;
8823
8824 q = if_rxpoll_mode_holdtime;
8825
8826 err = sysctl_handle_quad(oidp, &q, 0, req);
8827 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8828 return err;
8829 }
8830
8831 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
8832 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
8833 }
8834
8835 if_rxpoll_mode_holdtime = q;
8836
8837 return err;
8838 }
8839
8840 static int
8841 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
8842 {
8843 #pragma unused(arg1, arg2)
8844 uint64_t q;
8845 int err;
8846
8847 q = if_rxpoll_sample_holdtime;
8848
8849 err = sysctl_handle_quad(oidp, &q, 0, req);
8850 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8851 return err;
8852 }
8853
8854 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
8855 q = IF_RXPOLL_SAMPLETIME_MIN;
8856 }
8857
8858 if_rxpoll_sample_holdtime = q;
8859
8860 return err;
8861 }
8862
8863 static int
8864 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
8865 {
8866 #pragma unused(arg1, arg2)
8867 uint64_t q;
8868 int err;
8869
8870 q = if_rxpoll_interval_time;
8871
8872 err = sysctl_handle_quad(oidp, &q, 0, req);
8873 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8874 return err;
8875 }
8876
8877 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
8878 q = IF_RXPOLL_INTERVALTIME_MIN;
8879 }
8880
8881 if_rxpoll_interval_time = q;
8882
8883 return err;
8884 }
8885
8886 static int
8887 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
8888 {
8889 #pragma unused(arg1, arg2)
8890 uint32_t i;
8891 int err;
8892
8893 i = if_sysctl_rxpoll_wlowat;
8894
8895 err = sysctl_handle_int(oidp, &i, 0, req);
8896 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8897 return err;
8898 }
8899
8900 if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
8901 return EINVAL;
8902 }
8903
8904 if_sysctl_rxpoll_wlowat = i;
8905 return err;
8906 }
8907
8908 static int
8909 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
8910 {
8911 #pragma unused(arg1, arg2)
8912 uint32_t i;
8913 int err;
8914
8915 i = if_sysctl_rxpoll_whiwat;
8916
8917 err = sysctl_handle_int(oidp, &i, 0, req);
8918 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8919 return err;
8920 }
8921
8922 if (i <= if_sysctl_rxpoll_wlowat) {
8923 return EINVAL;
8924 }
8925
8926 if_sysctl_rxpoll_whiwat = i;
8927 return err;
8928 }
8929
8930 static int
8931 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
8932 {
8933 #pragma unused(arg1, arg2)
8934 int i, err;
8935
8936 i = if_sndq_maxlen;
8937
8938 err = sysctl_handle_int(oidp, &i, 0, req);
8939 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8940 return err;
8941 }
8942
8943 if (i < IF_SNDQ_MINLEN) {
8944 i = IF_SNDQ_MINLEN;
8945 }
8946
8947 if_sndq_maxlen = i;
8948 return err;
8949 }
8950
8951 static int
8952 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
8953 {
8954 #pragma unused(arg1, arg2)
8955 int i, err;
8956
8957 i = if_rcvq_maxlen;
8958
8959 err = sysctl_handle_int(oidp, &i, 0, req);
8960 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8961 return err;
8962 }
8963
8964 if (i < IF_RCVQ_MINLEN) {
8965 i = IF_RCVQ_MINLEN;
8966 }
8967
8968 if_rcvq_maxlen = i;
8969 return err;
8970 }
8971
8972 int
8973 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
8974 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
8975 {
8976 struct kev_dl_node_presence kev;
8977 struct sockaddr_dl *sdl;
8978 struct sockaddr_in6 *sin6;
8979 int ret = 0;
8980
8981 VERIFY(ifp);
8982 VERIFY(sa);
8983 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8984
8985 bzero(&kev, sizeof(kev));
8986 sin6 = &kev.sin6_node_address;
8987 sdl = &kev.sdl_node_address;
8988 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8989 kev.rssi = rssi;
8990 kev.link_quality_metric = lqm;
8991 kev.node_proximity_metric = npm;
8992 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
8993
8994 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
8995 if (ret == 0) {
8996 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
8997 &kev.link_data, sizeof(kev));
8998 if (err != 0) {
8999 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9000 "error %d\n", __func__, err);
9001 }
9002 }
9003 return ret;
9004 }
9005
9006 void
9007 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
9008 {
9009 struct kev_dl_node_absence kev = {};
9010 struct sockaddr_in6 *kev_sin6 = NULL;
9011 struct sockaddr_dl *kev_sdl = NULL;
9012
9013 VERIFY(ifp != NULL);
9014 VERIFY(sa != NULL);
9015 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
9016
9017 kev_sin6 = &kev.sin6_node_address;
9018 kev_sdl = &kev.sdl_node_address;
9019
9020 if (sa->sa_family == AF_INET6) {
9021 /*
9022 * If an IPv6 address is given, get the link-layer
9023 * address that was cached in the neighbor cache.
9024 */
9025 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9026 bcopy(sa, kev_sin6, sa->sa_len);
9027 nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
9028 } else {
9029 /*
9030 * If the passed address is of AF_LINK type, derive the
9031 * IPv6 address from the link-layer address.
9032 */
9033 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
9034 nd6_alt_node_absent(ifp, kev_sin6, NULL);
9035 }
9036
9037 kev_sdl->sdl_type = ifp->if_type;
9038 kev_sdl->sdl_index = ifp->if_index;
9039
9040 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
9041 &kev.link_data, sizeof(kev));
9042 }
9043
9044 int
9045 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
9046 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
9047 {
9048 struct kev_dl_node_presence kev = {};
9049 struct sockaddr_dl *kev_sdl = NULL;
9050 struct sockaddr_in6 *kev_sin6 = NULL;
9051 int ret = 0;
9052
9053 VERIFY(ifp != NULL);
9054 VERIFY(sa != NULL && sdl != NULL);
9055 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
9056
9057 kev_sin6 = &kev.sin6_node_address;
9058 kev_sdl = &kev.sdl_node_address;
9059
9060 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
9061 bcopy(sdl, kev_sdl, sdl->sdl_len);
9062 kev_sdl->sdl_type = ifp->if_type;
9063 kev_sdl->sdl_index = ifp->if_index;
9064
9065 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
9066 bcopy(sa, kev_sin6, sa->sa_len);
9067
9068 kev.rssi = rssi;
9069 kev.link_quality_metric = lqm;
9070 kev.node_proximity_metric = npm;
9071 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
9072
9073 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
9074 if (ret == 0) {
9075 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
9076 &kev.link_data, sizeof(kev));
9077 if (err != 0) {
9078 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
9079 "error %d\n", __func__, err);
9080 }
9081 }
9082 return ret;
9083 }
9084
9085 const void *
9086 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
9087 kauth_cred_t *credp)
9088 {
9089 const u_int8_t *bytes;
9090 size_t size;
9091
9092 bytes = CONST_LLADDR(sdl);
9093 size = sdl->sdl_alen;
9094
9095 #if CONFIG_MACF
9096 if (dlil_lladdr_ckreq) {
9097 switch (sdl->sdl_type) {
9098 case IFT_ETHER:
9099 case IFT_IEEE1394:
9100 break;
9101 default:
9102 credp = NULL;
9103 break;
9104 }
9106
9107 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
9108 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
9109 [0] = 2
9110 };
9111
9112 bytes = unspec;
9113 }
9114 }
9115 #else
9116 #pragma unused(credp)
9117 #endif
9118
9119 if (sizep != NULL) {
9120 *sizep = size;
9121 }
9122 return bytes;
9123 }
9124
9125 void
9126 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
9127 u_int8_t info[DLIL_MODARGLEN])
9128 {
9129 struct kev_dl_issues kev;
9130 struct timeval tv;
9131
9132 VERIFY(ifp != NULL);
9133 VERIFY(modid != NULL);
9134 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
9135 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
9136
9137 bzero(&kev, sizeof(kev));
9138
9139 microtime(&tv);
9140 kev.timestamp = tv.tv_sec;
9141 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
9142 if (info != NULL) {
9143 bcopy(info, &kev.info, DLIL_MODARGLEN);
9144 }
9145
9146 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
9147 &kev.link_data, sizeof(kev));
9148 }
9149
9150 errno_t
9151 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9152 struct proc *p)
9153 {
9154 u_int32_t level = IFNET_THROTTLE_OFF;
9155 errno_t result = 0;
9156
9157 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
9158
9159 if (cmd == SIOCSIFOPPORTUNISTIC) {
9160 /*
9161 * XXX: Use priv_check_cred() instead of root check?
9162 */
9163 if ((result = proc_suser(p)) != 0) {
9164 return result;
9165 }
9166
9167 if (ifr->ifr_opportunistic.ifo_flags ==
9168 IFRIFOF_BLOCK_OPPORTUNISTIC) {
9169 level = IFNET_THROTTLE_OPPORTUNISTIC;
9170 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
9171 level = IFNET_THROTTLE_OFF;
9172 } else {
9173 result = EINVAL;
9174 }
9175
9176 if (result == 0) {
9177 result = ifnet_set_throttle(ifp, level);
9178 }
9179 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
9180 ifr->ifr_opportunistic.ifo_flags = 0;
9181 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
9182 ifr->ifr_opportunistic.ifo_flags |=
9183 IFRIFOF_BLOCK_OPPORTUNISTIC;
9184 }
9185 }
9186
9187 /*
9188 * Return the count of current opportunistic connections
9189 * over the interface.
9190 */
9191 if (result == 0) {
9192 uint32_t flags = 0;
9193 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
9194 INPCB_OPPORTUNISTIC_SETCMD : 0;
9195 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
9196 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
9197 ifr->ifr_opportunistic.ifo_inuse =
9198 udp_count_opportunistic(ifp->if_index, flags) +
9199 tcp_count_opportunistic(ifp->if_index, flags);
9200 }
9201
9202 if (result == EALREADY) {
9203 result = 0;
9204 }
9205
9206 return result;
9207 }
9208
9209 int
9210 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
9211 {
9212 struct ifclassq *ifq;
9213 int err = 0;
9214
9215 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9216 return ENXIO;
9217 }
9218
9219 *level = IFNET_THROTTLE_OFF;
9220
9221 ifq = &ifp->if_snd;
9222 IFCQ_LOCK(ifq);
9223 /* Throttling works only for IFCQ, not ALTQ instances */
9224 if (IFCQ_IS_ENABLED(ifq)) {
9225 IFCQ_GET_THROTTLE(ifq, *level, err);
9226 }
9227 IFCQ_UNLOCK(ifq);
9228
9229 return err;
9230 }
9231
9232 int
9233 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
9234 {
9235 struct ifclassq *ifq;
9236 int err = 0;
9237
9238 if (!(ifp->if_eflags & IFEF_TXSTART)) {
9239 return ENXIO;
9240 }
9241
9242 ifq = &ifp->if_snd;
9243
9244 switch (level) {
9245 case IFNET_THROTTLE_OFF:
9246 case IFNET_THROTTLE_OPPORTUNISTIC:
9247 break;
9248 default:
9249 return EINVAL;
9250 }
9251
9252 IFCQ_LOCK(ifq);
9253 if (IFCQ_IS_ENABLED(ifq)) {
9254 IFCQ_SET_THROTTLE(ifq, level, err);
9255 }
9256 IFCQ_UNLOCK(ifq);
9257
9258 if (err == 0) {
9259 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
9260 level);
9261 #if NECP
9262 necp_update_all_clients();
9263 #endif /* NECP */
9264 if (level == IFNET_THROTTLE_OFF) {
9265 ifnet_start(ifp);
9266 }
9267 }
9268
9269 return err;
9270 }
9271
9272 errno_t
9273 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
9274 struct proc *p)
9275 {
9276 #pragma unused(p)
9277 errno_t result = 0;
9278 uint32_t flags;
9279 int level, category, subcategory;
9280
9281 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
9282
9283 if (cmd == SIOCSIFLOG) {
9284 if ((result = priv_check_cred(kauth_cred_get(),
9285 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
9286 return result;
9287 }
9288
9289 level = ifr->ifr_log.ifl_level;
9290 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
9291 result = EINVAL;
9292 }
9293
9294 flags = ifr->ifr_log.ifl_flags;
9295 if ((flags &= IFNET_LOGF_MASK) == 0) {
9296 result = EINVAL;
9297 }
9298
9299 category = ifr->ifr_log.ifl_category;
9300 subcategory = ifr->ifr_log.ifl_subcategory;
9301
9302 if (result == 0) {
9303 result = ifnet_set_log(ifp, level, flags,
9304 category, subcategory);
9305 }
9306 } else {
9307 result = ifnet_get_log(ifp, &level, &flags, &category,
9308 &subcategory);
9309 if (result == 0) {
9310 ifr->ifr_log.ifl_level = level;
9311 ifr->ifr_log.ifl_flags = flags;
9312 ifr->ifr_log.ifl_category = category;
9313 ifr->ifr_log.ifl_subcategory = subcategory;
9314 }
9315 }
9316
9317 return result;
9318 }
9319
9320 int
9321 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
9322 int32_t category, int32_t subcategory)
9323 {
9324 int err = 0;
9325
9326 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
9327 VERIFY(flags & IFNET_LOGF_MASK);
9328
9329 /*
9330 * The logging level applies to all facilities; make sure to
9331 * update them all with the most current level.
9332 */
9333 flags |= ifp->if_log.flags;
9334
9335 if (ifp->if_output_ctl != NULL) {
9336 struct ifnet_log_params l;
9337
9338 bzero(&l, sizeof(l));
9339 l.level = level;
9340 l.flags = flags;
9341 l.flags &= ~IFNET_LOGF_DLIL;
9342 l.category = category;
9343 l.subcategory = subcategory;
9344
9345 /* Send this request to lower layers */
9346 if (l.flags != 0) {
9347 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
9348 sizeof(l), &l);
9349 }
9350 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
9351 /*
9352 * If targeted to the lower layers without an output
9353 * control callback registered on the interface, just
9354 * silently ignore facilities other than ours.
9355 */
9356 flags &= IFNET_LOGF_DLIL;
9357 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
9358 level = 0;
9359 }
9360 }
9361
9362 if (err == 0) {
9363 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
9364 ifp->if_log.flags = 0;
9365 } else {
9366 ifp->if_log.flags |= flags;
9367 }
9368
9369 log(LOG_INFO, "%s: logging level set to %d flags=%b "
9370 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
9371 ifp->if_log.level, ifp->if_log.flags,
9372 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
9373 category, subcategory);
9374 }
9375
9376 return err;
9377 }
9378
9379 int
9380 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
9381 int32_t *category, int32_t *subcategory)
9382 {
9383 if (level != NULL) {
9384 *level = ifp->if_log.level;
9385 }
9386 if (flags != NULL) {
9387 *flags = ifp->if_log.flags;
9388 }
9389 if (category != NULL) {
9390 *category = ifp->if_log.category;
9391 }
9392 if (subcategory != NULL) {
9393 *subcategory = ifp->if_log.subcategory;
9394 }
9395
9396 return 0;
9397 }
9398
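/*
* Notify the driver, through its output control callback, that an
* address of the given family was configured on the interface; PF is
* also given a chance to refresh its interface address state.
*/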
9399 int
9400 ifnet_notify_address(struct ifnet *ifp, int af)
9401 {
9402 struct ifnet_notify_address_params na;
9403
9404 #if PF
9405 (void) pf_ifaddr_hook(ifp);
9406 #endif /* PF */
9407
9408 if (ifp->if_output_ctl == NULL) {
9409 return EOPNOTSUPP;
9410 }
9411
9412 bzero(&na, sizeof(na));
9413 na.address_family = af;
9414
9415 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
9416 sizeof(na), &na);
9417 }
9418
9419 errno_t
9420 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
9421 {
9422 if (ifp == NULL || flowid == NULL) {
9423 return EINVAL;
9424 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9425 !IF_FULLY_ATTACHED(ifp)) {
9426 return ENXIO;
9427 }
9428
9429 *flowid = ifp->if_flowhash;
9430
9431 return 0;
9432 }
9433
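/*
* Flow-advisory control: ifnet_disable_output() records the interface's
* flow hash in ifnet_fc_tree and marks the start thread as flow
* controlled; a later ifnet_flowadv() with the matching flow hash
* re-enables output via ifnet_enable_output().
*/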
9434 errno_t
9435 ifnet_disable_output(struct ifnet *ifp)
9436 {
9437 int err;
9438
9439 if (ifp == NULL) {
9440 return EINVAL;
9441 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9442 !IF_FULLY_ATTACHED(ifp)) {
9443 return ENXIO;
9444 }
9445
9446 if ((err = ifnet_fc_add(ifp)) == 0) {
9447 lck_mtx_lock_spin(&ifp->if_start_lock);
9448 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
9449 lck_mtx_unlock(&ifp->if_start_lock);
9450 }
9451 return err;
9452 }
9453
9454 errno_t
9455 ifnet_enable_output(struct ifnet *ifp)
9456 {
9457 if (ifp == NULL) {
9458 return EINVAL;
9459 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
9460 !IF_FULLY_ATTACHED(ifp)) {
9461 return ENXIO;
9462 }
9463
9464 ifnet_start_common(ifp, TRUE);
9465 return 0;
9466 }
9467
9468 void
9469 ifnet_flowadv(uint32_t flowhash)
9470 {
9471 struct ifnet_fc_entry *ifce;
9472 struct ifnet *ifp;
9473
9474 ifce = ifnet_fc_get(flowhash);
9475 if (ifce == NULL) {
9476 return;
9477 }
9478
9479 VERIFY(ifce->ifce_ifp != NULL);
9480 ifp = ifce->ifce_ifp;
9481
9482 /* flow hash gets recalculated per attach, so check */
9483 if (ifnet_is_attached(ifp, 1)) {
9484 if (ifp->if_flowhash == flowhash) {
9485 (void) ifnet_enable_output(ifp);
9486 }
9487 ifnet_decr_iorefcnt(ifp);
9488 }
9489 ifnet_fc_entry_free(ifce);
9490 }
9491
9492 /*
9493 * Function to compare ifnet_fc_entries in ifnet flow control tree
9494 */
9495 static inline int
9496 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
9497 {
9498 return (fc1->ifce_flowhash > fc2->ifce_flowhash) - (fc1->ifce_flowhash < fc2->ifce_flowhash); /* avoid unsigned wrap */
9499 }
9500
9501 static int
9502 ifnet_fc_add(struct ifnet *ifp)
9503 {
9504 struct ifnet_fc_entry keyfc, *ifce;
9505 uint32_t flowhash;
9506
9507 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
9508 VERIFY(ifp->if_flowhash != 0);
9509 flowhash = ifp->if_flowhash;
9510
9511 bzero(&keyfc, sizeof(keyfc));
9512 keyfc.ifce_flowhash = flowhash;
9513
9514 lck_mtx_lock_spin(&ifnet_fc_lock);
9515 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9516 if (ifce != NULL && ifce->ifce_ifp == ifp) {
9517 /* Entry is already in ifnet_fc_tree, return */
9518 lck_mtx_unlock(&ifnet_fc_lock);
9519 return 0;
9520 }
9521
9522 if (ifce != NULL) {
9523 /*
9524 * There is a different fc entry with the same flow hash
9525 * but different ifp pointer. There can be a collision
9526 * on flow hash but the probability is low. Let's just
9527 * avoid adding a second one when there is a collision.
9528 */
9529 lck_mtx_unlock(&ifnet_fc_lock);
9530 return EAGAIN;
9531 }
9532
9533 /* become regular mutex */
9534 lck_mtx_convert_spin(&ifnet_fc_lock);
9535
9536 ifce = zalloc(ifnet_fc_zone);
9537 if (ifce == NULL) {
9538 /* memory allocation failed */
9539 lck_mtx_unlock(&ifnet_fc_lock);
9540 return ENOMEM;
9541 }
9542 bzero(ifce, ifnet_fc_zone_size);
9543
9544 ifce->ifce_flowhash = flowhash;
9545 ifce->ifce_ifp = ifp;
9546
9547 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9548 lck_mtx_unlock(&ifnet_fc_lock);
9549 return 0;
9550 }
9551
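/*
* Look up and remove the flow control entry for the given flow hash.
* Returns NULL if no entry exists or if its interface is no longer
* attached (the entry is freed in that case); otherwise the caller
* owns the returned entry and must free it.
*/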
9552 static struct ifnet_fc_entry *
9553 ifnet_fc_get(uint32_t flowhash)
9554 {
9555 struct ifnet_fc_entry keyfc, *ifce;
9556 struct ifnet *ifp;
9557
9558 bzero(&keyfc, sizeof(keyfc));
9559 keyfc.ifce_flowhash = flowhash;
9560
9561 lck_mtx_lock_spin(&ifnet_fc_lock);
9562 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
9563 if (ifce == NULL) {
9564 /* Entry is not present in ifnet_fc_tree, return */
9565 lck_mtx_unlock(&ifnet_fc_lock);
9566 return NULL;
9567 }
9568
9569 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
9570
9571 VERIFY(ifce->ifce_ifp != NULL);
9572 ifp = ifce->ifce_ifp;
9573
9574 /* become regular mutex */
9575 lck_mtx_convert_spin(&ifnet_fc_lock);
9576
9577 if (!ifnet_is_attached(ifp, 0)) {
9578 /*
9579 * This ifp is not attached or in the process of being
9580 * detached; just don't process it.
9581 */
9582 ifnet_fc_entry_free(ifce);
9583 ifce = NULL;
9584 }
9585 lck_mtx_unlock(&ifnet_fc_lock);
9586
9587 return ifce;
9588 }
9589
9590 static void
9591 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
9592 {
9593 zfree(ifnet_fc_zone, ifce);
9594 }
9595
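/*
* Compute a non-zero flow hash for the interface from its name, unit,
* flags, capabilities, scheduling model and random salt values; the
* seed is re-rolled and the hash recomputed until a non-zero value is
* obtained.
*/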
9596 static uint32_t
9597 ifnet_calc_flowhash(struct ifnet *ifp)
9598 {
9599 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
9600 uint32_t flowhash = 0;
9601
9602 if (ifnet_flowhash_seed == 0) {
9603 ifnet_flowhash_seed = RandomULong();
9604 }
9605
9606 bzero(&fh, sizeof(fh));
9607
9608 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
9609 fh.ifk_unit = ifp->if_unit;
9610 fh.ifk_flags = ifp->if_flags;
9611 fh.ifk_eflags = ifp->if_eflags;
9612 fh.ifk_capabilities = ifp->if_capabilities;
9613 fh.ifk_capenable = ifp->if_capenable;
9614 fh.ifk_output_sched_model = ifp->if_output_sched_model;
9615 fh.ifk_rand1 = RandomULong();
9616 fh.ifk_rand2 = RandomULong();
9617
9618 try_again:
9619 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
9620 if (flowhash == 0) {
9621 /* try to get a non-zero flowhash */
9622 ifnet_flowhash_seed = RandomULong();
9623 goto try_again;
9624 }
9625
9626 return flowhash;
9627 }
9628
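/*
* Store (or clear, when len is zero) the per-address-family network
* signature of the interface; only AF_INET and AF_INET6 are supported.
*/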
9629 int
9630 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
9631 uint16_t flags, uint8_t *data)
9632 {
9633 #pragma unused(flags)
9634 int error = 0;
9635
9636 switch (family) {
9637 case AF_INET:
9638 if_inetdata_lock_exclusive(ifp);
9639 if (IN_IFEXTRA(ifp) != NULL) {
9640 if (len == 0) {
9641 /* Allow clearing the signature */
9642 IN_IFEXTRA(ifp)->netsig_len = 0;
9643 bzero(IN_IFEXTRA(ifp)->netsig,
9644 sizeof(IN_IFEXTRA(ifp)->netsig));
9645 if_inetdata_lock_done(ifp);
9646 break;
9647 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
9648 error = EINVAL;
9649 if_inetdata_lock_done(ifp);
9650 break;
9651 }
9652 IN_IFEXTRA(ifp)->netsig_len = len;
9653 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
9654 } else {
9655 error = ENOMEM;
9656 }
9657 if_inetdata_lock_done(ifp);
9658 break;
9659
9660 case AF_INET6:
9661 if_inet6data_lock_exclusive(ifp);
9662 if (IN6_IFEXTRA(ifp) != NULL) {
9663 if (len == 0) {
9664 /* Allow clearing the signature */
9665 IN6_IFEXTRA(ifp)->netsig_len = 0;
9666 bzero(IN6_IFEXTRA(ifp)->netsig,
9667 sizeof(IN6_IFEXTRA(ifp)->netsig));
9668 if_inet6data_lock_done(ifp);
9669 break;
9670 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
9671 error = EINVAL;
9672 if_inet6data_lock_done(ifp);
9673 break;
9674 }
9675 IN6_IFEXTRA(ifp)->netsig_len = len;
9676 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
9677 } else {
9678 error = ENOMEM;
9679 }
9680 if_inet6data_lock_done(ifp);
9681 break;
9682
9683 default:
9684 error = EINVAL;
9685 break;
9686 }
9687
9688 return error;
9689 }
9690
9691 int
9692 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
9693 uint16_t *flags, uint8_t *data)
9694 {
9695 int error = 0;
9696
9697 if (ifp == NULL || len == NULL || data == NULL) {
9698 return EINVAL;
9699 }
9700
9701 switch (family) {
9702 case AF_INET:
9703 if_inetdata_lock_shared(ifp);
9704 if (IN_IFEXTRA(ifp) != NULL) {
9705 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
9706 error = EINVAL;
9707 if_inetdata_lock_done(ifp);
9708 break;
9709 }
9710 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
9711 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
9712 } else {
9713 error = ENOENT;
9714 }
9715 } else {
9716 error = ENOMEM;
9717 }
9718 if_inetdata_lock_done(ifp);
9719 break;
9720
9721 case AF_INET6:
9722 if_inet6data_lock_shared(ifp);
9723 if (IN6_IFEXTRA(ifp) != NULL) {
9724 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
9725 error = EINVAL;
9726 if_inet6data_lock_done(ifp);
9727 break;
9728 }
9729 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
9730 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
9731 } else {
9732 error = ENOENT;
9733 }
9734 } else {
9735 error = ENOMEM;
9736 }
9737 if_inet6data_lock_done(ifp);
9738 break;
9739
9740 default:
9741 error = EINVAL;
9742 break;
9743 }
9744
9745 if (error == 0 && flags != NULL) {
9746 *flags = 0;
9747 }
9748
9749 return error;
9750 }
9751
9752 #if INET6
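/*
* Install (or clear, for a zero prefix length) the NAT64 prefixes on
* the interface, rejecting unsupported prefix lengths and scoped
* prefixes; NECP clients are notified when at least one prefix is set.
*/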
9753 int
9754 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9755 {
9756 int i, error = 0, one_set = 0;
9757
9758 if_inet6data_lock_exclusive(ifp);
9759
9760 if (IN6_IFEXTRA(ifp) == NULL) {
9761 error = ENOMEM;
9762 goto out;
9763 }
9764
9765 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9766 uint32_t prefix_len =
9767 prefixes[i].prefix_len;
9768 struct in6_addr *prefix =
9769 &prefixes[i].ipv6_prefix;
9770
9771 if (prefix_len == 0) {
9772 clat_log0((LOG_DEBUG,
9773 "NAT64 prefixes purged from Interface %s\n",
9774 if_name(ifp)));
9775 /* Allow clearing the prefix */
9776 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9777 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9778 sizeof(struct in6_addr));
9779
9780 continue;
9781 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
9782 prefix_len != NAT64_PREFIX_LEN_40 &&
9783 prefix_len != NAT64_PREFIX_LEN_48 &&
9784 prefix_len != NAT64_PREFIX_LEN_56 &&
9785 prefix_len != NAT64_PREFIX_LEN_64 &&
9786 prefix_len != NAT64_PREFIX_LEN_96) {
9787 clat_log0((LOG_DEBUG,
9788 "NAT64 prefixlen is incorrect %d\n", prefix_len));
9789 error = EINVAL;
9790 goto out;
9791 }
9792
9793 if (IN6_IS_SCOPE_EMBED(prefix)) {
9794 clat_log0((LOG_DEBUG,
9795 "NAT64 prefix has interface/link local scope.\n"));
9796 error = EINVAL;
9797 goto out;
9798 }
9799
9800 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
9801 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9802 sizeof(struct in6_addr));
9803 clat_log0((LOG_DEBUG,
9804 "NAT64 prefix set to %s with prefixlen: %d\n",
9805 ip6_sprintf(prefix), prefix_len));
9806 one_set = 1;
9807 }
9808
9809 out:
9810 if_inet6data_lock_done(ifp);
9811
9812 if (error == 0 && one_set != 0) {
9813 necp_update_all_clients();
9814 }
9815
9816 return error;
9817 }
9818
9819 int
9820 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9821 {
9822 int i, found_one = 0, error = 0;
9823
9824 if (ifp == NULL) {
9825 return EINVAL;
9826 }
9827
9828 if_inet6data_lock_shared(ifp);
9829
9830 if (IN6_IFEXTRA(ifp) == NULL) {
9831 error = ENOMEM;
9832 goto out;
9833 }
9834
9835 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9836 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
9837 found_one = 1;
9838 }
9839 }
9840
9841 if (found_one == 0) {
9842 error = ENOENT;
9843 goto out;
9844 }
9845
9846 if (prefixes) {
9847 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
9848 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
9849 }
9850
9851 out:
9852 if_inet6data_lock_done(ifp);
9853
9854 return error;
9855 }
9856 #endif
9857
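/*
* Debug-only output hook: when HWCKSUM_DBG_FINALIZE_FORCED is set,
* force software finalization of IPv4/IPv6 checksums on outbound
* packets (TSO packets excluded) and count the finalized headers and
* payloads.
*/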
9858 static void
9859 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
9860 protocol_family_t pf)
9861 {
9862 #pragma unused(ifp)
9863 uint32_t did_sw;
9864
9865 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
9866 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
9867 return;
9868 }
9869
9870 switch (pf) {
9871 case PF_INET:
9872 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
9873 if (did_sw & CSUM_DELAY_IP) {
9874 hwcksum_dbg_finalized_hdr++;
9875 }
9876 if (did_sw & CSUM_DELAY_DATA) {
9877 hwcksum_dbg_finalized_data++;
9878 }
9879 break;
9880 #if INET6
9881 case PF_INET6:
9882 /*
9883 * Checksum offload should not have been enabled when
9884 * extension headers exist; that also means that we
9885 * cannot force-finalize packets with extension headers.
9886 * Indicate to the callee that it should skip such a case
9887 * by setting optlen to -1.
9888 */
9889 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
9890 m->m_pkthdr.csum_flags);
9891 if (did_sw & CSUM_DELAY_IPV6_DATA) {
9892 hwcksum_dbg_finalized_data++;
9893 }
9894 break;
9895 #endif /* INET6 */
9896 default:
9897 return;
9898 }
9899 }
9900
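/*
* Debug-only input hook: optionally force partial checksum offload on
* received packets, and/or verify a driver-supplied partial checksum
* against a software-computed reference sum, re-offsetting it when
* start-offset adjustment is being emulated.
*/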
9901 static void
9902 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
9903 protocol_family_t pf)
9904 {
9905 uint16_t sum = 0;
9906 uint32_t hlen;
9907
9908 if (frame_header == NULL ||
9909 frame_header < (char *)mbuf_datastart(m) ||
9910 frame_header > (char *)m->m_data) {
9911 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
9912 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
9913 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
9914 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
9915 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
9916 (uint64_t)VM_KERNEL_ADDRPERM(m));
9917 return;
9918 }
9919 hlen = (m->m_data - frame_header);
9920
9921 switch (pf) {
9922 case PF_INET:
9923 #if INET6
9924 case PF_INET6:
9925 #endif /* INET6 */
9926 break;
9927 default:
9928 return;
9929 }
9930
9931 /*
9932 * Force partial checksum offload; useful to simulate cases
9933 * where the hardware does not support partial checksum offload,
9934 * in order to validate correctness throughout the layers above.
9935 */
9936 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
9937 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
9938
9939 if (foff > (uint32_t)m->m_pkthdr.len) {
9940 return;
9941 }
9942
9943 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
9944
9945 /* Compute 16-bit 1's complement sum from forced offset */
9946 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
9947
9948 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
9949 m->m_pkthdr.csum_rx_val = sum;
9950 m->m_pkthdr.csum_rx_start = (foff + hlen);
9951
9952 hwcksum_dbg_partial_forced++;
9953 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
9954 }
9955
9956 /*
9957 * Partial checksum offload verification (and adjustment);
9958 * useful to validate and test cases where the hardware
9959 * supports partial checksum offload.
9960 */
9961 if ((m->m_pkthdr.csum_flags &
9962 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
9963 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
9964 uint32_t rxoff;
9965
9966 /* Start offset must begin after frame header */
9967 rxoff = m->m_pkthdr.csum_rx_start;
9968 if (hlen > rxoff) {
9969 hwcksum_dbg_bad_rxoff++;
9970 if (dlil_verbose) {
9971 DLIL_PRINTF("%s: partial cksum start offset %d "
9972 "is less than frame header length %d for "
9973 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
9974 (uint64_t)VM_KERNEL_ADDRPERM(m));
9975 }
9976 return;
9977 }
9978 rxoff -= hlen;
9979
9980 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
9981 /*
9982 * Compute the expected 16-bit 1's complement sum;
9983 * skip this if we've already computed it above
9984 * when partial checksum offload is forced.
9985 */
9986 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
9987
9988 /* Hardware or driver is buggy */
9989 if (sum != m->m_pkthdr.csum_rx_val) {
9990 hwcksum_dbg_bad_cksum++;
9991 if (dlil_verbose) {
9992 DLIL_PRINTF("%s: bad partial cksum value "
9993 "0x%x (expected 0x%x) for mbuf "
9994 "0x%llx [rx_start %d]\n",
9995 if_name(ifp),
9996 m->m_pkthdr.csum_rx_val, sum,
9997 (uint64_t)VM_KERNEL_ADDRPERM(m),
9998 m->m_pkthdr.csum_rx_start);
9999 }
10000 return;
10001 }
10002 }
10003 hwcksum_dbg_verified++;
10004
10005 /*
10006 * This code lets us emulate hardware implementations that
10007 * compute the 16-bit 1's complement sum starting at various
10008 * offset values.
10009 */
10010 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
10011 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
10012
10013 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
10014 return;
10015 }
10016
10017 sum = m_adj_sum16(m, rxoff, aoff,
10018 m_pktlen(m) - aoff, sum);
10019
10020 m->m_pkthdr.csum_rx_val = sum;
10021 m->m_pkthdr.csum_rx_start = (aoff + hlen);
10022
10023 hwcksum_dbg_adjusted++;
10024 }
10025 }
10026 }
10027
10028 static int
10029 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
10030 {
10031 #pragma unused(arg1, arg2)
10032 u_int32_t i;
10033 int err;
10034
10035 i = hwcksum_dbg_mode;
10036
10037 err = sysctl_handle_int(oidp, &i, 0, req);
10038 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10039 return err;
10040 }
10041
10042 if (hwcksum_dbg == 0) {
10043 return ENODEV;
10044 }
10045
10046 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
10047 return EINVAL;
10048 }
10049
10050 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
10051
10052 return err;
10053 }
10054
10055 static int
10056 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
10057 {
10058 #pragma unused(arg1, arg2)
10059 u_int32_t i;
10060 int err;
10061
10062 i = hwcksum_dbg_partial_rxoff_forced;
10063
10064 err = sysctl_handle_int(oidp, &i, 0, req);
10065 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10066 return err;
10067 }
10068
10069 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
10070 return ENODEV;
10071 }
10072
10073 hwcksum_dbg_partial_rxoff_forced = i;
10074
10075 return err;
10076 }
10077
10078 static int
10079 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
10080 {
10081 #pragma unused(arg1, arg2)
10082 u_int32_t i;
10083 int err;
10084
10085 i = hwcksum_dbg_partial_rxoff_adj;
10086
10087 err = sysctl_handle_int(oidp, &i, 0, req);
10088 if (err != 0 || req->newptr == USER_ADDR_NULL) {
10089 return err;
10090 }
10091
10092 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
10093 return ENODEV;
10094 }
10095
10096 hwcksum_dbg_partial_rxoff_adj = i;
10097
10098 return err;
10099 }
10100
10101 static int
10102 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
10103 {
10104 #pragma unused(oidp, arg1, arg2)
10105 int err;
10106
10107 if (req->oldptr == USER_ADDR_NULL) { /* size-only probe; SYSCTL_OUT() below reports the required length */
10108 }
10109 if (req->newptr != USER_ADDR_NULL) {
10110 return EPERM;
10111 }
10112 err = SYSCTL_OUT(req, &tx_chain_len_stats,
10113 sizeof(struct chain_len_stats));
10114
10115 return err;
10116 }
10117
10118
10119 #if DEBUG || DEVELOPMENT
10120 /* Blob for sum16 verification */
10121 static uint8_t sumdata[] = {
10122 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
10123 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
10124 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
10125 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
10126 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
10127 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
10128 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
10129 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
10130 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
10131 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
10132 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
10133 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
10134 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
10135 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
10136 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
10137 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
10138 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
10139 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
10140 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
10141 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
10142 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
10143 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
10144 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
10145 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
10146 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
10147 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
10148 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
10149 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
10150 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
10151 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
10152 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
10153 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
10154 0xc8, 0x28, 0x02, 0x00, 0x00
10155 };
10156
10157 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
10158 static struct {
10159 boolean_t init;
10160 uint16_t len;
10161 uint16_t sumr; /* reference */
10162 uint16_t sumrp; /* reference, precomputed */
10163 } sumtbl[] = {
10164 { FALSE, 0, 0, 0x0000 },
10165 { FALSE, 1, 0, 0x001f },
10166 { FALSE, 2, 0, 0x8b1f },
10167 { FALSE, 3, 0, 0x8b27 },
10168 { FALSE, 7, 0, 0x790e },
10169 { FALSE, 11, 0, 0xcb6d },
10170 { FALSE, 20, 0, 0x20dd },
10171 { FALSE, 27, 0, 0xbabd },
10172 { FALSE, 32, 0, 0xf3e8 },
10173 { FALSE, 37, 0, 0x197d },
10174 { FALSE, 43, 0, 0x9eae },
10175 { FALSE, 64, 0, 0x4678 },
10176 { FALSE, 127, 0, 0x9399 },
10177 { FALSE, 256, 0, 0xd147 },
10178 { FALSE, 325, 0, 0x0358 },
10179 };
10180 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
10181
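/*
* Self-test for the 16-bit 1's complement sum routines: for each table
* entry and each alignment within a 64-bit word, compare m_sum16() (and
* b_sum16() on INET) against in_cksum_mbuf_ref() and the precomputed
* reference sum, panicking on any mismatch.
*/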
10182 static void
10183 dlil_verify_sum16(void)
10184 {
10185 struct mbuf *m;
10186 uint8_t *buf;
10187 int n;
10188
10189 /* Make sure test data plus extra room for alignment fits in cluster */
10190 _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
10191
10192 kprintf("DLIL: running SUM16 self-tests ... ");
10193
10194 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
10195 m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
10196
10197 buf = mtod(m, uint8_t *); /* base address */
10198
10199 for (n = 0; n < SUMTBL_MAX; n++) {
10200 uint16_t len = sumtbl[n].len;
10201 int i;
10202
10203 /* Verify for all possible alignments */
10204 for (i = 0; i < (int)sizeof(uint64_t); i++) {
10205 uint16_t sum, sumr;
10206 uint8_t *c;
10207
10208 /* Copy over test data to mbuf */
10209 VERIFY(len <= sizeof(sumdata));
10210 c = buf + i;
10211 bcopy(sumdata, c, len);
10212
10213 /* Zero-offset test (align by data pointer) */
10214 m->m_data = (caddr_t)c;
10215 m->m_len = len;
10216 sum = m_sum16(m, 0, len);
10217
10218 if (!sumtbl[n].init) {
10219 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
10220 sumtbl[n].sumr = sumr;
10221 sumtbl[n].init = TRUE;
10222 } else {
10223 sumr = sumtbl[n].sumr;
10224 }
10225
10226 /* Something is horribly broken; stop now */
10227 if (sumr != sumtbl[n].sumrp) {
10228 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
10229 "for len=%d align=%d sum=0x%04x "
10230 "[expected=0x%04x]\n", __func__,
10231 len, i, sumr, sumtbl[n].sumrp);
10232 /* NOTREACHED */
10233 } else if (sum != sumr) {
10234 panic_plain("\n%s: broken m_sum16() for len=%d "
10235 "align=%d sum=0x%04x [expected=0x%04x]\n",
10236 __func__, len, i, sum, sumr);
10237 /* NOTREACHED */
10238 }
10239
10240 /* Alignment test by offset (fixed data pointer) */
10241 m->m_data = (caddr_t)buf;
10242 m->m_len = i + len;
10243 sum = m_sum16(m, i, len);
10244
10245 /* Something is horribly broken; stop now */
10246 if (sum != sumr) {
10247 panic_plain("\n%s: broken m_sum16() for len=%d "
10248 "offset=%d sum=0x%04x [expected=0x%04x]\n",
10249 __func__, len, i, sum, sumr);
10250 /* NOTREACHED */
10251 }
10252 #if INET
10253 /* Simple sum16 contiguous buffer test by alignment */
10254 sum = b_sum16(c, len);
10255
10256 /* Something is horribly broken; stop now */
10257 if (sum != sumr) {
10258 panic_plain("\n%s: broken b_sum16() for len=%d "
10259 "align=%d sum=0x%04x [expected=0x%04x]\n",
10260 __func__, len, i, sum, sumr);
10261 /* NOTREACHED */
10262 }
10263 #endif /* INET */
10264 }
10265 }
10266 m_freem(m);
10267
10268 kprintf("PASSED\n");
10269 }
10270 #endif /* DEBUG || DEVELOPMENT */
10271
10272 #define CASE_STRINGIFY(x) case x: return #x
10273
10274 __private_extern__ const char *
10275 dlil_kev_dl_code_str(u_int32_t event_code)
10276 {
10277 switch (event_code) {
10278 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
10279 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
10280 CASE_STRINGIFY(KEV_DL_SIFMTU);
10281 CASE_STRINGIFY(KEV_DL_SIFPHYS);
10282 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
10283 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
10284 CASE_STRINGIFY(KEV_DL_ADDMULTI);
10285 CASE_STRINGIFY(KEV_DL_DELMULTI);
10286 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
10287 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
10288 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
10289 CASE_STRINGIFY(KEV_DL_LINK_OFF);
10290 CASE_STRINGIFY(KEV_DL_LINK_ON);
10291 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
10292 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
10293 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
10294 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
10295 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
10296 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
10297 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
10298 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
10299 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
10300 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
10301 CASE_STRINGIFY(KEV_DL_ISSUES);
10302 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
10303 default:
10304 break;
10305 }
10306 return "";
10307 }
10308
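/*
* Thread-call callback for the interface data threshold: notifies the
* network statistics subsystem that the interface crossed its byte
* threshold, holding an I/O reference on the ifnet for the duration.
*/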
10309 static void
10310 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
10311 {
10312 #pragma unused(arg1)
10313 struct ifnet *ifp = arg0;
10314
10315 if (ifnet_is_attached(ifp, 1)) {
10316 nstat_ifnet_threshold_reached(ifp->if_index);
10317 ifnet_decr_iorefcnt(ifp);
10318 }
10319 }
10320
10321 void
10322 ifnet_notify_data_threshold(struct ifnet *ifp)
10323 {
10324 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
10325 uint64_t oldbytes = ifp->if_dt_bytes;
10326
10327 ASSERT(ifp->if_dt_tcall != NULL);
10328
10329 /*
10330 * If we went over the threshold, notify NetworkStatistics.
10331 * We rate-limit it based on the threshold interval value.
10332 */
10333 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
10334 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
10335 !thread_call_isactive(ifp->if_dt_tcall)) {
10336 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
10337 uint64_t now = mach_absolute_time(), deadline = now;
10338 uint64_t ival;
10339
10340 if (tival != 0) {
10341 nanoseconds_to_absolutetime(tival, &ival);
10342 clock_deadline_for_periodic_event(ival, now, &deadline);
10343 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
10344 deadline);
10345 } else {
10346 (void) thread_call_enter(ifp->if_dt_tcall);
10347 }
10348 }
10349 }
10350
10351 #if (DEVELOPMENT || DEBUG)
10352 /*
10353 * The sysctl MIB name carries the input parameters of
10354 * ifnet_get_keepalive_offload_frames():
10355 * ifp (interface index): name[0]
10356 * frames_array_count: name[1]
10357 * frame_data_offset: name[2]
10358 * The returned length gives used_frames_count.
10359 */
10360 static int
10361 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
10362 {
10363 #pragma unused(oidp)
10364 int *name = (int *)arg1;
10365 u_int namelen = arg2;
10366 int idx;
10367 ifnet_t ifp = NULL;
10368 u_int32_t frames_array_count;
10369 size_t frame_data_offset;
10370 u_int32_t used_frames_count;
10371 struct ifnet_keepalive_offload_frame *frames_array = NULL;
10372 int error = 0;
10373 u_int32_t i;
10374
10375 /*
10376 * Only root may look at other processes' TCP frames
10377 */
10378 error = proc_suser(current_proc());
10379 if (error != 0) {
10380 goto done;
10381 }
10382 /*
10383 * Validate the input parameters
10384 */
10385 if (req->newptr != USER_ADDR_NULL) {
10386 error = EPERM;
10387 goto done;
10388 }
10389 if (namelen != 3) {
10390 error = EINVAL;
10391 goto done;
10392 }
10393 if (req->oldptr == USER_ADDR_NULL) {
10394 error = EINVAL;
10395 goto done;
10396 }
10397 if (req->oldlen == 0) {
10398 error = EINVAL;
10399 goto done;
10400 }
10401 idx = name[0];
10402 frames_array_count = name[1];
10403 frame_data_offset = name[2];
10404
10405 /* Make sure the passed buffer is large enough */
10406 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
10407 req->oldlen) {
10408 error = ENOMEM;
10409 goto done;
10410 }
10411
10412 ifnet_head_lock_shared();
10413 if (!IF_INDEX_IN_RANGE(idx)) {
10414 ifnet_head_done();
10415 error = ENOENT;
10416 goto done;
10417 }
10418 ifp = ifindex2ifnet[idx];
10419 ifnet_head_done();
10420
10421 frames_array = _MALLOC(frames_array_count *
10422 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
10423 if (frames_array == NULL) {
10424 error = ENOMEM;
10425 goto done;
10426 }
10427
10428 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
10429 frames_array_count, frame_data_offset, &used_frames_count);
10430 if (error != 0) {
10431 DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
10432 __func__, error);
10433 goto done;
10434 }
10435
10436 for (i = 0; i < used_frames_count; i++) {
10437 error = SYSCTL_OUT(req, frames_array + i,
10438 sizeof(struct ifnet_keepalive_offload_frame));
10439 if (error != 0) {
10440 goto done;
10441 }
10442 }
10443 done:
10444 if (frames_array != NULL) {
10445 _FREE(frames_array, M_TEMP);
10446 }
10447 return error;
10448 }
10449 #endif /* DEVELOPMENT || DEBUG */
10450
10451 void
10452 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
10453 struct ifnet *ifp)
10454 {
10455 tcp_update_stats_per_flow(ifs, ifp);
10456 }